2022-10-12 19:21:35 +08:00
|
|
|
use std::borrow::Cow;
|
2023-11-28 17:11:17 +08:00
|
|
|
use std::ffi::CStr;
|
2022-10-12 19:21:35 +08:00
|
|
|
use std::str;
|
|
|
|
|
|
|
|
use charabia::{Language, Script};
|
2023-11-23 01:21:19 +08:00
|
|
|
use heed::BoxedError;
|
2022-10-12 19:21:35 +08:00
|
|
|
|
|
|
|
/// A heed codec for `(Script, Language)` pairs.
///
/// The encoded form is the script name, followed by a single `\0` separator
/// byte, followed by the language name.
pub struct ScriptLanguageCodec;
|
|
|
|
|
|
|
|
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
|
|
|
|
type DItem = (Script, Language);
|
|
|
|
|
2023-11-23 01:21:19 +08:00
|
|
|
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
2023-11-28 17:11:17 +08:00
|
|
|
let cstr = CStr::from_bytes_until_nul(bytes)?;
|
|
|
|
let script = cstr.to_str()?;
|
2022-10-12 19:21:35 +08:00
|
|
|
let script_name = Script::from_name(script);
|
2023-02-01 22:24:49 +08:00
|
|
|
// skip '\0' byte between the two strings.
|
2023-11-28 17:11:17 +08:00
|
|
|
let lan = str::from_utf8(&bytes[script.len() + 1..])?;
|
|
|
|
let lan_name = Language::from_name(lan);
|
2022-10-12 19:21:35 +08:00
|
|
|
|
2023-11-23 01:21:19 +08:00
|
|
|
Ok((script_name, lan_name))
|
2022-10-12 19:21:35 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
|
|
|
|
type EItem = (Script, Language);
|
|
|
|
|
2024-07-09 23:25:39 +08:00
|
|
|
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
2022-10-19 20:03:46 +08:00
|
|
|
let script_name = script.name().as_bytes();
|
|
|
|
let lan_name = lan.name().as_bytes();
|
2022-10-12 19:21:35 +08:00
|
|
|
|
2022-10-19 20:03:46 +08:00
|
|
|
let mut bytes = Vec::with_capacity(script_name.len() + lan_name.len() + 1);
|
|
|
|
bytes.extend_from_slice(script_name);
|
|
|
|
bytes.push(0);
|
|
|
|
bytes.extend_from_slice(lan_name);
|
2022-10-12 19:21:35 +08:00
|
|
|
|
2023-11-23 01:21:19 +08:00
|
|
|
Ok(Cow::Owned(bytes))
|
2022-10-12 19:21:35 +08:00
|
|
|
}
|
|
|
|
}
|