Remove a lot of unreachable serialization errors

This commit is contained in:
Kerollmops 2021-06-09 15:26:40 +02:00
parent 65b1d09d55
commit d2b1ecc885
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 62 additions and 33 deletions

View File

@ -5,6 +5,14 @@ use crate::FieldId;
pub struct FacetValueStringCodec; pub struct FacetValueStringCodec;
impl FacetValueStringCodec {
    /// Appends the encoded `(field_id, value)` pair to the end of `out`.
    ///
    /// The layout is the field id as one byte, immediately followed by the
    /// raw UTF-8 bytes of `value`. Bytes already present in `out` are kept.
    pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec<u8>) {
        let text = value.as_bytes();

        // Make room up front: the string payload plus one byte for the field id.
        out.reserve(text.len() + 1);

        out.push(field_id);
        out.extend_from_slice(text);
    }
}
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec { impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
type DItem = (FieldId, &'a str); type DItem = (FieldId, &'a str);
@ -19,9 +27,8 @@ impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
type EItem = (FieldId, &'a str); type EItem = (FieldId, &'a str);
fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(value.len() + 1); let mut bytes = Vec::new();
bytes.push(*field_id); FacetValueStringCodec::serialize_into(*field_id, value, &mut bytes);
bytes.extend_from_slice(value.as_bytes());
Some(Cow::Owned(bytes)) Some(Cow::Owned(bytes))
} }
} }

View File

@ -6,6 +6,15 @@ use crate::{FieldId, DocumentId};
pub struct FieldDocIdFacetStringCodec; pub struct FieldDocIdFacetStringCodec;
impl FieldDocIdFacetStringCodec {
    /// Appends the encoded `(field_id, document_id, value)` triple to the end
    /// of `out`.
    ///
    /// The layout is the field id as one byte, then the document id in
    /// big-endian byte order, then the raw UTF-8 bytes of `value`. Bytes
    /// already present in `out` are kept.
    pub fn serialize_into(
        field_id: FieldId,
        document_id: DocumentId,
        value: &str,
        out: &mut Vec<u8>,
    ) {
        let docid_bytes = document_id.to_be_bytes();
        let text = value.as_bytes();

        // Field id (1 byte) + document id (4 bytes) + string payload.
        out.reserve(1 + 4 + text.len());

        out.push(field_id);
        out.extend_from_slice(&docid_bytes);
        out.extend_from_slice(text);
    }
}
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec { impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
type DItem = (FieldId, DocumentId, &'a str); type DItem = (FieldId, DocumentId, &'a str);
@ -22,10 +31,8 @@ impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
type EItem = (FieldId, DocumentId, &'a str); type EItem = (FieldId, DocumentId, &'a str);
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(1 + 4 + value.len()); let mut bytes = Vec::new();
bytes.push(*field_id); FieldDocIdFacetStringCodec::serialize_into(*field_id, *document_id, value, &mut bytes);
bytes.extend_from_slice(&document_id.to_be_bytes());
bytes.extend_from_slice(value.as_bytes());
Some(Cow::Owned(bytes)) Some(Cow::Owned(bytes))
} }
} }

View File

@ -6,6 +6,13 @@ use roaring::RoaringBitmap;
pub struct BoRoaringBitmapCodec; pub struct BoRoaringBitmapCodec;
impl BoRoaringBitmapCodec {
pub fn serialize_into(bitmap: &RoaringBitmap, out: &mut Vec<u8>) {
out.reserve(bitmap.len() as usize * size_of::<u32>());
bitmap.iter().map(u32::to_ne_bytes).for_each(|bytes| out.extend_from_slice(&bytes));
}
}
impl heed::BytesDecode<'_> for BoRoaringBitmapCodec { impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
type DItem = RoaringBitmap; type DItem = RoaringBitmap;
@ -25,12 +32,8 @@ impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap; type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
let mut out = Vec::with_capacity(item.len() as usize * size_of::<u32>()); let mut out = Vec::new();
BoRoaringBitmapCodec::serialize_into(item, &mut out);
item.iter()
.map(|i| i.to_ne_bytes())
.for_each(|bytes| out.extend_from_slice(&bytes));
Some(Cow::Owned(out)) Some(Cow::Owned(out))
} }
} }

View File

@ -421,6 +421,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
{ {
// We prefix the words by the document id. // We prefix the words by the document id.
let mut key = id.to_be_bytes().to_vec(); let mut key = id.to_be_bytes().to_vec();
let mut buffer = Vec::new();
let base_size = key.len(); let base_size = key.len();
// We order the words lexicographically, this way we avoid passing by a sorter. // We order the words lexicographically, this way we avoid passing by a sorter.
@ -429,13 +430,15 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
for (word, positions) in words_positions { for (word, positions) in words_positions {
key.truncate(base_size); key.truncate(base_size);
key.extend_from_slice(word.as_bytes()); key.extend_from_slice(word.as_bytes());
buffer.clear();
// We serialize the positions into a buffer. // We serialize the positions into a buffer.
let positions = RoaringBitmap::from_iter(positions.iter().cloned()); let positions = RoaringBitmap::from_iter(positions.iter().cloned());
let bytes = BoRoaringBitmapCodec::bytes_encode(&positions) BoRoaringBitmapCodec::serialize_into(&positions, &mut buffer);
.with_context(|| "could not serialize positions")?;
// that we write under the generated key into MTBL // that we write under the generated key into MTBL
if lmdb_key_valid_size(&key) { if lmdb_key_valid_size(&key) {
writer.insert(&key, &bytes)?; writer.insert(&key, &buffer)?;
} }
} }
@ -483,14 +486,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
) -> anyhow::Result<()> ) -> anyhow::Result<()>
where I: IntoIterator<Item=((FieldId, String), RoaringBitmap)> where I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>
{ {
let mut key_buffer = Vec::new();
let mut data_buffer = Vec::new();
for ((field_id, value), docids) in iter { for ((field_id, value), docids) in iter {
let key = FacetValueStringCodec::bytes_encode(&(field_id, &value)) key_buffer.clear();
.map(Cow::into_owned) data_buffer.clear();
.context("could not serialize facet key")?;
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids) FacetValueStringCodec::serialize_into(field_id, &value, &mut key_buffer);
.context("could not serialize docids")?; CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
if lmdb_key_valid_size(&key) {
sorter.insert(&key, &bytes)?; if lmdb_key_valid_size(&key_buffer) {
sorter.insert(&key_buffer, &data_buffer)?;
} }
} }
@ -503,14 +510,19 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
) -> anyhow::Result<()> ) -> anyhow::Result<()>
where I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)> where I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>
{ {
let mut data_buffer = Vec::new();
for ((field_id, value), docids) in iter { for ((field_id, value), docids) in iter {
data_buffer.clear();
let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value)) let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value))
.map(Cow::into_owned) .map(Cow::into_owned)
.context("could not serialize facet key")?; .context("could not serialize facet level value key")?;
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
.context("could not serialize docids")?; CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
if lmdb_key_valid_size(&key) { if lmdb_key_valid_size(&key) {
sorter.insert(&key, &bytes)?; sorter.insert(&key, &data_buffer)?;
} }
} }
@ -526,7 +538,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
{ {
let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value)) let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value))
.map(Cow::into_owned) .map(Cow::into_owned)
.context("could not serialize facet key")?; .context("could not serialize facet level value key")?;
if lmdb_key_valid_size(&key) { if lmdb_key_valid_size(&key) {
sorter.insert(&key, &[])?; sorter.insert(&key, &[])?;
@ -542,12 +554,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
value: &str, value: &str,
) -> anyhow::Result<()> ) -> anyhow::Result<()>
{ {
let key = FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, value)) let mut buffer = Vec::new();
.map(Cow::into_owned)
.context("could not serialize facet key")?;
if lmdb_key_valid_size(&key) { FieldDocIdFacetStringCodec::serialize_into(field_id, document_id, value, &mut buffer);
sorter.insert(&key, &[])?;
if lmdb_key_valid_size(&buffer) {
sorter.insert(&buffer, &[])?;
} }
Ok(()) Ok(())