Rename StrStrU8Codec to U8StrStrCodec and reorder its fields

This commit is contained in:
Loïc Lecrenier 2022-09-14 14:01:53 +02:00 committed by Loïc Lecrenier
parent bdeb47305e
commit 1dbbd8694f
7 changed files with 61 additions and 61 deletions

View File

@ -15,4 +15,4 @@ pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
}; };
pub use self::str_beu32_codec::StrBEU32Codec; pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_str_u8_codec::{StrStrU8Codec, UncheckedStrStrU8Codec}; pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

View File

@ -1,10 +1,10 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::str; use std::str;
pub struct StrStrU8Codec; pub struct U8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for StrStrU8Codec { impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
type DItem = (&'a str, &'a str, u8); type DItem = (u8, &'a str, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?; let (n, bytes) = bytes.split_first()?;
@ -13,14 +13,14 @@ impl<'a> heed::BytesDecode<'a> for StrStrU8Codec {
let s2_bytes = &rest[1..]; let s2_bytes = &rest[1..];
let s1 = str::from_utf8(s1_bytes).ok()?; let s1 = str::from_utf8(s1_bytes).ok()?;
let s2 = str::from_utf8(s2_bytes).ok()?; let s2 = str::from_utf8(s2_bytes).ok()?;
Some((s1, s2, *n)) Some((*n, s1, s2))
} }
} }
impl<'a> heed::BytesEncode<'a> for StrStrU8Codec { impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
type EItem = (&'a str, &'a str, u8); type EItem = (u8, &'a str, &'a str);
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n); bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes()); bytes.extend_from_slice(s1.as_bytes());
@ -29,24 +29,24 @@ impl<'a> heed::BytesEncode<'a> for StrStrU8Codec {
Some(Cow::Owned(bytes)) Some(Cow::Owned(bytes))
} }
} }
pub struct UncheckedStrStrU8Codec; pub struct UncheckedU8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for UncheckedStrStrU8Codec { impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
type DItem = (&'a [u8], &'a [u8], u8); type DItem = (u8, &'a [u8], &'a [u8]);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?; let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?; let s1_end = bytes.iter().position(|b| *b == 0)?;
let (s1_bytes, rest) = bytes.split_at(s1_end); let (s1_bytes, rest) = bytes.split_at(s1_end);
let s2_bytes = &rest[1..]; let s2_bytes = &rest[1..];
Some((s1_bytes, s2_bytes, *n)) Some((*n, s1_bytes, s2_bytes))
} }
} }
impl<'a> heed::BytesEncode<'a> for UncheckedStrStrU8Codec { impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
type EItem = (&'a [u8], &'a [u8], u8); type EItem = (u8, &'a [u8], &'a [u8]);
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n); bytes.push(*n);
bytes.extend_from_slice(s1); bytes.extend_from_slice(s1);

View File

@ -21,7 +21,7 @@ use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, StrBEU32Codec, StrStrU8Codec, BEU16, BEU32, Search, StrBEU32Codec, U8StrStrCodec, BEU16, BEU32,
}; };
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -106,9 +106,9 @@ pub struct Index {
pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>, pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears. /// Maps the proximity between a pair of words with all the docids where this relation appears.
pub word_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>, pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the proximity between a pair of word and prefix with all the docids where this relation appears. /// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
pub word_prefix_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>, pub word_prefix_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the word and the position with the docids that corresponds to it. /// Maps the word and the position with the docids that corresponds to it.
pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>, pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,

View File

@ -37,7 +37,7 @@ pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{ pub use self::heed_codec::{
BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec,
CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec,
RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, UncheckedStrStrU8Codec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec, UncheckedU8StrStrCodec,
}; };
pub use self::index::Index; pub use self::index::Index;
pub use self::search::{ pub use self::search::{

View File

@ -138,7 +138,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
right: &str, right: &str,
proximity: u8, proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> { ) -> heed::Result<Option<RoaringBitmap>> {
let key = (left, right, proximity); let key = (proximity, left, right);
self.index.word_pair_proximity_docids.get(self.rtxn, &key) self.index.word_pair_proximity_docids.get(self.rtxn, &key)
} }
@ -148,7 +148,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
right: &str, right: &str,
proximity: u8, proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> { ) -> heed::Result<Option<RoaringBitmap>> {
let key = (left, right, proximity); let key = (proximity, left, right);
self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key) self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key)
} }

View File

@ -182,16 +182,16 @@ pub fn snap_docid_word_positions(index: &Index) -> String {
} }
pub fn snap_word_pair_proximity_docids(index: &Index) -> String { pub fn snap_word_pair_proximity_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, word_pair_proximity_docids, |( let snap = make_db_snap_from_iter!(index, word_pair_proximity_docids, |(
(word1, word2, proximity), (proximity, word1, word2),
b, b,
)| { )| {
&format!("{word1:<16} {word2:<16} {proximity:<2} {}", display_bitmap(&b)) &format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b))
}); });
snap snap
} }
pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String { pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |( let snap = make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |(
(word1, prefix, proximity), (proximity, word1, prefix),
b, b,
)| { )| {
&format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b)) &format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b))

View File

@ -177,7 +177,7 @@ use log::debug;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_writer, merge_cbo_roaring_bitmaps, CursorClonableMmap, create_writer, merge_cbo_roaring_bitmaps, CursorClonableMmap,
}; };
use crate::{CboRoaringBitmapCodec, Index, Result, UncheckedStrStrU8Codec}; use crate::{CboRoaringBitmapCodec, Index, Result, UncheckedU8StrStrCodec};
pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> { pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -259,9 +259,9 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
&mut cursor, &mut cursor,
|cursor| { |cursor| {
if let Some((key, value)) = cursor.move_on_next()? { if let Some((key, value)) = cursor.move_on_next()? {
let (word1, word2, proximity) = UncheckedStrStrU8Codec::bytes_decode(key) let (proximity, word1, word2) = UncheckedU8StrStrCodec::bytes_decode(key)
.ok_or(heed::Error::Decoding)?; .ok_or(heed::Error::Decoding)?;
Ok(Some(((word1, word2, proximity), value))) Ok(Some(((proximity, word1, word2), value)))
} else { } else {
Ok(None) Ok(None)
} }
@ -293,7 +293,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
let mut db_iter = self let mut db_iter = self
.index .index
.word_pair_proximity_docids .word_pair_proximity_docids
.remap_key_type::<UncheckedStrStrU8Codec>() .remap_key_type::<UncheckedU8StrStrCodec>()
.remap_data_type::<ByteSlice>() .remap_data_type::<ByteSlice>()
.iter(self.wtxn)?; .iter(self.wtxn)?;
@ -358,7 +358,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
mut next_word_pair_proximity: impl for<'a> FnMut( mut next_word_pair_proximity: impl for<'a> FnMut(
&'a mut I, &'a mut I,
) -> Result< ) -> Result<
Option<((&'a [u8], &'a [u8], u8), &'a [u8])>, Option<((u8, &'a [u8], &'a [u8]), &'a [u8])>,
>, >,
prefixes: &PrefixTrieNode, prefixes: &PrefixTrieNode,
max_proximity: u8, max_proximity: u8,
@ -376,14 +376,14 @@ fn execute_on_word_pairs_and_prefixes<I>(
let mut prefix_buffer = Vec::with_capacity(8); let mut prefix_buffer = Vec::with_capacity(8);
let mut merge_buffer = Vec::with_capacity(65_536); let mut merge_buffer = Vec::with_capacity(65_536);
while let Some(((word1, word2, proximity), data)) = next_word_pair_proximity(iter)? { while let Some(((proximity, word1, word2), data)) = next_word_pair_proximity(iter)? {
// skip this iteration if the proximity is over the threshold // skip this iteration if the proximity is over the threshold
if proximity > max_proximity { if proximity > max_proximity {
break; break;
}; };
let word2_start_different_than_prev = word2[0] != prev_word2_start; let word2_start_different_than_prev = word2[0] != prev_word2_start;
// if there were no potential prefixes for the previous word2 based on its first letter, // if there were no potential prefixes for the previous word2 based on its first letter,
// and if the current word2 starts with the same letter, then there is also no potential // and if the current word2 starts with the same letter, then there is also no potential
// prefixes for the current word2, and we can skip to the next iteration // prefixes for the current word2, and we can skip to the next iteration
if empty_prefixes && !word2_start_different_than_prev { if empty_prefixes && !word2_start_different_than_prev {
continue; continue;
@ -683,7 +683,7 @@ mod tests {
use super::*; use super::*;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::{db_snap, CboRoaringBitmapCodec, StrStrU8Codec}; use crate::{db_snap, CboRoaringBitmapCodec, U8StrStrCodec};
fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> { fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
let mut documents = Vec::new(); let mut documents = Vec::new();
@ -858,40 +858,40 @@ mod tests {
CboRoaringBitmapCodec::serialize_into(&bitmap_ranges, &mut serialised_bitmap_ranges); CboRoaringBitmapCodec::serialize_into(&bitmap_ranges, &mut serialised_bitmap_ranges);
let word_pairs = [ let word_pairs = [
(("healthy", "arbres", 1), &serialised_bitmap123), ((1, "healthy", "arbres"), &serialised_bitmap123),
(("healthy", "boat", 1), &serialised_bitmap123), ((1, "healthy", "boat"), &serialised_bitmap123),
(("healthy", "ca", 1), &serialised_bitmap123), ((1, "healthy", "ca"), &serialised_bitmap123),
(("healthy", "cats", 1), &serialised_bitmap456), ((1, "healthy", "cats"), &serialised_bitmap456),
(("healthy", "cattos", 1), &serialised_bitmap123), ((1, "healthy", "cattos"), &serialised_bitmap123),
(("jittery", "cat", 1), &serialised_bitmap123), ((1, "jittery", "cat"), &serialised_bitmap123),
(("jittery", "cata", 1), &serialised_bitmap456), ((1, "jittery", "cata"), &serialised_bitmap456),
(("jittery", "catb", 1), &serialised_bitmap789), ((1, "jittery", "catb"), &serialised_bitmap789),
(("jittery", "catc", 1), &serialised_bitmap_ranges), ((1, "jittery", "catc"), &serialised_bitmap_ranges),
(("healthy", "arbre", 2), &serialised_bitmap123), ((2, "healthy", "arbre"), &serialised_bitmap123),
(("healthy", "arbres", 2), &serialised_bitmap456), ((2, "healthy", "arbres"), &serialised_bitmap456),
(("healthy", "cats", 2), &serialised_bitmap789), ((2, "healthy", "cats"), &serialised_bitmap789),
(("healthy", "cattos", 2), &serialised_bitmap_ranges), ((2, "healthy", "cattos"), &serialised_bitmap_ranges),
(("healthy", "arbre", 3), &serialised_bitmap456), ((3, "healthy", "arbre"), &serialised_bitmap456),
(("healthy", "arbres", 3), &serialised_bitmap789), ((3, "healthy", "arbres"), &serialised_bitmap789),
]; ];
let expected_result = [ let expected_result = [
(("healthy", "arb", 1), bitmap123.clone()), ((1, "healthy", "arb"), bitmap123.clone()),
(("healthy", "arbre", 1), bitmap123.clone()), ((1, "healthy", "arbre"), bitmap123.clone()),
(("healthy", "cat", 1), &bitmap456 | &bitmap123), ((1, "healthy", "cat"), &bitmap456 | &bitmap123),
(("healthy", "catto", 1), bitmap123.clone()), ((1, "healthy", "catto"), bitmap123.clone()),
(("jittery", "cat", 1), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)), ((1, "jittery", "cat"), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)),
(("healthy", "arb", 2), &bitmap123 | &bitmap456), ((2, "healthy", "arb"), &bitmap123 | &bitmap456),
(("healthy", "arbre", 2), &bitmap123 | &bitmap456), ((2, "healthy", "arbre"), &bitmap123 | &bitmap456),
(("healthy", "cat", 2), &bitmap789 | &bitmap_ranges), ((2, "healthy", "cat"), &bitmap789 | &bitmap_ranges),
(("healthy", "catto", 2), bitmap_ranges.clone()), ((2, "healthy", "catto"), bitmap_ranges.clone()),
]; ];
let mut result = vec![]; let mut result = vec![];
let mut iter = let mut iter =
IntoIterator::into_iter(word_pairs).map(|((word1, word2, proximity), data)| { IntoIterator::into_iter(word_pairs).map(|((proximity, word1, word2), data)| {
((word1.as_bytes(), word2.as_bytes(), proximity), data.as_slice()) ((proximity, word1.as_bytes(), word2.as_bytes()), data.as_slice())
}); });
execute_on_word_pairs_and_prefixes( execute_on_word_pairs_and_prefixes(
&mut iter, &mut iter,
@ -899,7 +899,7 @@ mod tests {
&prefixes, &prefixes,
2, 2,
|k, v| { |k, v| {
let (word1, prefix, proximity) = StrStrU8Codec::bytes_decode(k).unwrap(); let (word1, prefix, proximity) = U8StrStrCodec::bytes_decode(k).unwrap();
let bitmap = CboRoaringBitmapCodec::bytes_decode(v).unwrap(); let bitmap = CboRoaringBitmapCodec::bytes_decode(v).unwrap();
result.push(((word1.to_owned(), prefix.to_owned(), proximity.to_owned()), bitmap)); result.push(((word1.to_owned(), prefix.to_owned(), proximity.to_owned()), bitmap));
Ok(()) Ok(())
@ -908,8 +908,8 @@ mod tests {
.unwrap(); .unwrap();
for (x, y) in result.into_iter().zip(IntoIterator::into_iter(expected_result)) { for (x, y) in result.into_iter().zip(IntoIterator::into_iter(expected_result)) {
let ((actual_word1, actual_prefix, actual_proximity), actual_bitmap) = x; let ((actual_proximity, actual_word1, actual_prefix), actual_bitmap) = x;
let ((expected_word1, expected_prefix, expected_proximity), expected_bitmap) = y; let ((expected_proximity, expected_word1, expected_prefix), expected_bitmap) = y;
assert_eq!(actual_word1, expected_word1); assert_eq!(actual_word1, expected_word1);
assert_eq!(actual_prefix, expected_prefix); assert_eq!(actual_prefix, expected_prefix);