mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Fix bug in encoding of word_position_docids and word_fid_docids
This commit is contained in:
parent
bd9aba4d77
commit
84d9c731f8
@ -45,11 +45,12 @@ impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let footer_len = size_of::<u16>();
|
||||
|
||||
if bytes.len() < footer_len {
|
||||
if bytes.len() < footer_len + 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let (_, word) = word_plus_nul_byte.split_last()?;
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
|
||||
@ -63,8 +64,9 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + pos.len());
|
||||
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
|
||||
bytes.extend_from_slice(word.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(&pos[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
|
@ -126,9 +126,9 @@ pub struct Index {
|
||||
|
||||
/// Maps the field id and the word count with the docids that correspond to it.
|
||||
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the position of a word prefix with all the docids where this prefix appears.
|
||||
/// Maps the word prefix and a position with all the docids where the prefix appears at the position.
|
||||
pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
/// Maps the word and the field id with the docids that corresponds to it.
|
||||
/// Maps the word prefix and a field id with all the docids where the prefix appears inside the field.
|
||||
pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the script and language with all the docids that correspond to it.
|
||||
|
@ -261,22 +261,6 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub fn get_db_word_position_docids(
|
||||
&mut self,
|
||||
word: Interned<String>,
|
||||
position: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
DatabaseCache::get_value(
|
||||
self.txn,
|
||||
(word, position),
|
||||
&(self.word_interner.get(word).as_str(), position),
|
||||
&mut self.db_cache.word_position_docids,
|
||||
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
|
||||
)?
|
||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub fn get_db_word_fid_docids(
|
||||
&mut self,
|
||||
word: Interned<String>,
|
||||
@ -361,6 +345,22 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
Ok(fids)
|
||||
}
|
||||
|
||||
pub fn get_db_word_position_docids(
|
||||
&mut self,
|
||||
word: Interned<String>,
|
||||
position: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
DatabaseCache::get_value(
|
||||
self.txn,
|
||||
(word, position),
|
||||
&(self.word_interner.get(word).as_str(), position),
|
||||
&mut self.db_cache.word_position_docids,
|
||||
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
|
||||
)?
|
||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub fn get_db_word_prefix_position_docids(
|
||||
&mut self,
|
||||
word_prefix: Interned<String>,
|
||||
|
@ -1,4 +1,6 @@
|
||||
use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||
use crate::{
|
||||
db_snap, index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
@ -6,7 +8,7 @@ fn create_index() -> TempIndex {
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_searchable_fields(vec!["text".to_owned(), "other".to_owned()]);
|
||||
s.set_criteria(vec![Criterion::Attribute]);
|
||||
})
|
||||
.unwrap();
|
||||
@ -33,20 +35,84 @@ fn create_index() -> TempIndex {
|
||||
"id": 4,
|
||||
"text": "the quick brown fox",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
the quick brown fox",
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"text": "quick a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
brown",
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
quick brown",
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_attribute_fid_simple() {
|
||||
fn test_attribute_position_simple() {
|
||||
let index = create_index();
|
||||
|
||||
db_snap!(index, word_position_docids, @"fe86911166fa4c0903c512fd86ec65e4");
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("quick brown");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 4, 2, 1, 0, 6, 7, 5]");
|
||||
}
|
||||
#[test]
|
||||
fn test_attribute_position_repeated() {
|
||||
let index = create_index();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quick brown fox");
|
||||
s.query("a a a a a");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 4, 2, 1, 0]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 7, 6]");
|
||||
}
|
||||
|
@ -36,6 +36,7 @@ pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
||||
for position in read_u32_ne_bytes(value) {
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(word_bytes);
|
||||
key_buffer.push(0);
|
||||
let (fid, _) = relative_from_absolute_position(position);
|
||||
key_buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
word_fid_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
|
||||
|
@ -39,6 +39,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
for position in read_u32_ne_bytes(value) {
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(word_bytes);
|
||||
key_buffer.push(0);
|
||||
let (_, position) = relative_from_absolute_position(position);
|
||||
let position = bucketed_position(position);
|
||||
key_buffer.extend_from_slice(&position.to_be_bytes());
|
||||
|
Loading…
Reference in New Issue
Block a user