mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
query exact_word_docids in resolve_query_tree
This commit is contained in:
parent
8d46a5b0b5
commit
c4c6e35352
@ -68,6 +68,7 @@ impl Default for Candidates {
|
|||||||
pub trait Context<'c> {
|
pub trait Context<'c> {
|
||||||
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
||||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
|
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
fn word_pair_proximity_docids(
|
fn word_pair_proximity_docids(
|
||||||
&self,
|
&self,
|
||||||
@ -118,6 +119,10 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||||||
self.index.word_docids.get(self.rtxn, &word)
|
self.index.word_docids.get(self.rtxn, &word)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
|
self.index.exact_word_docids.get(self.rtxn, &word)
|
||||||
|
}
|
||||||
|
|
||||||
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index.word_prefix_docids.get(self.rtxn, &word)
|
self.index.word_prefix_docids.get(self.rtxn, &word)
|
||||||
}
|
}
|
||||||
@ -400,11 +405,14 @@ fn query_docids(
|
|||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (word, _typo) in words {
|
for (word, _typo) in words {
|
||||||
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
||||||
docids |= current_docids;
|
let exact_current_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
|
||||||
|
docids |= current_docids | exact_current_docids;
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
} else {
|
} else {
|
||||||
Ok(ctx.word_docids(&word)?.unwrap_or_default())
|
let word_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
||||||
|
let exact_word_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
|
||||||
|
Ok(word_docids | exact_word_docids)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
QueryKind::Tolerant { typo, word } => {
|
QueryKind::Tolerant { typo, word } => {
|
||||||
@ -512,6 +520,7 @@ pub mod test {
|
|||||||
pub struct TestContext<'t> {
|
pub struct TestContext<'t> {
|
||||||
words_fst: fst::Set<Cow<'t, [u8]>>,
|
words_fst: fst::Set<Cow<'t, [u8]>>,
|
||||||
word_docids: HashMap<String, RoaringBitmap>,
|
word_docids: HashMap<String, RoaringBitmap>,
|
||||||
|
exact_word_docids: HashMap<String, RoaringBitmap>,
|
||||||
word_prefix_docids: HashMap<String, RoaringBitmap>,
|
word_prefix_docids: HashMap<String, RoaringBitmap>,
|
||||||
word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
|
word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
|
||||||
word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
|
word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
|
||||||
@ -527,6 +536,10 @@ pub mod test {
|
|||||||
Ok(self.word_docids.get(&word.to_string()).cloned())
|
Ok(self.word_docids.get(&word.to_string()).cloned())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
|
Ok(self.exact_word_docids.get(&word.to_string()).cloned())
|
||||||
|
}
|
||||||
|
|
||||||
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
Ok(self.word_prefix_docids.get(&word.to_string()).cloned())
|
Ok(self.word_prefix_docids.get(&word.to_string()).cloned())
|
||||||
}
|
}
|
||||||
@ -643,6 +656,8 @@ pub mod test {
|
|||||||
s("morning") => random_postings(rng, 125),
|
s("morning") => random_postings(rng, 125),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let exact_word_docids = HashMap::new();
|
||||||
|
|
||||||
let mut docid_words = HashMap::new();
|
let mut docid_words = HashMap::new();
|
||||||
for (word, docids) in word_docids.iter() {
|
for (word, docids) in word_docids.iter() {
|
||||||
for docid in docids {
|
for docid in docids {
|
||||||
@ -712,6 +727,7 @@ pub mod test {
|
|||||||
TestContext {
|
TestContext {
|
||||||
words_fst,
|
words_fst,
|
||||||
word_docids,
|
word_docids,
|
||||||
|
exact_word_docids,
|
||||||
word_prefix_docids,
|
word_prefix_docids,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
|
@ -284,7 +284,7 @@ where
|
|||||||
let mut word_pair_proximity_docids = None;
|
let mut word_pair_proximity_docids = None;
|
||||||
let mut word_position_docids = None;
|
let mut word_position_docids = None;
|
||||||
let mut word_docids = None;
|
let mut word_docids = None;
|
||||||
let mut _exact_word_docids = None;
|
let mut exact_word_docids = None;
|
||||||
|
|
||||||
let mut databases_seen = 0;
|
let mut databases_seen = 0;
|
||||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
@ -299,7 +299,7 @@ where
|
|||||||
word_docids = Some(cloneable_chunk);
|
word_docids = Some(cloneable_chunk);
|
||||||
let cloneable_chunk =
|
let cloneable_chunk =
|
||||||
unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
|
unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
|
||||||
_exact_word_docids = Some(cloneable_chunk);
|
exact_word_docids = Some(cloneable_chunk);
|
||||||
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
|
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
|
||||||
}
|
}
|
||||||
TypedChunk::WordPairProximityDocids(chunk) => {
|
TypedChunk::WordPairProximityDocids(chunk) => {
|
||||||
@ -352,6 +352,7 @@ where
|
|||||||
|
|
||||||
self.execute_prefix_databases(
|
self.execute_prefix_databases(
|
||||||
word_docids,
|
word_docids,
|
||||||
|
exact_word_docids,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_position_docids,
|
word_position_docids,
|
||||||
)?;
|
)?;
|
||||||
@ -363,6 +364,7 @@ where
|
|||||||
pub fn execute_prefix_databases(
|
pub fn execute_prefix_databases(
|
||||||
self,
|
self,
|
||||||
word_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
word_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
||||||
|
exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
||||||
word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
||||||
word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
@ -433,7 +435,10 @@ where
|
|||||||
if let Some(word_docids) = word_docids {
|
if let Some(word_docids) = word_docids {
|
||||||
let mut word_docids_builder = grenad::MergerBuilder::new(merge_nothing as MergeFn);
|
let mut word_docids_builder = grenad::MergerBuilder::new(merge_nothing as MergeFn);
|
||||||
word_docids_builder.push(word_docids.into_cursor()?);
|
word_docids_builder.push(word_docids.into_cursor()?);
|
||||||
// TODO: push exact_word_docids
|
if let Some(exact_word_docids) = exact_word_docids {
|
||||||
|
word_docids_builder.push(exact_word_docids.into_cursor()?);
|
||||||
|
}
|
||||||
|
|
||||||
let word_docids_iter = word_docids_builder.build().into_stream_merger_iter()?;
|
let word_docids_iter = word_docids_builder.build().into_stream_merger_iter()?;
|
||||||
// Run the word prefix docids update operation.
|
// Run the word prefix docids update operation.
|
||||||
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
|
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
|
||||||
|
Loading…
Reference in New Issue
Block a user