mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-25 19:45:05 +08:00
Remove proximity database, forcing us to remove phrase search and splitwords
This commit is contained in:
parent
5637978fe4
commit
42bbfebf70
@ -12,7 +12,8 @@ use super::Word;
|
||||
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||
use crate::{
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec,
|
||||
RoaringBitmapLenCodec, SearchContext,
|
||||
};
|
||||
|
||||
/// A cache storing pointers to values in the LMDB databases.
|
||||
@ -259,6 +260,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
@ -278,6 +280,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<u64>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
@ -291,12 +294,23 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_db_word_docids_len(&mut self, word: Interned<String>) -> Result<Option<u64>> {
|
||||
DatabaseCache::get_value::<_, _, RoaringBitmapLenCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_docids.remap_data_type::<ByteSlice>(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_db_word_prefix_pair_proximity_docids(
|
||||
&mut self,
|
||||
word1: Interned<String>,
|
||||
prefix2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, prefix2),
|
||||
@ -315,6 +329,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
right: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
unreachable!();
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, left_prefix, right),
|
||||
|
@ -295,11 +295,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||
ranking_rules.push(Box::new(Typo::new(None)));
|
||||
}
|
||||
crate::Criterion::Proximity => {
|
||||
if proximity {
|
||||
// if proximity {
|
||||
continue;
|
||||
}
|
||||
proximity = true;
|
||||
ranking_rules.push(Box::new(Proximity::new(None)));
|
||||
// }
|
||||
// proximity = true;
|
||||
// ranking_rules.push(Box::new(Proximity::new(None)));
|
||||
}
|
||||
crate::Criterion::Attribute => {
|
||||
if attribute {
|
||||
|
@ -265,11 +265,11 @@ pub fn partially_initialized_term_from_word(
|
||||
}
|
||||
|
||||
fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Interned<Phrase>>> {
|
||||
if let Some((l, r)) = split_best_frequency(ctx, word)? {
|
||||
Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
|
||||
} else {
|
||||
// if let Some((l, r)) = split_best_frequency(ctx, word)? {
|
||||
// Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
|
||||
// } else {
|
||||
Ok(None)
|
||||
}
|
||||
// }
|
||||
}
|
||||
|
||||
impl Interned<QueryTerm> {
|
||||
@ -416,11 +416,20 @@ fn split_best_frequency(
|
||||
let left = ctx.word_interner.insert(left.to_owned());
|
||||
let right = ctx.word_interner.insert(right.to_owned());
|
||||
|
||||
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
|
||||
if let (Some(l_freq), Some(r_freq)) =
|
||||
(ctx.get_db_word_docids_len(left)?, ctx.get_db_word_docids_len(right)?)
|
||||
{
|
||||
let frequency = l_freq.min(r_freq);
|
||||
if best.map_or(true, |(old, _, _)| frequency > old) {
|
||||
best = Some((frequency, left, right));
|
||||
}
|
||||
}
|
||||
|
||||
// if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
|
||||
// if best.map_or(true, |(old, _, _)| frequency > old) {
|
||||
// best = Some((frequency, left, right));
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
Ok(best.map(|(_, left, right)| (left, right)))
|
||||
|
@ -82,41 +82,41 @@ pub fn located_query_terms_from_tokens(
|
||||
position = position.wrapping_add(7);
|
||||
}
|
||||
|
||||
phrase = 'phrase: {
|
||||
let phrase = phrase.take();
|
||||
// phrase = 'phrase: {
|
||||
// let phrase = phrase.take();
|
||||
|
||||
// If we have a hard separator inside a phrase, we immediately start a new phrase
|
||||
let phrase = if separator_kind == SeparatorKind::Hard {
|
||||
if let Some(phrase) = phrase {
|
||||
if let Some(located_query_term) = phrase.build(ctx) {
|
||||
located_terms.push(located_query_term)
|
||||
}
|
||||
Some(PhraseBuilder::empty())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
phrase
|
||||
};
|
||||
// // If we have a hard separator inside a phrase, we immediately start a new phrase
|
||||
// let phrase = if separator_kind == SeparatorKind::Hard {
|
||||
// if let Some(phrase) = phrase {
|
||||
// if let Some(located_query_term) = phrase.build(ctx) {
|
||||
// located_terms.push(located_query_term)
|
||||
// }
|
||||
// Some(PhraseBuilder::empty())
|
||||
// } else {
|
||||
// None
|
||||
// }
|
||||
// } else {
|
||||
// phrase
|
||||
// };
|
||||
|
||||
// We close and start a new phrase depending on the number of double quotes
|
||||
let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
|
||||
if quote_count == 0 {
|
||||
break 'phrase phrase;
|
||||
}
|
||||
// // We close and start a new phrase depending on the number of double quotes
|
||||
// let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
|
||||
// if quote_count == 0 {
|
||||
// break 'phrase phrase;
|
||||
// }
|
||||
|
||||
// Consume the closing quote and the phrase
|
||||
if let Some(phrase) = phrase {
|
||||
// Per the check above, quote_count > 0
|
||||
quote_count -= 1;
|
||||
if let Some(located_query_term) = phrase.build(ctx) {
|
||||
located_terms.push(located_query_term)
|
||||
}
|
||||
}
|
||||
// // Consume the closing quote and the phrase
|
||||
// if let Some(phrase) = phrase {
|
||||
// // Per the check above, quote_count > 0
|
||||
// quote_count -= 1;
|
||||
// if let Some(located_query_term) = phrase.build(ctx) {
|
||||
// located_terms.push(located_query_term)
|
||||
// }
|
||||
// }
|
||||
|
||||
// Start new phrase if the token ends with an opening quote
|
||||
(quote_count % 2 == 1).then_some(PhraseBuilder::empty())
|
||||
};
|
||||
// // Start new phrase if the token ends with an opening quote
|
||||
// (quote_count % 2 == 1).then_some(PhraseBuilder::empty())
|
||||
// };
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user