mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 03:55:07 +08:00
Remove proximity database, forcing us to remove phrase search and splitwords
This commit is contained in:
parent
5637978fe4
commit
42bbfebf70
@ -12,7 +12,8 @@ use super::Word;
|
|||||||
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||||
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||||
use crate::{
|
use crate::{
|
||||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
|
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec,
|
||||||
|
RoaringBitmapLenCodec, SearchContext,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A cache storing pointers to values in the LMDB databases.
|
/// A cache storing pointers to values in the LMDB databases.
|
||||||
@ -259,6 +260,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
|
unreachable!();
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, word1, word2),
|
(proximity, word1, word2),
|
||||||
@ -278,6 +280,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<u64>> {
|
) -> Result<Option<u64>> {
|
||||||
|
unreachable!();
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, word1, word2),
|
(proximity, word1, word2),
|
||||||
@ -291,12 +294,23 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_db_word_docids_len(&mut self, word: Interned<String>) -> Result<Option<u64>> {
|
||||||
|
DatabaseCache::get_value::<_, _, RoaringBitmapLenCodec>(
|
||||||
|
self.txn,
|
||||||
|
word,
|
||||||
|
self.word_interner.get(word).as_str(),
|
||||||
|
&mut self.db_cache.word_docids,
|
||||||
|
self.index.word_docids.remap_data_type::<ByteSlice>(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_db_word_prefix_pair_proximity_docids(
|
pub fn get_db_word_prefix_pair_proximity_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
prefix2: Interned<String>,
|
prefix2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
|
unreachable!();
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, word1, prefix2),
|
(proximity, word1, prefix2),
|
||||||
@ -315,6 +329,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
right: Interned<String>,
|
right: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
|
unreachable!();
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
self.txn,
|
self.txn,
|
||||||
(proximity, left_prefix, right),
|
(proximity, left_prefix, right),
|
||||||
|
@ -295,11 +295,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
ranking_rules.push(Box::new(Typo::new(None)));
|
ranking_rules.push(Box::new(Typo::new(None)));
|
||||||
}
|
}
|
||||||
crate::Criterion::Proximity => {
|
crate::Criterion::Proximity => {
|
||||||
if proximity {
|
// if proximity {
|
||||||
continue;
|
continue;
|
||||||
}
|
// }
|
||||||
proximity = true;
|
// proximity = true;
|
||||||
ranking_rules.push(Box::new(Proximity::new(None)));
|
// ranking_rules.push(Box::new(Proximity::new(None)));
|
||||||
}
|
}
|
||||||
crate::Criterion::Attribute => {
|
crate::Criterion::Attribute => {
|
||||||
if attribute {
|
if attribute {
|
||||||
|
@ -265,11 +265,11 @@ pub fn partially_initialized_term_from_word(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Interned<Phrase>>> {
|
fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Interned<Phrase>>> {
|
||||||
if let Some((l, r)) = split_best_frequency(ctx, word)? {
|
// if let Some((l, r)) = split_best_frequency(ctx, word)? {
|
||||||
Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
|
// Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
|
||||||
} else {
|
// } else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Interned<QueryTerm> {
|
impl Interned<QueryTerm> {
|
||||||
@ -416,11 +416,20 @@ fn split_best_frequency(
|
|||||||
let left = ctx.word_interner.insert(left.to_owned());
|
let left = ctx.word_interner.insert(left.to_owned());
|
||||||
let right = ctx.word_interner.insert(right.to_owned());
|
let right = ctx.word_interner.insert(right.to_owned());
|
||||||
|
|
||||||
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
|
if let (Some(l_freq), Some(r_freq)) =
|
||||||
|
(ctx.get_db_word_docids_len(left)?, ctx.get_db_word_docids_len(right)?)
|
||||||
|
{
|
||||||
|
let frequency = l_freq.min(r_freq);
|
||||||
if best.map_or(true, |(old, _, _)| frequency > old) {
|
if best.map_or(true, |(old, _, _)| frequency > old) {
|
||||||
best = Some((frequency, left, right));
|
best = Some((frequency, left, right));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
|
||||||
|
// if best.map_or(true, |(old, _, _)| frequency > old) {
|
||||||
|
// best = Some((frequency, left, right));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(best.map(|(_, left, right)| (left, right)))
|
Ok(best.map(|(_, left, right)| (left, right)))
|
||||||
|
@ -82,41 +82,41 @@ pub fn located_query_terms_from_tokens(
|
|||||||
position = position.wrapping_add(7);
|
position = position.wrapping_add(7);
|
||||||
}
|
}
|
||||||
|
|
||||||
phrase = 'phrase: {
|
// phrase = 'phrase: {
|
||||||
let phrase = phrase.take();
|
// let phrase = phrase.take();
|
||||||
|
|
||||||
// If we have a hard separator inside a phrase, we immediately start a new phrase
|
// // If we have a hard separator inside a phrase, we immediately start a new phrase
|
||||||
let phrase = if separator_kind == SeparatorKind::Hard {
|
// let phrase = if separator_kind == SeparatorKind::Hard {
|
||||||
if let Some(phrase) = phrase {
|
// if let Some(phrase) = phrase {
|
||||||
if let Some(located_query_term) = phrase.build(ctx) {
|
// if let Some(located_query_term) = phrase.build(ctx) {
|
||||||
located_terms.push(located_query_term)
|
// located_terms.push(located_query_term)
|
||||||
}
|
// }
|
||||||
Some(PhraseBuilder::empty())
|
// Some(PhraseBuilder::empty())
|
||||||
} else {
|
// } else {
|
||||||
None
|
// None
|
||||||
}
|
// }
|
||||||
} else {
|
// } else {
|
||||||
phrase
|
// phrase
|
||||||
};
|
// };
|
||||||
|
|
||||||
// We close and start a new phrase depending on the number of double quotes
|
// // We close and start a new phrase depending on the number of double quotes
|
||||||
let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
|
// let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
|
||||||
if quote_count == 0 {
|
// if quote_count == 0 {
|
||||||
break 'phrase phrase;
|
// break 'phrase phrase;
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Consume the closing quote and the phrase
|
// // Consume the closing quote and the phrase
|
||||||
if let Some(phrase) = phrase {
|
// if let Some(phrase) = phrase {
|
||||||
// Per the check above, quote_count > 0
|
// // Per the check above, quote_count > 0
|
||||||
quote_count -= 1;
|
// quote_count -= 1;
|
||||||
if let Some(located_query_term) = phrase.build(ctx) {
|
// if let Some(located_query_term) = phrase.build(ctx) {
|
||||||
located_terms.push(located_query_term)
|
// located_terms.push(located_query_term)
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Start new phrase if the token ends with an opening quote
|
// // Start new phrase if the token ends with an opening quote
|
||||||
(quote_count % 2 == 1).then_some(PhraseBuilder::empty())
|
// (quote_count % 2 == 1).then_some(PhraseBuilder::empty())
|
||||||
};
|
// };
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user