diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 413e9c732..1b186b8b8 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::collections::HashMap; use std::collections::HashSet; use std::convert::TryFrom; use std::mem; @@ -28,7 +29,8 @@ use crate::distinct_map::{BufferedDistinctMap, DistinctMap}; use crate::raw_document::RawDocument; use crate::{database::MainT, reordered_attrs::ReorderedAttrs}; use crate::{store, Document, DocumentId, MResult}; -use crate::query_tree::{create_query_tree, traverse_query_tree, QueryResult, PostingsKey}; +use crate::query_tree::{create_query_tree, traverse_query_tree}; +use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey}; use crate::query_tree::Context as QTContext; use crate::store::Postings; @@ -88,6 +90,17 @@ where println!("{:?}", operation); println!("{:?}", mapping); + fn recurs_operation<'o>(map: &mut HashMap, operation: &'o Operation) { + match operation { + Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)), + Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)), + Operation::Query(query) => { map.insert(query.id, &query.kind); }, + } + } + + let mut queries_kinds = HashMap::new(); + recurs_operation(&mut queries_kinds, &operation); + let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation).unwrap(); println!("found {} documents", docids.len()); println!("number of postings {:?}", queries.len()); @@ -99,7 +112,6 @@ where mk_arena!(arena); for (PostingsKey{ query, input, distance, is_exact }, matches) in queries { - let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); let pllen = postings_list_view.len() as f32; @@ -126,7 +138,6 @@ where } } else { - let mut offset = 0; for id in docids.as_slice() { let di = DocIndex { document_id: *id, ..DocIndex::default() }; @@ -234,7 +245,7 @@ where debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed)); let iter = raw_documents.into_iter().skip(range.start).take(range.len()); - let iter = iter.map(|rd| Document::from_raw(rd, &arena, searchable_attrs.as_ref())); + let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref())); let documents = iter.collect(); debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed()); diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index a2722488a..195848777 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -31,9 +31,13 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; pub use query_words_mapper::QueryWordsMapper; +use std::convert::TryFrom; +use std::collections::HashMap; use compact_arena::SmallArena; + use crate::bucket_sort::PostingsListView; use crate::levenshtein::prefix_damerau_levenshtein; +use crate::query_tree::{QueryId, QueryKind}; use crate::reordered_attrs::ReorderedAttrs; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -47,6 +51,7 @@ pub struct Document { fn highlights_from_raw_document<'a, 'tag, 'txn>( raw_document: &RawDocument<'a, 'tag>, + queries_kinds: &HashMap, arena: &SmallArena<'tag, PostingsListView<'txn>>, searchable_attrs: Option<&ReorderedAttrs>, ) -> Vec @@ -56,14 +61,20 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>( for bm in raw_document.bare_matches.iter() { let postings_list = &arena[bm.postings_list]; let input = postings_list.input(); - // let query = &automatons[bm.query_index as usize].query; + let kind = &queries_kinds.get(&bm.query_index); for di in postings_list.iter() { - // let covered_area = if query.len() > input.len() { - // input.len() - // } else { - // prefix_damerau_levenshtein(query.as_bytes(), input).1 - // }; + let covered_area = match kind { + Some(QueryKind::Exact(query)) | Some(QueryKind::Tolerant(query)) => { + let len = if query.len() > input.len() { + input.len() + } else { + prefix_damerau_levenshtein(query.as_bytes(), input).1 + }; + u16::try_from(len).unwrap_or(u16::max_value()) + }, + _ => di.char_length, + }; let attribute = searchable_attrs .and_then(|sa| sa.reverse(di.attribute)) @@ -72,7 +83,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>( let highlight = Highlight { attribute: attribute, char_index: di.char_index, - char_length: di.char_length, + char_length: covered_area, }; highlights.push(highlight); @@ -96,12 +107,14 @@ impl Document { #[cfg(not(test))] pub fn from_raw<'a, 'tag, 'txn>( raw_document: RawDocument<'a, 'tag>, + queries_kinds: &HashMap, arena: &SmallArena<'tag, PostingsListView<'txn>>, searchable_attrs: Option<&ReorderedAttrs>, ) -> Document { let highlights = highlights_from_raw_document( &raw_document, + queries_kinds, arena, searchable_attrs, ); @@ -112,6 +125,7 @@ impl Document { #[cfg(test)] pub fn from_raw<'a, 'tag, 'txn>( raw_document: RawDocument<'a, 'tag>, + queries_kinds: &HashMap, arena: &SmallArena<'tag, PostingsListView<'txn>>, searchable_attrs: Option<&ReorderedAttrs>, ) -> Document @@ -120,6 +134,7 @@ impl Document { let highlights = highlights_from_raw_document( &raw_document, + queries_kinds, arena, searchable_attrs, ); diff --git a/meilisearch-core/src/query_tree.rs b/meilisearch-core/src/query_tree.rs index 089eaa3af..5467ad4df 100644 --- a/meilisearch-core/src/query_tree.rs +++ b/meilisearch-core/src/query_tree.rs @@ -285,7 +285,6 @@ pub struct PostingsKey<'o> { pub is_exact: bool, } -pub type Distance = u8; pub type Postings<'o, 'txn> = HashMap, Cow<'txn, Set>>; pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set>>;