diff --git a/meilidb-core/src/distinct_map.rs b/meilidb-core/src/distinct_map.rs index 3ac956245..c53ad0ea4 100644 --- a/meilidb-core/src/distinct_map.rs +++ b/meilidb-core/src/distinct_map.rs @@ -1,5 +1,4 @@ use std::hash::Hash; - use hashbrown::HashMap; pub struct DistinctMap { diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs index 72435ea46..0976fbde8 100644 --- a/meilidb-core/src/lib.rs +++ b/meilidb-core/src/lib.rs @@ -4,6 +4,7 @@ mod automaton; mod distinct_map; mod query_builder; +mod reordered_attrs; mod store; pub mod criterion; @@ -59,73 +60,53 @@ pub struct DocIndex { /// /// The order of the field is important because it defines /// the way these structures are ordered between themselves. -/// -/// The word in itself is not important. -// TODO do data oriented programming ? very arrays ? #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Match { - /// The word index in the query sentence. - /// Same as the `attribute_index` but for the query words. - /// - /// Used to retrieve the automaton that match this word. - pub query_index: u32, - - /// The distance the word has with the query word - /// (i.e. the Levenshtein distance). - pub distance: u8, - +pub struct Highlight { /// The attribute in the document where the word was found /// along with the index in it. pub attribute: u16, - pub word_index: u16, - /// Whether the word that match is an exact match or a prefix. - pub is_exact: bool, - - /// The position in bytes where the word was found - /// along with the length of it. + /// The position in bytes where the word was found. /// /// It informs on the original word area in the text indexed /// without needing to run the tokenizer again. pub char_index: u16, + + /// The length in bytes of the found word. + /// + /// It informs on the original word area in the text indexed + /// without needing to run the tokenizer again. 
pub char_length: u16, } -impl Match { - pub fn zero() -> Self { - Match { - query_index: 0, - distance: 0, - attribute: 0, - word_index: 0, - is_exact: false, - char_index: 0, - char_length: 0, - } - } - - pub fn max() -> Self { - Match { - query_index: u32::max_value(), - distance: u8::max_value(), - attribute: u16::max_value(), - word_index: u16::max_value(), - is_exact: true, - char_index: u16::max_value(), - char_length: u16::max_value(), - } - } +#[doc(hidden)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TmpMatch { + pub query_index: u32, + pub distance: u8, + pub attribute: u16, + pub word_index: u16, + pub is_exact: bool, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Document { pub id: DocumentId, - pub matches: Vec, + pub highlights: Vec, + + #[cfg(test)] + pub matches: Vec, } impl Document { - fn from_raw(raw: &RawDocument) -> Document { - let len = raw.matches.range.len(); + #[cfg(not(test))] + fn from_raw(raw: RawDocument) -> Document { + Document { id: raw.id, highlights: raw.highlights } + } + + #[cfg(test)] + fn from_raw(raw: RawDocument) -> Document { + let len = raw.query_index().len(); let mut matches = Vec::with_capacity(len); let query_index = raw.query_index(); @@ -133,23 +114,19 @@ impl Document { let attribute = raw.attribute(); let word_index = raw.word_index(); let is_exact = raw.is_exact(); - let char_index = raw.char_index(); - let char_length = raw.char_length(); for i in 0..len { - let match_ = Match { + let match_ = TmpMatch { query_index: query_index[i], distance: distance[i], attribute: attribute[i], word_index: word_index[i], is_exact: is_exact[i], - char_index: char_index[i], - char_length: char_length[i], }; matches.push(match_); } - Document { id: raw.id, matches } + Document { id: raw.id, matches, highlights: raw.highlights } } } @@ -157,11 +134,12 @@ impl Document { pub struct RawDocument { pub id: DocumentId, pub matches: SharedMatches, + pub highlights: Vec, } 
impl RawDocument { - fn new(id: DocumentId, range: Range, matches: Arc) -> RawDocument { - RawDocument { id, matches: SharedMatches { range, matches } } + fn new(id: DocumentId, matches: SharedMatches, highlights: Vec) -> RawDocument { + RawDocument { id, matches, highlights } } pub fn query_index(&self) -> &[u32] { @@ -198,20 +176,6 @@ impl RawDocument { // can only be done in this module unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) } } - - pub fn char_index(&self) -> &[u16] { - let r = self.matches.range; - // it is safe because construction/modifications - // can only be done in this module - unsafe { &self.matches.matches.char_index.get_unchecked(r.start..r.end) } - } - - pub fn char_length(&self) -> &[u16] { - let r = self.matches.range; - // it is safe because construction/modifications - // can only be done in this module - unsafe { &self.matches.matches.char_length.get_unchecked(r.start..r.end) } - } } impl fmt::Debug for RawDocument { @@ -223,27 +187,30 @@ impl fmt::Debug for RawDocument { .field("attribute", &self.attribute()) .field("word_index", &self.word_index()) .field("is_exact", &self.is_exact()) - .field("char_index", &self.char_index()) - .field("char_length", &self.char_length()) .finish() } } -pub fn raw_documents_from_matches(matches: SetBuf<(DocumentId, Match)>) -> Vec { - let mut docs_ranges = Vec::<(_, Range)>::new(); +fn raw_documents_from_matches(matches: SetBuf<(DocumentId, TmpMatch, Highlight)>) -> Vec { + let mut docs_ranges: Vec<(_, Range, _)> = Vec::new(); let mut matches2 = Matches::with_capacity(matches.len()); - for group in matches.linear_group_by(|(a, _), (b, _)| a == b) { - let id = group[0].0; - let start = docs_ranges.last().map(|(_, r)| r.end).unwrap_or(0); + for group in matches.linear_group_by(|(a, _, _), (b, _, _)| a == b) { + let document_id = group[0].0; + let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0); let end = start + group.len(); - docs_ranges.push((id, Range { start, end 
})); + + let highlights = group.iter().map(|(_, _, h)| *h).collect(); + docs_ranges.push((document_id, Range { start, end }, highlights)); matches2.extend_from_slice(group); } let matches = Arc::new(matches2); - docs_ranges.into_iter().map(|(i, r)| RawDocument::new(i, r, matches.clone())).collect() + docs_ranges.into_iter().map(|(i, range, highlights)| { + let matches = SharedMatches { range, matches: matches.clone() }; + RawDocument::new(i, matches, highlights) + }).collect() } #[derive(Debug, Copy, Clone)] @@ -252,12 +219,6 @@ struct Range { end: usize, } -impl Range { - fn len(self) -> usize { - self.end - self.start - } -} - #[derive(Clone)] pub struct SharedMatches { range: Range, @@ -271,8 +232,6 @@ struct Matches { attribute: Vec, word_index: Vec, is_exact: Vec, - char_index: Vec, - char_length: Vec, } impl Matches { @@ -283,25 +242,20 @@ impl Matches { attribute: Vec::with_capacity(cap), word_index: Vec::with_capacity(cap), is_exact: Vec::with_capacity(cap), - char_index: Vec::with_capacity(cap), - char_length: Vec::with_capacity(cap), } } - fn extend_from_slice(&mut self, matches: &[(DocumentId, Match)]) { - for (_, match_) in matches { + fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch, Highlight)]) { + for (_, match_, _) in matches { self.query_index.push(match_.query_index); self.distance.push(match_.distance); self.attribute.push(match_.attribute); self.word_index.push(match_.word_index); self.is_exact.push(match_.is_exact); - self.char_index.push(match_.char_index); - self.char_length.push(match_.char_length); } } } - #[cfg(test)] mod tests { use super::*; diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index b02caa30c..43da389a8 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -5,7 +5,7 @@ use std::time::Instant; use std::{cmp, mem}; use fst::{Streamer, IntoStreamer}; -use hashbrown::{HashMap, HashSet}; +use hashbrown::HashMap; use log::info; use 
meilidb_tokenizer::{is_cjk, split_query_string}; use rayon::slice::ParallelSliceMut; @@ -17,7 +17,8 @@ use crate::automaton::{build_dfa, build_prefix_dfa}; use crate::distinct_map::{DistinctMap, BufferedDistinctMap}; use crate::criterion::Criteria; use crate::raw_documents_from_matches; -use crate::{Match, DocumentId, Store, RawDocument, Document}; +use crate::reordered_attrs::ReorderedAttrs; +use crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document}; const NGRAMS: usize = 3; @@ -177,12 +178,12 @@ fn generate_automatons(query: &str, store: &S) -> Result bool> { store: S, criteria: Criteria<'c>, - searchable_attrs: Option>, + searchable_attrs: Option, filter: Option, } @@ -228,8 +229,8 @@ impl<'c, S, FI> QueryBuilder<'c, S, FI> } pub fn add_searchable_attribute(&mut self, attribute: u16) { - let attributes = self.searchable_attrs.get_or_insert_with(HashSet::new); - attributes.insert(attribute); + let reorders = self.searchable_attrs.get_or_insert_with(ReorderedAttrs::new); + reorders.insert_attribute(attribute); } } @@ -239,6 +240,7 @@ where S: Store, fn query_all(&self, query: &str) -> Result, S::Error> { let automatons = generate_automatons(query, &self.store)?; let words = self.store.words()?.as_fst(); + let searchables = self.searchable_attrs.as_ref(); let mut stream = { let mut op_builder = fst::raw::OpBuilder::new(); @@ -264,18 +266,21 @@ where S: Store, }; for di in doc_indexes.as_slice() { - if self.searchable_attrs.as_ref().map_or(true, |r| r.contains(&di.attribute)) { - let match_ = Match { + let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute)); + if let Some(attribute) = attribute { + let match_ = TmpMatch { query_index: query_index as u32, distance, - attribute: di.attribute, + attribute, word_index: di.word_index, is_exact, + }; + let highlight = Highlight { + attribute: di.attribute, char_index: di.char_index, char_length: di.char_length, }; - matches.push((di.document_id, match_)); - + 
matches.push((di.document_id, match_, highlight)); } } } @@ -286,7 +291,11 @@ where S: Store, rewrite_matched_positions(&mut matches); let total_matches = matches.len(); - let padded_matches = SetBuf::from_dirty(matches); + let padded_matches = { + matches.par_sort_unstable(); + matches.dedup(); + SetBuf::new_unchecked(matches) + }; let raw_documents = raw_documents_from_matches(padded_matches); info!("{} total documents to classify", raw_documents.len()); @@ -346,7 +355,7 @@ where S: Store, let offset = cmp::min(documents.len(), range.start); let iter = documents.into_iter().skip(offset).take(range.len()); - Ok(iter.map(|d| Document::from_raw(&d)).collect()) + Ok(iter.map(|d| Document::from_raw(d)).collect()) } } @@ -473,7 +482,7 @@ where S: Store, }; if distinct_accepted && seen.len() > range.start { - out_documents.push(Document::from_raw(&document)); + out_documents.push(Document::from_raw(document)); if out_documents.len() == range.len() { break } } } @@ -616,11 +625,11 @@ mod tests { let results = builder.query("iphone from apple", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); - assert_matches!(matches.next(), Some(Match { query_index: 1, word_index: 1, .. })); - assert_matches!(matches.next(), Some(Match { query_index: 2, word_index: 2, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, .. 
})); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -638,9 +647,9 @@ mod tests { let results = builder.query("hello", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -649,9 +658,9 @@ mod tests { let results = builder.query("bonjour", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -670,9 +679,9 @@ mod tests { let results = builder.query("sal", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. 
})); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -681,9 +690,9 @@ mod tests { let results = builder.query("bonj", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -713,9 +722,9 @@ mod tests { let results = builder.query("salutution", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -724,9 +733,9 @@ mod tests { let results = builder.query("saluttion", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. 
})); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -748,19 +757,19 @@ mod tests { let results = builder.query("hello", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); assert_matches!(matches.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 3, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 3, .. })); assert_matches!(matches.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 5, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 5, .. })); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -769,19 +778,19 @@ mod tests { let results = builder.query("bonjour", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. 
})); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); assert_matches!(matches.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 3, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 3, .. })); assert_matches!(matches.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 5, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 5, .. })); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -790,19 +799,19 @@ mod tests { let results = builder.query("salut", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); assert_matches!(matches.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 3, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 3, .. 
})); assert_matches!(matches.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => { let mut matches = matches.into_iter(); - assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 5, .. })); + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 5, .. })); assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -828,18 +837,18 @@ mod tests { let results = builder.query("NY subway", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. 
})); // new = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); assert_matches!(iter.next(), None); @@ -848,18 +857,18 @@ mod tests { let results = builder.query("NYC subway", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. 
})); // york = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); assert_matches!(iter.next(), None); @@ -888,18 +897,18 @@ mod tests { let results = builder.query("NY subway", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. 
})); // city = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); assert_matches!(iter.next(), None); @@ -908,18 +917,18 @@ mod tests { let results = builder.query("NYC subway", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. 
})); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); assert_matches!(iter.next(), None); @@ -950,20 +959,20 @@ mod tests { let results = builder.query("NY subway broken", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // underground = subway - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // train = subway - assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 3, .. })); // broken + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // underground = subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // train = subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 3, .. })); // broken assert_matches!(iter.next(), None); // position rewritten ^ }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. 
})); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -972,19 +981,19 @@ mod tests { let results = builder.query("NYC subway", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // underground = subway - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // train = subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. 
})); // york = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // underground = subway + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // train = subway assert_matches!(iter.next(), None); // position rewritten ^ }); assert_matches!(iter.next(), None); @@ -1017,27 +1026,41 @@ mod tests { let results = builder.query("new york underground train broken", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, highlights }) => { + let mut matches = matches.into_iter(); + let mut highlights = highlights.into_iter(); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york + assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new + assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // york + assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 1, .. })); // underground + assert_matches!(highlights.next(), Some(Highlight { char_index: 2, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 2, .. })); // train + assert_matches!(highlights.next(), Some(Highlight { char_index: 3, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 3, .. })); // broken + assert_matches!(highlights.next(), Some(Highlight { char_index: 4, .. 
})); + + assert_matches!(matches.next(), None); + }); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // york - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // new - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 0, char_index: 1, .. })); // york - assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 1, char_index: 2, .. })); // underground - assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 2, char_index: 3, .. })); // train - assert_matches!(iter.next(), Some(Match { query_index: 4, word_index: 3, char_index: 4, .. })); // broken + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NYC = new york + assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, .. })); // subway = underground train + assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 3, .. })); // broken assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NYC = new york - assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 2, .. })); // subway = underground train - assert_matches!(iter.next(), Some(Match { query_index: 4, word_index: 3, .. })); // broken - assert_matches!(iter.next(), None); - }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. 
})); // NY = new york - assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 1, .. })); // subway = underground train + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY = new york + assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 1, .. })); // subway = underground train assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1046,27 +1069,41 @@ mod tests { let results = builder.query("new york city underground train broken", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, highlights }) => { + let mut matches = matches.into_iter(); + let mut highlights = highlights.into_iter(); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york + assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new + assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // york + assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 1, .. })); // underground + assert_matches!(highlights.next(), Some(Highlight { char_index: 2, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 2, .. })); // train + assert_matches!(highlights.next(), Some(Highlight { char_index: 3, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 5, word_index: 3, .. })); // broken + assert_matches!(highlights.next(), Some(Highlight { char_index: 4, .. 
})); + + assert_matches!(matches.next(), None); + }); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // york - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // new - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 0, char_index: 1, .. })); // york - assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 1, char_index: 2, .. })); // underground - assert_matches!(iter.next(), Some(Match { query_index: 4, word_index: 2, char_index: 3, .. })); // train - assert_matches!(iter.next(), Some(Match { query_index: 5, word_index: 3, char_index: 4, .. })); // broken + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NYC = new york city + assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 2, .. })); // subway = underground train + assert_matches!(iter.next(), Some(TmpMatch { query_index: 5, word_index: 3, .. })); // broken assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NYC = new york city - assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 2, .. })); // subway = underground train - assert_matches!(iter.next(), Some(Match { query_index: 5, word_index: 3, .. })); // broken - assert_matches!(iter.next(), None); - }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. 
})); // NY = new york city - assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 1, .. })); // subway = underground train + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY = new york city + assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 1, .. })); // subway = underground train assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1086,15 +1123,15 @@ mod tests { let results = builder.query("telephone", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, .. })); + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. })); assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, .. })); - assert_matches!(iter.next(), Some(Match { query_index: 0, .. })); + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. })); + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. })); assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1103,15 +1140,15 @@ mod tests { let results = builder.query("téléphone", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, .. })); + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. 
})); assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, .. })); - assert_matches!(iter.next(), Some(Match { query_index: 0, .. })); + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. })); + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. })); assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1120,14 +1157,14 @@ mod tests { let results = builder.query("télephone", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, .. })); + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. })); assert_matches!(iter.next(), None); }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, distance: 1, .. })); // téléphone + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, distance: 1, .. })); // téléphone assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1144,11 +1181,11 @@ mod tests { let results = builder.query("i phone case", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. 
}) => { let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, distance: 0, .. })); // iphone - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 0, distance: 1, .. })); // phone - assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 1, distance: 0, .. })); // case + assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, distance: 0, .. })); // iphone + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 0, distance: 1, .. })); // phone + assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 1, distance: 0, .. })); // case assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1167,11 +1204,17 @@ mod tests { let results = builder.query("portefeuille", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // porte - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // feuille - assert_matches!(iter.next(), None); + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, highlights }) => { + let mut matches = matches.into_iter(); + let mut highlights = highlights.into_iter(); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // porte + assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // feuille + assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. 
})); + + assert_matches!(matches.next(), None); }); assert_matches!(iter.next(), None); @@ -1179,11 +1222,17 @@ mod tests { let results = builder.query("searchengine", 0..20).unwrap(); let mut iter = results.into_iter(); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // search - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // engine - assert_matches!(iter.next(), None); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, highlights }) => { + let mut matches = matches.into_iter(); + let mut highlights = highlights.into_iter(); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // search + assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); + + assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // engine + assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. 
/// Maps schema attribute indexes to the order in which they were registered.
///
/// Each call to [`ReorderedAttrs::insert_attribute`] assigns the next rank
/// (starting at 0) to the given attribute. Attributes that were never
/// inserted have no rank.
#[derive(Debug, Clone, Default)]
pub struct ReorderedAttrs {
    /// Number of insertions performed so far; also the next rank to hand out.
    count: usize,
    /// Lookup table indexed by attribute; `None` marks an unregistered slot.
    reorders: Vec<Option<u16>>,
}

impl ReorderedAttrs {
    /// Creates an empty mapping in which no attribute has a rank.
    pub fn new() -> ReorderedAttrs {
        ReorderedAttrs { count: 0, reorders: Vec::new() }
    }

    /// Registers `attribute` and gives it the next available rank.
    ///
    /// Inserting an attribute that is already registered overwrites its rank
    /// with the new one (the counter is still advanced).
    pub fn insert_attribute(&mut self, attribute: u16) {
        let index = attribute as usize;

        // Only ever grow the table: `Vec::resize` truncates when the requested
        // length is smaller than the current one, which would silently drop
        // the ranks of higher attributes registered earlier.
        if index >= self.reorders.len() {
            self.reorders.resize(index + 1, None);
        }

        self.reorders[index] = Some(self.count as u16);
        self.count += 1;
    }

    /// Returns the rank assigned to `attribute`, or `None` if it was never
    /// inserted (including when it lies beyond the end of the table).
    pub fn get(&self, attribute: u16) -> Option<u16> {
        self.reorders.get(attribute as usize).and_then(|slot| *slot)
    }
}
byte_indexes.entry(byte_index) { @@ -111,26 +111,26 @@ fn create_highlight_areas(text: &str, matches: &[Match]) -> Vec { /// ``` fn crop_text( text: &str, - matches: impl IntoIterator, + highlights: impl IntoIterator, context: usize, -) -> (String, Vec) +) -> (String, Vec) { - let mut matches = matches.into_iter().peekable(); + let mut highlights = highlights.into_iter().peekable(); - let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0); + let char_index = highlights.peek().map(|m| m.char_index as usize).unwrap_or(0); let start = char_index.saturating_sub(context); let text = text.chars().skip(start).take(context * 2).collect(); - let matches = matches + let highlights = highlights .take_while(|m| { (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2) }) - .map(|match_| { - Match { char_index: match_.char_index - start as u16, ..match_ } + .map(|highlight| { + Highlight { char_index: highlight.char_index - start as u16, ..highlight } }) .collect(); - (text, matches) + (text, highlights) } fn main() -> Result<(), Box> { @@ -168,7 +168,7 @@ fn main() -> Result<(), Box> { let number_of_documents = documents.len(); for mut doc in documents { - doc.matches.sort_unstable_by_key(|m| (m.char_index, m.char_index)); + doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length)); let start_retrieve = Instant::now(); let result = index.document::(Some(&fields), doc.id); @@ -180,11 +180,11 @@ fn main() -> Result<(), Box> { print!("{}: ", name); let attr = schema.attribute(&name).unwrap(); - let matches = doc.matches.iter() + let highlights = doc.highlights.iter() .filter(|m| SchemaAttr::new(m.attribute) == attr) .cloned(); - let (text, matches) = crop_text(&text, matches, opt.char_context); - let areas = create_highlight_areas(&text, &matches); + let (text, highlights) = crop_text(&text, highlights, opt.char_context); + let areas = create_highlight_areas(&text, &highlights); display_highlights(&text, &areas)?; println!(); 
} @@ -194,8 +194,8 @@ fn main() -> Result<(), Box> { } let mut matching_attributes = HashSet::new(); - for _match in doc.matches { - let attr = SchemaAttr::new(_match.attribute); + for highlight in doc.highlights { + let attr = SchemaAttr::new(highlight.attribute); let name = schema.attribute_name(attr); matching_attributes.insert(name); }