From da8abebfa22e5a2972d16357b51c89d1a3ab0595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 13 Jan 2020 13:29:47 +0100 Subject: [PATCH] Introduce the query words mapping along with the query tree --- Cargo.lock | 10 + meilisearch-core/Cargo.toml | 1 + meilisearch-core/src/bucket_sort.rs | 3 +- meilisearch-core/src/lib.rs | 2 + meilisearch-core/src/query_tree.rs | 133 +++++-- meilisearch-core/src/query_words_mapper.rs | 415 +++++++++++++++++++++ 6 files changed, 523 insertions(+), 41 deletions(-) create mode 100644 meilisearch-core/src/query_words_mapper.rs diff --git a/Cargo.lock b/Cargo.lock index 6cdab9a30..46d3b0347 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -799,6 +799,14 @@ dependencies = [ "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "intervaltree" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "iovec" version = "0.1.4" @@ -952,6 +960,7 @@ dependencies = [ "hashbrown 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "heed 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "intervaltree 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", "jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2715,6 +2724,7 @@ dependencies = [ "checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" "checksum idna 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" "checksum indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712d7b3ea5827fcb9d4fda14bf4da5f136f0db2ae9c8f4bd4e2d1c6fde4e6db2" +"checksum intervaltree 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "af39074dd8d5eff756ddea3d8f34c7ae287d4dadb6f29fb1b67ca6b3f5036482" "checksum iovec 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" "checksum itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" "checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index a0d50ed01..8078bf52b 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -17,6 +17,7 @@ env_logger = "0.7.0" fst = { version = "0.3.5", default-features = false } hashbrown = { version = "0.6.0", features = ["serde"] } heed = "0.6.1" +intervaltree = "0.2.4" itertools = "0.8.2" # kill me please levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] } log = "0.4.8" diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index ba024da57..b8049987c 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -61,8 +61,9 @@ where prefix_postings_lists: prefix_postings_lists_cache_store, }; - let operation = create_query_tree(reader, &context, query).unwrap(); + let (operation, mapping) = create_query_tree(reader, &context, query).unwrap(); println!("{:?}", operation); + println!("{:?}", mapping); let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation).unwrap(); println!("found {} documents", docids.len()); diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index 755cb4759..fa16ed77a 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -11,6 +11,7 @@ mod levenshtein; mod number; mod query_builder; mod query_tree; +mod query_words_mapper; mod ranked_map; mod raw_document; mod reordered_attrs; @@ -28,6 +29,7 @@ pub use self::raw_document::RawDocument; pub use self::store::Index; pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType}; pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; +pub use query_words_mapper::QueryWordsMapper; use compact_arena::SmallArena; use crate::bucket_sort::{QueryWordAutomaton, PostingsListView}; diff --git a/meilisearch-core/src/query_tree.rs b/meilisearch-core/src/query_tree.rs index aa8467629..5eae8c3bd 100644 --- a/meilisearch-core/src/query_tree.rs +++ b/meilisearch-core/src/query_tree.rs @@ -1,5 +1,7 @@ use std::borrow::Cow; use std::collections::HashMap; +use std::hash::{Hash, Hasher}; +use std::ops::Range; use std::time::Instant; use std::{cmp, fmt, iter::once}; @@ -11,8 +13,9 @@ use fst::{IntoStreamer, Streamer}; use crate::database::MainT; use crate::{store, DocumentId, DocIndex, MResult}; use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa}; +use crate::QueryWordsMapper; -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, PartialEq, Eq, Hash)] pub enum Operation { And(Vec), Or(Vec), @@ -39,36 +42,49 @@ impl fmt::Debug for Operation { } } +impl Operation { + fn tolerant(id: QueryId, prefix: bool, s: &str) -> Operation { + Operation::Query(Query { id, prefix, kind: QueryKind::Tolerant(s.to_string()) }) + } + + fn exact(id: QueryId, prefix: bool, s: &str) -> Operation { + Operation::Query(Query { id, prefix, kind: QueryKind::Exact(s.to_string()) }) + } + + fn phrase2(id: QueryId, prefix: bool, (left, right): (&str, &str)) -> Operation { + Operation::Query(Query { id, prefix, kind: QueryKind::Phrase(vec![left.to_owned(), right.to_owned()]) }) + } +} + pub type QueryId = usize; -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Eq)] pub struct Query { pub id: QueryId, pub prefix: bool, pub kind: QueryKind, } -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] +impl PartialEq for Query { + fn eq(&self, other: &Self) -> bool { + self.prefix == other.prefix && self.kind == other.kind + } +} + +impl Hash for Query { + fn hash(&self, state: &mut H) { + self.prefix.hash(state); + self.kind.hash(state); + } +} + +#[derive(Clone, PartialEq, Eq, Hash)] pub enum QueryKind { Tolerant(String), Exact(String), Phrase(Vec), } -impl Query { - fn tolerant(id: QueryId, prefix: bool, s: &str) -> Query { - Query { id, prefix, kind: QueryKind::Tolerant(s.to_string()) } - } - - fn exact(id: QueryId, prefix: bool, s: &str) -> Query { - Query { id, prefix, kind: QueryKind::Exact(s.to_string()) } - } - - fn phrase2(id: QueryId, prefix: bool, (left, right): (&str, &str)) -> Query { - Query { id, prefix, kind: QueryKind::Phrase(vec![left.to_owned(), right.to_owned()]) } - } -} - impl fmt::Debug for Query { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let Query { id, prefix, kind } = self; @@ -151,54 +167,88 @@ where I: IntoIterator, const MAX_NGRAM: usize = 3; -pub fn create_query_tree(reader: &heed::RoTxn, ctx: &Context, query: &str) -> MResult { +pub fn create_query_tree( + reader: &heed::RoTxn, + ctx: &Context, + query: &str, +) -> MResult<(Operation, HashMap>)> +{ let query = query.to_lowercase(); - let words = query.linear_group_by_key(char::is_whitespace).map(ToOwned::to_owned); - let words = words.filter(|s| !s.contains(char::is_whitespace)).enumerate(); - let words: Vec<_> = words.collect(); + let words: Vec<_> = words.filter(|s| !s.contains(char::is_whitespace)).enumerate().collect(); + let mut mapper = QueryWordsMapper::new(words.iter().map(|(_, w)| w)); let mut ngrams = Vec::new(); for ngram in 1..=MAX_NGRAM { + let ngiter = words.windows(ngram).enumerate().map(|(i, group)| { - let before = words[..i].windows(1); - let after = words[i + ngram..].windows(1); - before.chain(Some(group)).chain(after) + let before = words[0..i].windows(1).enumerate().map(|(i, g)| (i..i+1, g)); + let after = words[i + ngram..].windows(1) + .enumerate() + .map(move |(j, g)| (i + j + ngram..i + j + ngram + 1, g)); + before.chain(Some((i..i + ngram, group))).chain(after) }); for group in ngiter { - let mut ops = Vec::new(); - for (is_last, words) in is_last(group) { + let mut ops = Vec::new(); + for (is_last, (range, words)) in is_last(group) { + let mut alts = Vec::new(); match words { [(id, word)] => { + let mut idgen = ((id + 1) * 100)..; + let phrase = split_best_frequency(reader, ctx, word)? - .map(|ws| Query::phrase2(*id, is_last, ws)) - .map(Operation::Query); + .map(|ws| { + let id = idgen.next().unwrap(); + idgen.next().unwrap(); + mapper.declare(range.clone(), id, &[ws.0, ws.1]); + Operation::phrase2(id, is_last, ws) + }); - let synonyms = fetch_synonyms(reader, ctx, &[word])?.into_iter().map(|alts| { - let iter = alts.into_iter().map(|w| Query::exact(*id, false, &w)).map(Operation::Query); - create_operation(iter, Operation::And) - }); + let synonyms = fetch_synonyms(reader, ctx, &[word])? + .into_iter() + .map(|alts| { + let id = idgen.next().unwrap(); + mapper.declare(range.clone(), id, &alts); - let query = Query::tolerant(*id, is_last, word); + let mut idgen = once(id).chain(&mut idgen); + let iter = alts.into_iter().map(|w| { + let id = idgen.next().unwrap(); + Operation::exact(id, false, &w) + }); - alts.push(Operation::Query(query)); + create_operation(iter, Operation::And) + }); + + let query = Operation::tolerant(*id, is_last, word); + + alts.push(query); alts.extend(synonyms.chain(phrase)); }, words => { let id = words[0].0; + let mut idgen = ((id + 1) * 100_usize.pow(ngram as u32))..; + let words: Vec<_> = words.iter().map(|(_, s)| s.as_str()).collect(); for synonym in fetch_synonyms(reader, ctx, &words)? { - let synonym = synonym.into_iter().map(|s| Operation::Query(Query::exact(id, false, &s))); - let synonym = create_operation(synonym, Operation::And); - alts.push(synonym); + let id = idgen.next().unwrap(); + mapper.declare(range.clone(), id, &synonym); + + let mut idgen = once(id).chain(&mut idgen); + let synonym = synonym.into_iter().map(|s| { + let id = idgen.next().unwrap(); + Operation::exact(id, false, &s) + }); + alts.push(create_operation(synonym, Operation::And)); } - let query = Query::exact(id, is_last, &words.concat()); - alts.push(Operation::Query(query)); + let id = idgen.next().unwrap(); + let concat = words.concat(); + alts.push(Operation::exact(id, is_last, &concat)); + mapper.declare(range.clone(), id, &[concat]); } } @@ -210,7 +260,10 @@ pub fn create_query_tree(reader: &heed::RoTxn, ctx: &Context, query: &str } } - Ok(create_operation(ngrams, Operation::Or)) + let mapping = mapper.mapping(); + let operation = create_operation(ngrams, Operation::Or); + + Ok((operation, mapping)) } pub type Postings<'o, 'txn> = HashMap<(&'o Query, Vec), Cow<'txn, Set>>; diff --git a/meilisearch-core/src/query_words_mapper.rs b/meilisearch-core/src/query_words_mapper.rs new file mode 100644 index 000000000..b9816a347 --- /dev/null +++ b/meilisearch-core/src/query_words_mapper.rs @@ -0,0 +1,415 @@ +use std::collections::HashMap; +use std::iter::FromIterator; +use std::ops::Range; +use intervaltree::{Element, IntervalTree}; + +pub type QueryId = usize; + +pub struct QueryWordsMapper { + originals: Vec, + mappings: HashMap, Vec)>, +} + +impl QueryWordsMapper { + pub fn new(originals: I) -> QueryWordsMapper + where I: IntoIterator, + A: ToString, + { + let originals = originals.into_iter().map(|s| s.to_string()).collect(); + QueryWordsMapper { originals, mappings: HashMap::new() } + } + + pub fn declare(&mut self, range: Range, id: QueryId, replacement: I) + where I: IntoIterator, + A: ToString, + { + assert!(range.len() != 0); + assert!(self.originals.get(range.clone()).is_some()); + assert!(id >= self.originals.len()); + + let replacement: Vec<_> = replacement.into_iter().map(|s| s.to_string()).collect(); + + assert!(!replacement.is_empty()); + + // We detect words at the end and at the front of the + // replacement that are common with the originals: + // + // x a b c d e f g + // ^^^/ \^^^ + // a b x c d k j e f + // ^^^ ^^^ + // + + let left = &self.originals[..range.start]; + let right = &self.originals[range.end..]; + + let common_left = longest_common_prefix(left, &replacement); + let common_right = longest_common_prefix(&replacement, right); + + for i in 0..common_left { + let range = range.start - common_left + i..range.start - common_left + i + 1; + let replacement = vec![replacement[i].clone()]; + self.mappings.insert(id + i, (range, replacement)); + } + + { + let replacement = replacement[common_left..replacement.len() - common_right].iter().cloned().collect(); + self.mappings.insert(id + common_left, (range.clone(), replacement)); + } + + for i in 0..common_right { + let id = id + replacement.len() - common_right + i; + let range = range.end + i..range.end + i + 1; + let replacement = vec![replacement[replacement.len() - common_right + i].clone()]; + self.mappings.insert(id, (range, replacement)); + } + } + + pub fn mapping(self) -> HashMap> { + let mappings = self.mappings.into_iter().map(|(i, (r, v))| (r, (i, v))); + let intervals = IntervalTree::from_iter(mappings); + + let mut output = HashMap::new(); + let mut offset = 0; + + // We map each original word to the biggest number of + // associated words. + for i in 0..self.originals.len() { + let max = intervals.query_point(i) + .filter_map(|e| { + if e.range.end - 1 == i { + let len = e.value.1.iter().skip(i - e.range.start).count(); + if len != 0 { Some(len) } else { None } + } else { None } + }) + .max() + .unwrap_or(1); + + let range = i + offset..i + offset + max; + output.insert(i, range); + offset += max - 1; + } + + // We retrieve the range that each original word + // is mapped to and apply it to each of the words. + for i in 0..self.originals.len() { + + let iter = intervals.query_point(i).filter(|e| e.range.end - 1 == i); + for Element { range, value: (id, words) } in iter { + + // We ask for the complete range mapped to the area we map. + let start = output.get(&range.start).map(|r| r.start).unwrap_or(range.start); + let end = output.get(&(range.end - 1)).map(|r| r.end).unwrap_or(range.end); + let range = start..end; + + // We map each query id to one word until the last, + // we map it to the remainings words. + let add = range.len() - words.len(); + for (j, x) in range.take(words.len()).enumerate() { + let add = if j == words.len() - 1 { add } else { 0 }; // is last? + let range = x..x + 1 + add; + output.insert(id + j, range); + } + } + } + + output + } +} + +fn longest_common_prefix(a: &[T], b: &[T]) -> usize { + let mut best = None; + for i in (0..a.len()).rev() { + let count = a[i..].iter().zip(b).take_while(|(a, b)| a == b).count(); + best = match best { + Some(old) if count > old => Some(count), + Some(_) => break, + None => Some(count), + }; + } + best.unwrap_or(0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn original_unmodified() { + let query = ["new", "york", "city", "subway"]; + // 0 1 2 3 + let mut builder = QueryWordsMapper::new(&query); + + // new york = new york city + builder.declare(0..2, 4, &["new", "york", "city"]); + // ^ 4 5 6 + + // new = new york city + builder.declare(0..1, 7, &["new", "york", "city"]); + // ^ 7 8 9 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // new + assert_eq!(mapping[&1], 1..2); // york + assert_eq!(mapping[&2], 2..3); // city + assert_eq!(mapping[&3], 3..4); // subway + + assert_eq!(mapping[&4], 0..1); // new + assert_eq!(mapping[&5], 1..2); // york + assert_eq!(mapping[&6], 2..3); // city + + assert_eq!(mapping[&7], 0..1); // new + assert_eq!(mapping[&8], 1..2); // york + assert_eq!(mapping[&9], 2..3); // city + } + + #[test] + fn original_unmodified2() { + let query = ["new", "york", "city", "subway"]; + // 0 1 2 3 + let mut builder = QueryWordsMapper::new(&query); + + // city subway = new york city underground train + builder.declare(2..4, 4, &["new", "york", "city", "underground", "train"]); + // ^ 4 5 6 7 8 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // new + assert_eq!(mapping[&1], 1..2); // york + assert_eq!(mapping[&2], 2..3); // city + assert_eq!(mapping[&3], 3..5); // subway + + assert_eq!(mapping[&4], 0..1); // new + assert_eq!(mapping[&5], 1..2); // york + assert_eq!(mapping[&6], 2..3); // city + assert_eq!(mapping[&7], 3..4); // underground + assert_eq!(mapping[&8], 4..5); // train + } + + #[test] + fn original_unmodified3() { + let query = ["a", "b", "x", "x", "a", "b", "c", "d", "e", "f", "g"]; + // 0 1 2 3 4 5 6 7 8 9 10 + let mut builder = QueryWordsMapper::new(&query); + + // c d = a b x c d k j e f + builder.declare(6..8, 11, &["a", "b", "x", "c", "d", "k", "j", "e", "f"]); + // ^^ 11 12 13 14 15 16 17 18 19 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // a + assert_eq!(mapping[&1], 1..2); // b + assert_eq!(mapping[&2], 2..3); // x + assert_eq!(mapping[&3], 3..4); // x + assert_eq!(mapping[&4], 4..5); // a + assert_eq!(mapping[&5], 5..6); // b + assert_eq!(mapping[&6], 6..7); // c + assert_eq!(mapping[&7], 7..11); // d + assert_eq!(mapping[&8], 11..12); // e + assert_eq!(mapping[&9], 12..13); // f + assert_eq!(mapping[&10], 13..14); // g + + assert_eq!(mapping[&11], 4..5); // a + assert_eq!(mapping[&12], 5..6); // b + assert_eq!(mapping[&13], 6..7); // x + assert_eq!(mapping[&14], 7..8); // c + assert_eq!(mapping[&15], 8..9); // d + assert_eq!(mapping[&16], 9..10); // k + assert_eq!(mapping[&17], 10..11); // j + assert_eq!(mapping[&18], 11..12); // e + assert_eq!(mapping[&19], 12..13); // f + } + + #[test] + fn simple_growing() { + let query = ["new", "york", "subway"]; + // 0 1 2 + let mut builder = QueryWordsMapper::new(&query); + + // new york = new york city + builder.declare(0..2, 3, &["new", "york", "city"]); + // ^ 3 4 5 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // new + assert_eq!(mapping[&1], 1..3); // york + assert_eq!(mapping[&2], 3..4); // subway + assert_eq!(mapping[&3], 0..1); // new + assert_eq!(mapping[&4], 1..2); // york + assert_eq!(mapping[&5], 2..3); // city + } + + #[test] + fn same_place_growings() { + let query = ["NY", "subway"]; + // 0 1 + let mut builder = QueryWordsMapper::new(&query); + + // NY = new york + builder.declare(0..1, 2, &["new", "york"]); + // ^ 2 3 + + // NY = new york city + builder.declare(0..1, 4, &["new", "york", "city"]); + // ^ 4 5 6 + + // NY = NYC + builder.declare(0..1, 7, &["NYC"]); + // ^ 7 + + // NY = new york city + builder.declare(0..1, 8, &["new", "york", "city"]); + // ^ 8 9 10 + + // subway = underground train + builder.declare(1..2, 11, &["underground", "train"]); + // ^ 11 12 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..3); // NY + assert_eq!(mapping[&1], 3..5); // subway + assert_eq!(mapping[&2], 0..1); // new + assert_eq!(mapping[&3], 1..3); // york + assert_eq!(mapping[&4], 0..1); // new + assert_eq!(mapping[&5], 1..2); // york + assert_eq!(mapping[&6], 2..3); // city + assert_eq!(mapping[&7], 0..3); // NYC + assert_eq!(mapping[&8], 0..1); // new + assert_eq!(mapping[&9], 1..2); // york + assert_eq!(mapping[&10], 2..3); // city + assert_eq!(mapping[&11], 3..4); // underground + assert_eq!(mapping[&12], 4..5); // train + } + + #[test] + fn bigger_growing() { + let query = ["NYC", "subway"]; + // 0 1 + let mut builder = QueryWordsMapper::new(&query); + + // NYC = new york city + builder.declare(0..1, 2, &["new", "york", "city"]); + // ^ 2 3 4 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..3); // NYC + assert_eq!(mapping[&1], 3..4); // subway + assert_eq!(mapping[&2], 0..1); // new + assert_eq!(mapping[&3], 1..2); // york + assert_eq!(mapping[&4], 2..3); // city + } + + #[test] + fn middle_query_growing() { + let query = ["great", "awesome", "NYC", "subway"]; + // 0 1 2 3 + let mut builder = QueryWordsMapper::new(&query); + + // NYC = new york city + builder.declare(2..3, 4, &["new", "york", "city"]); + // ^ 4 5 6 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // great + assert_eq!(mapping[&1], 1..2); // awesome + assert_eq!(mapping[&2], 2..5); // NYC + assert_eq!(mapping[&3], 5..6); // subway + assert_eq!(mapping[&4], 2..3); // new + assert_eq!(mapping[&5], 3..4); // york + assert_eq!(mapping[&6], 4..5); // city + } + + #[test] + fn end_query_growing() { + let query = ["NYC", "subway"]; + // 0 1 + let mut builder = QueryWordsMapper::new(&query); + + // NYC = new york city + builder.declare(1..2, 2, &["underground", "train"]); + // ^ 2 3 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // NYC + assert_eq!(mapping[&1], 1..3); // subway + assert_eq!(mapping[&2], 1..2); // underground + assert_eq!(mapping[&3], 2..3); // train + } + + #[test] + fn multiple_growings() { + let query = ["great", "awesome", "NYC", "subway"]; + // 0 1 2 3 + let mut builder = QueryWordsMapper::new(&query); + + // NYC = new york city + builder.declare(2..3, 4, &["new", "york", "city"]); + // ^ 4 5 6 + + // subway = underground train + builder.declare(3..4, 7, &["underground", "train"]); + // ^ 7 8 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // great + assert_eq!(mapping[&1], 1..2); // awesome + assert_eq!(mapping[&2], 2..5); // NYC + assert_eq!(mapping[&3], 5..7); // subway + assert_eq!(mapping[&4], 2..3); // new + assert_eq!(mapping[&5], 3..4); // york + assert_eq!(mapping[&6], 4..5); // city + assert_eq!(mapping[&7], 5..6); // underground + assert_eq!(mapping[&8], 6..7); // train + } + + #[test] + fn multiple_probable_growings() { + let query = ["great", "awesome", "NYC", "subway"]; + // 0 1 2 3 + let mut builder = QueryWordsMapper::new(&query); + + // NYC = new york city + builder.declare(2..3, 4, &["new", "york", "city"]); + // ^ 4 5 6 + + // subway = underground train + builder.declare(3..4, 7, &["underground", "train"]); + // ^ 7 8 + + // great awesome = good + builder.declare(0..2, 9, &["good"]); + // ^ 9 + + // awesome NYC = NY + builder.declare(1..3, 10, &["NY"]); + // ^^ 10 + + // NYC subway = metro + builder.declare(2..4, 11, &["metro"]); + // ^^ 11 + + let mapping = builder.mapping(); + + assert_eq!(mapping[&0], 0..1); // great + assert_eq!(mapping[&1], 1..2); // awesome + assert_eq!(mapping[&2], 2..5); // NYC + assert_eq!(mapping[&3], 5..7); // subway + assert_eq!(mapping[&4], 2..3); // new + assert_eq!(mapping[&5], 3..4); // york + assert_eq!(mapping[&6], 4..5); // city + assert_eq!(mapping[&7], 5..6); // underground + assert_eq!(mapping[&8], 6..7); // train + assert_eq!(mapping[&9], 0..2); // good + assert_eq!(mapping[&10], 1..5); // NY + assert_eq!(mapping[&11], 2..7); // metro + } +}