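//! Levenshtein automatons built from the words of a user query.
//!
//! This module generates the DFAs used to match indexed words against the
//! query words (exactly, by prefix or with typos) together with the
//! `QueryEnhancer` that maps every generated automaton back to the range of
//! the original query it was derived from.
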
mod dfa;
mod query_enhancer;

use std::cmp::Reverse;
use std::{cmp, fmt, vec};

use fst::{IntoStreamer, Streamer};
use levenshtein_automata::DFA;
use meilisearch_tokenizer::{is_cjk, split_query_string};

use crate::database::MainT;
use crate::error::MResult;
use crate::store;

use self::dfa::{build_dfa, build_prefix_dfa};
pub use self::query_enhancer::QueryEnhancer;
use self::query_enhancer::QueryEnhancerBuilder;
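
/// Maximum n-gram length: consecutive query words are regrouped into
/// n-grams of one up to `NGRAMS` words.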
const NGRAMS: usize = 3;
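
/// Produces the automaton groups generated from a user query,
/// ordered by importance.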
pub struct AutomatonProducer {
    automatons: Vec<AutomatonGroup>,
}

impl AutomatonProducer {
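    /// Builds the automaton groups and the `QueryEnhancer` for the given
    /// query string, reading synonyms and postings lists from the stores.
    ///
    /// A minimal usage sketch, assuming an open read transaction and the
    /// index stores at hand (variable names are hypothetical):
    ///
    /// ```ignore
    /// let (producer, enhancer) = AutomatonProducer::new(
    ///     &reader,
    ///     "new york",
    ///     main_store,
    ///     postings_lists_store,
    ///     synonyms_store,
    /// )?;
    ///
    /// for group in producer.into_iter() {
    ///     for automaton in &group.automatons {
    ///         let dfa = automaton.dfa();
    ///         // stream the words of the index through this DFA
    ///     }
    /// }
    /// ```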
    pub fn new(
        reader: &heed::RoTxn<MainT>,
        query: &str,
        main_store: store::Main,
        postings_list_store: store::PostingsLists,
        synonyms_store: store::Synonyms,
    ) -> MResult<(AutomatonProducer, QueryEnhancer)> {
        let (automatons, query_enhancer) = generate_automatons(
            reader,
            query,
            main_store,
            postings_list_store,
            synonyms_store,
        )?;

        Ok((AutomatonProducer { automatons }, query_enhancer))
    }

    pub fn into_iter(self) -> vec::IntoIter<AutomatonGroup> {
        self.automatons.into_iter()
    }
}
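
/// A group of automatons that must be processed together, generated for one
/// range of the query words. The two halves of a split word form a
/// phrase-query group: their matches only count when found side by side.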
#[derive(Debug)]
pub struct AutomatonGroup {
    pub is_phrase_query: bool,
    pub automatons: Vec<Automaton>,
}

impl AutomatonGroup {
    fn normal(automatons: Vec<Automaton>) -> AutomatonGroup {
        AutomatonGroup {
            is_phrase_query: false,
            automatons,
        }
    }

    fn phrase_query(automatons: Vec<Automaton>) -> AutomatonGroup {
        AutomatonGroup {
            is_phrase_query: true,
            automatons,
        }
    }
}
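
/// A single word automaton: an original query word, an n-gram, one of its
/// synonyms or a concatenation, together with the way it must be matched.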
pub struct Automaton {
    pub index: usize,
    pub ngram: usize,
    pub query_len: usize,
    pub is_exact: bool,
    pub is_prefix: bool,
    pub query: String,
}

impl fmt::Debug for Automaton {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Automaton")
            .field("query", &self.query)
            .finish()
    }
}

impl Automaton {
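    /// Builds the Levenshtein DFA of this automaton, as a prefix DFA when
    /// the query word may still be extended by the user.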
    pub fn dfa(&self) -> DFA {
        if self.is_prefix {
            build_prefix_dfa(&self.query)
        } else {
            build_dfa(&self.query)
        }
    }

    fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
        Automaton {
            index,
            ngram,
            query_len: query.len(),
            is_exact: true,
            is_prefix: false,
            query: query.to_string(),
        }
    }

    fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton {
        Automaton {
            index,
            ngram,
            query_len: query.len(),
            is_exact: true,
            is_prefix: true,
            query: query.to_string(),
        }
    }

    fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton {
        Automaton {
            index,
            ngram,
            query_len: query.len(),
            is_exact: false,
            is_prefix: false,
            query: query.to_string(),
        }
    }
}
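
/// Normalizes a word for matching: lowercases it and, when it contains no
/// CJK character, transliterates it to plain ASCII.
///
/// A small illustration of the expected behaviour:
///
/// ```ignore
/// assert_eq!(normalize_str("Héllo Wörld"), "hello world");
/// ```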
pub fn normalize_str(string: &str) -> String {
    let mut string = string.to_lowercase();

    if !string.contains(is_cjk) {
        string = deunicode::deunicode_with_tofu(&string, "");
    }

    string
}
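
/// Splits `word` in two and returns the split whose least frequent half is
/// the most frequent across all candidate splits, i.e. the split that
/// maximizes `min(freq(left), freq(right))` over the postings lists.
/// Returns `Ok(None)` when no split produces two indexed words; for example,
/// `"newyork"` may yield `Some(("new", "york"))` when both halves are indexed.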
fn split_best_frequency<'a>(
    reader: &heed::RoTxn<MainT>,
    word: &'a str,
    postings_lists_store: store::PostingsLists,
) -> MResult<Option<(&'a str, &'a str)>> {
    let chars = word.char_indices().skip(1);
    let mut best = None;

    for (i, _) in chars {
        let (left, right) = word.split_at(i);

        let left_freq = postings_lists_store
            .postings_list(reader, left.as_ref())?
            .map_or(0, |i| i.len());

        let right_freq = postings_lists_store
            .postings_list(reader, right.as_ref())?
            .map_or(0, |i| i.len());

        let min_freq = cmp::min(left_freq, right_freq);
        if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) {
            best = Some((min_freq, left, right));
        }
    }

    Ok(best.map(|(_, l, r)| (l, r)))
}
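
/// Generates all the automaton groups for `query`:
///
/// 1. one group holding an exact automaton for every original query word,
/// 2. one group per synonym of every one to `NGRAMS` word n-gram,
/// 3. for single words, one phrase-query group per best split in two
///    indexed words, and for longer n-grams, one group for their
///    concatenation,
///
/// then sorts every group but the original one by importance.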
fn generate_automatons(
    reader: &heed::RoTxn<MainT>,
    query: &str,
    main_store: store::Main,
    postings_lists_store: store::PostingsLists,
    synonym_store: store::Synonyms,
) -> MResult<(Vec<AutomatonGroup>, QueryEnhancer)> {
    let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
    let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
    let synonyms = match main_store.synonyms_fst(reader)? {
        Some(synonym) => synonym,
        None => fst::Set::default(),
    };

    let mut automaton_index = 0;
    let mut automatons = Vec::new();
    let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);

    // We must not declare the original words to the query enhancer
    // *but* we need to push them into the automatons list first
    let mut original_automatons = Vec::new();
    let mut original_words = query_words.iter().peekable();
    while let Some(word) = original_words.next() {
        let has_following_word = original_words.peek().is_some();
        let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);

        let automaton = if not_prefix_dfa {
            Automaton::exact(automaton_index, 1, word)
        } else {
            Automaton::prefix_exact(automaton_index, 1, word)
        };
        automaton_index += 1;
        original_automatons.push(automaton);
    }

    automatons.push(AutomatonGroup::normal(original_automatons));
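
    // for every n-gram of the query words, generate automatons for its
    // synonyms, its best split (single words) or its concatenation (n-grams)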
    for n in 1..=NGRAMS {
        let mut ngrams = query_words.windows(n).enumerate().peekable();
        while let Some((query_index, ngram_slice)) = ngrams.next() {
            let query_range = query_index..query_index + n;
            let ngram_nb_words = ngram_slice.len();
            let ngram = ngram_slice.join(" ");

            let has_following_word = ngrams.peek().is_some();
            let not_prefix_dfa =
                has_following_word || has_end_whitespace || ngram.chars().all(is_cjk);

            // automaton of synonyms of the ngrams
            let normalized = normalize_str(&ngram);
            let lev = if not_prefix_dfa {
                build_dfa(&normalized)
            } else {
                build_prefix_dfa(&normalized)
            };

            let mut stream = synonyms.search(&lev).into_stream();
            while let Some(base) = stream.next() {
                // only trigger alternatives when the last word has been fully typed,
                // i.e. "new " does not trigger the alternatives to "new york"
                // but "new yo" does
                let base = std::str::from_utf8(base).unwrap();
                let base_nb_words = split_query_string(base).count();
                if ngram_nb_words != base_nb_words {
                    continue;
                }

                if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
                    let mut stream = synonyms.into_stream();
                    while let Some(synonyms) = stream.next() {
                        let synonyms = std::str::from_utf8(synonyms).unwrap();
                        let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
                        let nb_synonym_words = synonyms_words.len();

                        let real_query_index = automaton_index;
                        enhancer_builder.declare(
                            query_range.clone(),
                            real_query_index,
                            &synonyms_words,
                        );

                        for synonym in synonyms_words {
                            let automaton = if nb_synonym_words == 1 {
                                Automaton::exact(automaton_index, n, synonym)
                            } else {
                                Automaton::non_exact(automaton_index, n, synonym)
                            };
                            automaton_index += 1;
                            automatons.push(AutomatonGroup::normal(vec![automaton]));
                        }
                    }
                }
            }
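
            // for single words, try to find the best split into two words
            // that both appear in the postings lists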
            if n == 1 {
                if let Some((left, right)) =
                    split_best_frequency(reader, &normalized, postings_lists_store)?
                {
                    let a = Automaton::exact(automaton_index, 1, left);
                    enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
                    automaton_index += 1;

                    let b = Automaton::exact(automaton_index, 1, right);
                    enhancer_builder.declare(query_range.clone(), automaton_index, &[right]);
                    automaton_index += 1;

                    automatons.push(AutomatonGroup::phrase_query(vec![a, b]));
                }
            } else {
                // automaton of concatenation of query words
                let concat = ngram_slice.concat();
                let normalized = normalize_str(&concat);

                let real_query_index = automaton_index;
                enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);

                let automaton = Automaton::exact(automaton_index, n, &normalized);
                automaton_index += 1;
                automatons.push(AutomatonGroup::normal(vec![automaton]));
            }
        }
    }

    // order automatons, the most important first,
    // we keep the original automatons at the front.
    automatons[1..].sort_by_key(|group| {
        let a = group.automatons.first().unwrap();
        (
            Reverse(a.is_exact),
            a.ngram,
            Reverse(group.automatons.len()),
        )
    });

    Ok((automatons, enhancer_builder.build()))
}