use std::cmp::{Ordering, Reverse};
use std::collections::hash_map::{HashMap, Entry};

use meilisearch_schema::SchemaAttr;
use slice_group_by::GroupBy;

use crate::{RawDocument, MResult};
use crate::bucket_sort::BareMatch;

use super::{Criterion, Context, ContextMut};
/// Criterion that ranks documents by how exactly they match the query:
/// first by whether a query word exactly fills a whole one-word field,
/// then by the number of query words matched exactly.
pub struct Exact;
impl Criterion for Exact {
    fn name(&self) -> &str { "exact" }

    /// Precomputes per-document exactness data used by `evaluate`.
    ///
    /// For every document this:
    /// 1. sorts `raw_matches` by `(query_index, Reverse(is_exact))`, so that
    ///    within each query word the exact matches come first — `evaluate`'s
    ///    `sum_exact_query_words` relies on this ordering;
    /// 2. sets `contains_one_word_field` when some exact match lands in a
    ///    field whose total word count is 1 (i.e. the query word *is* the
    ///    whole field content).
    ///
    /// Returns an error only if reading a field count from the store fails.
    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
        &self,
        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
        documents: &mut [RawDocument<'r, 'tag>],
    ) -> MResult<()>
    {
        let store = ctx.documents_fields_counts_store;
        let reader = ctx.reader;

        'documents: for doc in documents {
            // Exact matches first within each query word (see doc above).
            doc.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));

            // mark the document if we find a "one word field" that matches
            // Memoize field word counts so each attribute is fetched from the
            // store at most once per document.
            let mut fields_counts = HashMap::new();
            for group in doc.raw_matches.linear_group_by_key(|bm| bm.query_index) {
                for group in group.linear_group_by_key(|bm| bm.is_exact) {
                    // Thanks to the sort, the exact sub-group (if any) comes
                    // first; a non-exact head means no exact matches remain
                    // for this query word, so move on to the next one.
                    if !group[0].is_exact { break }
                    for bm in group {
                        for di in ctx.postings_lists[bm.postings_list].as_ref() {
                            let attr = SchemaAttr(di.attribute);
                            // Cached store lookup of this field's word count
                            // (None presumably means "count unknown/absent" —
                            // such fields never qualify below).
                            let count = match fields_counts.entry(attr) {
                                Entry::Occupied(entry) => *entry.get(),
                                Entry::Vacant(entry) => {
                                    let count = store.document_field_count(reader, doc.id, attr)?;
                                    *entry.insert(count)
                                },
                            };

                            if count == Some(1) {
                                // One exact match in a one-word field is
                                // enough: flag the document and skip straight
                                // to the next one.
                                doc.contains_one_word_field = true;
                                continue 'documents
                            }
                        }
                    }
                }
            }
        }

        Ok(())
    }

    /// Compares two documents on exactness, best document ordered first.
    ///
    /// Primary key: `contains_one_word_field` (computed in `prepare`);
    /// tie-break: the number of query words matched exactly. Both keys are
    /// `.reverse()`d so that "more exact" sorts as `Less`, i.e. earlier —
    /// assumes the bucket sort ranks ascending (TODO confirm with caller).
    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
        /// Counts the query words that have at least one exact match.
        /// Relies on `prepare` having sorted `raw_matches` so the exact
        /// match (if any) heads each `query_index` group.
        #[inline]
        fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
            let mut sum_exact_query_words = 0;

            for group in matches.linear_group_by_key(|bm| bm.query_index) {
                sum_exact_query_words += group[0].is_exact as usize;
            }

            sum_exact_query_words
        }

        // does it contains a "one word field"
        lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
        // if not, with document contains the more exact words
        .then_with(|| {
            let lhs = sum_exact_query_words(&lhs.raw_matches);
            let rhs = sum_exact_query_words(&rhs.raw_matches);
            lhs.cmp(&rhs).reverse()
        })
    }
}