mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
Merge #3824
3824: Change the way words are counted in the word count DB r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3823

## What does this PR do?
- Apply an offset when parsing the query that is consistent with the one applied during indexing

### DB breaking changes
- Count the number of words in `field_id_word_count_docids`
- Raise the word-count limit for storing an entry in the DB from 10 to 30

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
047d22fcb1
@ -79,7 +79,7 @@ pub fn located_query_terms_from_tokens(
|
|||||||
TokenKind::Separator(separator_kind) => {
|
TokenKind::Separator(separator_kind) => {
|
||||||
// add penalty for hard separators
|
// add penalty for hard separators
|
||||||
if let SeparatorKind::Hard = separator_kind {
|
if let SeparatorKind::Hard = separator_kind {
|
||||||
position = position.wrapping_add(1);
|
position = position.wrapping_add(7);
|
||||||
}
|
}
|
||||||
|
|
||||||
phrase = 'phrase: {
|
phrase = 'phrase: {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::{cmp, io};
|
use std::io;
|
||||||
|
|
||||||
use grenad::Sorter;
|
use grenad::Sorter;
|
||||||
|
|
||||||
@ -54,11 +54,10 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for position in read_u32_ne_bytes(value) {
|
for position in read_u32_ne_bytes(value) {
|
||||||
let (field_id, position) = relative_from_absolute_position(position);
|
let (field_id, _) = relative_from_absolute_position(position);
|
||||||
let word_count = position as u32 + 1;
|
|
||||||
|
|
||||||
let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0);
|
let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0);
|
||||||
*value = cmp::max(*value, word_count);
|
*value += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,7 +82,7 @@ fn drain_document_fid_wordcount_into_sorter(
|
|||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
|
|
||||||
for (fid, count) in document_fid_wordcount.drain() {
|
for (fid, count) in document_fid_wordcount.drain() {
|
||||||
if count <= 10 {
|
if count <= 30 {
|
||||||
key_buffer.clear();
|
key_buffer.clear();
|
||||||
key_buffer.extend_from_slice(&fid.to_be_bytes());
|
key_buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
key_buffer.push(count as u8);
|
key_buffer.push(count as u8);
|
||||||
|
Loading…
Reference in New Issue
Block a user