diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 69c2cd9c9..6f146b208 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -79,7 +79,7 @@ pub fn located_query_terms_from_tokens( TokenKind::Separator(separator_kind) => { // add penalty for hard separators if let SeparatorKind::Hard = separator_kind { - position = position.wrapping_add(1); + position = position.wrapping_add(7); } phrase = 'phrase: { diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 315ebdf0c..fe8eb93ed 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use std::fs::File; -use std::{cmp, io}; +use std::io; use grenad::Sorter; @@ -54,11 +54,10 @@ pub fn extract_fid_word_count_docids( } for position in read_u32_ne_bytes(value) { - let (field_id, position) = relative_from_absolute_position(position); - let word_count = position as u32 + 1; + let (field_id, _) = relative_from_absolute_position(position); let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0); - *value = cmp::max(*value, word_count); + *value += 1; } } @@ -83,7 +82,7 @@ fn drain_document_fid_wordcount_into_sorter( let mut key_buffer = Vec::new(); for (fid, count) in document_fid_wordcount.drain() { - if count <= 10 { + if count <= 30 { key_buffer.clear(); key_buffer.extend_from_slice(&fid.to_be_bytes()); key_buffer.push(count as u8);