diff --git a/examples/query-database.rs b/examples/query-database.rs index 64e035a8a..ca6733c30 100644 --- a/examples/query-database.rs +++ b/examples/query-database.rs @@ -126,7 +126,7 @@ fn crop_text( (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2) }) .map(|match_| { - Match { char_index: match_.char_index - start as u32, ..match_ } + Match { char_index: match_.char_index - start as u16, ..match_ } }) .collect(); diff --git a/src/database/serde/indexer_serializer.rs b/src/database/serde/indexer_serializer.rs index 75860d937..c25ffe98c 100644 --- a/src/database/serde/indexer_serializer.rs +++ b/src/database/serde/indexer_serializer.rs @@ -56,7 +56,7 @@ where B: TokenizerBuilder // FIXME must u32::try_from instead let attribute = self.attribute.0; - let word_index = word_index as u32; + let word_index = word_index as u16; // insert the exact representation let word_lower = word.to_lowercase(); @@ -69,7 +69,7 @@ where B: TokenizerBuilder let word_unidecoded = unidecode::unidecode(word).to_lowercase(); let word_unidecoded = word_unidecoded.trim(); if word_lower != word_unidecoded { - let char_index = char_index as u32; + let char_index = char_index as u16; let char_length = length; let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length }; @@ -77,7 +77,7 @@ where B: TokenizerBuilder } } - let char_index = char_index as u32; + let char_index = char_index as u16; let char_length = length; let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length }; diff --git a/src/lib.rs b/src/lib.rs index e77e03ecb..964de8f75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,14 +50,14 @@ pub struct DocIndex { /// The attribute in the document where the word was found /// along with the index in it. pub attribute: u16, - pub word_index: u32, + pub word_index: u16, /// The position in bytes where the word was found /// along with the length of it. 
/// /// It informs on the original word area in the text indexed /// without needing to run the tokenizer again. - pub char_index: u32, + pub char_index: u16, pub char_length: u16, } @@ -84,7 +84,7 @@ pub struct Match { /// The attribute in the document where the word was found /// along with the index in it. pub attribute: u16, - pub word_index: u32, + pub word_index: u16, /// Whether the word that match is an exact match or a prefix. pub is_exact: bool, @@ -94,7 +94,7 @@ pub struct Match { /// /// It informs on the original word area in the text indexed /// without needing to run the tokenizer again. - pub char_index: u32, + pub char_index: u16, pub char_length: u16, } @@ -116,9 +116,9 @@ impl Match { query_index: u32::max_value(), distance: u8::max_value(), attribute: u16::max_value(), - word_index: u32::max_value(), + word_index: u16::max_value(), is_exact: true, - char_index: u32::max_value(), + char_index: u16::max_value(), char_length: u16::max_value(), } } @@ -131,6 +131,6 @@ mod tests { #[test] fn docindex_mem_size() { - assert_eq!(mem::size_of::<DocIndex>(), 24); + assert_eq!(mem::size_of::<DocIndex>(), 16); } } diff --git a/src/rank/criterion/sum_of_words_position.rs b/src/rank/criterion/sum_of_words_position.rs index 73ea5978c..5938ce5ab 100644 --- a/src/rank/criterion/sum_of_words_position.rs +++ b/src/rank/criterion/sum_of_words_position.rs @@ -6,7 +6,7 @@ use crate::rank::criterion::Criterion; use crate::rank::RawDocument; #[inline] -fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u32]) -> usize { +fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize { let mut sum_word_index = 0; let mut index = 0; diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index 42cc738ce..dbf26e21a 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -5,14 +5,14 @@ use slice_group_by::GroupBy; use crate::rank::criterion::Criterion; use crate::rank::RawDocument; 
-const MAX_DISTANCE: u32 = 8; +const MAX_DISTANCE: u16 = 8; #[inline] fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) { (a.clone(), b.clone()) } -fn index_proximity(lhs: u32, rhs: u32) -> u32 { +fn index_proximity(lhs: u16, rhs: u16) -> u16 { if lhs < rhs { cmp::min(rhs - lhs, MAX_DISTANCE) } else { @@ -20,13 +20,13 @@ fn index_proximity(lhs: u32, rhs: u32) -> u32 { } } -fn attribute_proximity((lattr, lwi): (u16, u32), (rattr, rwi): (u16, u32)) -> u32 { +fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 { if lattr != rattr { return MAX_DISTANCE } index_proximity(lwi, rwi) } -fn min_proximity((lattr, lwi): (&[u16], &[u32]), (rattr, rwi): (&[u16], &[u32])) -> u32 { - let mut min_prox = u32::max_value(); +fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 { + let mut min_prox = u16::max_value(); for a in lattr.iter().zip(lwi) { for b in rattr.iter().zip(rwi) { @@ -43,8 +43,8 @@ fn matches_proximity( query_index: &[u32], distance: &[u8], attribute: &[u16], - word_index: &[u32], -) -> u32 + word_index: &[u16], +) -> u16 { let mut query_index_groups = query_index.linear_group(); let mut proximity = 0; diff --git a/src/rank/mod.rs b/src/rank/mod.rs index 3b31c0794..f5b07d27d 100644 --- a/src/rank/mod.rs +++ b/src/rank/mod.rs @@ -79,7 +79,7 @@ impl RawDocument { unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) } } - pub fn word_index(&self) -> &[u32] { + pub fn word_index(&self) -> &[u16] { let r = self.matches.range; // it is safe because construction/modifications // can only be done in this module @@ -93,7 +93,7 @@ impl RawDocument { unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) } } - pub fn char_index(&self) -> &[u32] { + pub fn char_index(&self) -> &[u16] { let r = self.matches.range; // it is safe because construction/modifications // can only be done in this module @@ -150,9 +150,9 @@ struct Matches { query_index: Vec<u32>, distance: Vec<u8>, attribute: Vec<u16>, - 
word_index: Vec<u32>, + word_index: Vec<u16>, is_exact: Vec<bool>, - char_index: Vec<u32>, + char_index: Vec<u16>, char_length: Vec<u16>, }