From 5d79617a56e8d00d295628dd20227b43c9eb31f2 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 7 Jul 2022 16:28:09 +0200 Subject: [PATCH 1/9] Chores: Enhance smart-crop code comments --- milli/src/search/matches/mod.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index e2bde3daf..46f87654f 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -11,6 +11,7 @@ const DEFAULT_CROP_MARKER: &'static str = "…"; const DEFAULT_HIGHLIGHT_PREFIX: &'static str = ""; const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = ""; +/// Structure used to build a Matcher allowing to customize formatting tags. pub struct MatcherBuilder<'a, A> { matching_words: MatchingWords, tokenizer: Tokenizer<'a, A>, @@ -100,6 +101,8 @@ pub struct MatchBounds { pub length: usize, } +/// Structure used to analize a string, compute words that match, +/// and format the source string returning an highlighted and croped sub-string. pub struct Matcher<'t, 'm, A> { text: &'t str, matching_words: &'m MatchingWords, @@ -113,6 +116,8 @@ pub struct Matcher<'t, 'm, A> { impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { /// Iterates over tokens and save any of them that matches the query. fn compute_matches(&mut self) -> &mut Self { + /// some word are counted as matches only if there are close together and in the good order, + /// compute_partial_match peek into next words to validate if the match is complete. fn compute_partial_match<'a>( mut partial: PartialMatch, token_position: usize, @@ -246,9 +251,14 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { // matches needs to be counted in the crop len. 
let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; + // create the initial state of the crop window: 2 iterators starting from the matches positions, + // a reverse iterator starting from the first match token position and going trew the beginning of the text, let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable(); + // an iterator starting from the last match token position and going trew the end of the text. let mut after_tokens = tokens[last_match_token_position..].iter().peekable(); + // gross the crop window peeking in both drections + // until the window contains the good number of words: while remaining_words > 0 { let before_token = before_tokens.peek().map(|t| t.separator_kind()); let after_token = after_tokens.peek().map(|t| t.separator_kind()); @@ -315,6 +325,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { } } + // finally, keep the byte index of each bounds of the crop window. let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); @@ -353,7 +364,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { (uniq_score, distance_score, order_score) } - /// Returns the matches interval where the score computed by match_interval_score is maximal. + /// Returns the matches interval where the score computed by match_interval_score is the best. fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] { // we compute the matches interval if we have at least 2 matches. if matches.len() > 1 { @@ -408,6 +419,8 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { } else { match &self.matches { Some((tokens, matches)) => { + // If the text have to be croped, + // compute the best interval to crop around. 
let matches = match format_options.crop { Some(crop_size) if crop_size > 0 => { self.find_best_match_interval(matches, crop_size) @@ -415,6 +428,8 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { _ => matches, }; + // If the text have to be croped, + // crop around the best interval. let (byte_start, byte_end) = match format_options.crop { Some(crop_size) if crop_size > 0 => { self.crop_bounds(tokens, matches, crop_size) From 1da4ab5918ff7edd9a6531dd34a7985323d1b9e8 Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 10:18:03 +0200 Subject: [PATCH 2/9] Update milli/src/search/matches/mod.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 46f87654f..f63852b52 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -325,7 +325,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { } } - // finally, keep the byte index of each bounds of the crop window. + // finally, keep the byte index of each bound of the crop window. 
let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); From e261ef64d7d4db66a4087fb3021cfa7bd60bdd0b Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 10:18:51 +0200 Subject: [PATCH 3/9] Update milli/src/search/matches/mod.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index f63852b52..b08268657 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -102,7 +102,7 @@ pub struct MatchBounds { } /// Structure used to analize a string, compute words that match, -/// and format the source string returning an highlighted and croped sub-string. +/// and format the source string, returning a highlighted and cropped sub-string. pub struct Matcher<'t, 'm, A> { text: &'t str, matching_words: &'m MatchingWords, From d7fd5c58cdc7daf3893cce4591869cc0a2042dde Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 17:45:06 +0200 Subject: [PATCH 4/9] Update milli/src/search/matches/mod.rs --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index b08268657..2b47c2c32 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -257,7 +257,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { // an iterator starting from the last match token position and going trew the end of the text. 
let mut after_tokens = tokens[last_match_token_position..].iter().peekable(); - // gross the crop window peeking in both drections + // grows the crop window peeking in both directions // until the window contains the good number of words: while remaining_words > 0 { let before_token = before_tokens.peek().map(|t| t.separator_kind()); From 1237cfc249a0c007f3a3280a765d8fdf366597dc Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 17:45:37 +0200 Subject: [PATCH 5/9] Update milli/src/search/matches/mod.rs --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 2b47c2c32..89ab0064f 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -116,7 +116,7 @@ pub struct Matcher<'t, 'm, A> { impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { /// Iterates over tokens and save any of them that matches the query. fn compute_matches(&mut self) -> &mut Self { - /// some word are counted as matches only if there are close together and in the good order, + /// some words are counted as matches only if they are close together and in the good order, /// compute_partial_match peek into next words to validate if the match is complete. 
fn compute_partial_match<'a>( mut partial: PartialMatch, From fb794c6b5ec84e3aaeb4fba0c382b5b21c4e201c Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 17:46:00 +0200 Subject: [PATCH 6/9] Update milli/src/search/matches/mod.rs --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 89ab0064f..8df93daa1 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -419,7 +419,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { } else { match &self.matches { Some((tokens, matches)) => { - // If the text have to be croped, + // If the text has to be cropped, // compute the best interval to crop around. let matches = match format_options.crop { Some(crop_size) if crop_size > 0 => { From a277daa1f2a6cc8f7a12f9bc9c7dfcabc79a16a8 Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 17:47:13 +0200 Subject: [PATCH 7/9] Update milli/src/search/matches/mod.rs --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 8df93daa1..649078f7c 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -252,7 +252,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; // create the initial state of the crop window: 2 iterators starting from the matches positions, - // a reverse iterator starting from the first match token position and going trew the beginning of the text, + // a reverse iterator starting from the first match token position and going towards the beginning of the text, let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable(); // an iterator starting from the last match token position and going trew the end of the text. 
let mut after_tokens = tokens[last_match_token_position..].iter().peekable(); From 8ddb4e750b99f65610c4d49a7c7ef03b298a67e8 Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 17:47:39 +0200 Subject: [PATCH 8/9] Update milli/src/search/matches/mod.rs --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 649078f7c..fe35b2a0a 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -254,7 +254,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { // create the initial state of the crop window: 2 iterators starting from the matches positions, // a reverse iterator starting from the first match token position and going towards the beginning of the text, let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable(); - // an iterator starting from the last match token position and going trew the end of the text. + // an iterator starting from the last match token position and going towards the end of the text. let mut after_tokens = tokens[last_match_token_position..].iter().peekable(); // grows the crop window peeking in both directions From 2d79720f5da563ecf3ff3f585576209132f93b9a Mon Sep 17 00:00:00 2001 From: Many the fish Date: Mon, 18 Jul 2022 17:48:04 +0200 Subject: [PATCH 9/9] Update milli/src/search/matches/mod.rs --- milli/src/search/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index fe35b2a0a..72592c4cb 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -428,7 +428,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { _ => matches, }; - // If the text have to be croped, + // If the text has to be cropped, // crop around the best interval. let (byte_start, byte_end) = match format_options.crop { Some(crop_size) if crop_size > 0 => {