From 2da86b31a6097d0b4bb44de54d0ebe69fcd1315f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 6 Jun 2023 11:31:26 +0200 Subject: [PATCH] Remove comments and add documentation --- milli/src/search/new/distinct.rs | 1 - milli/src/search/new/exact_attribute.rs | 2 +- milli/src/search/new/interner.rs | 2 +- milli/src/search/new/limits.rs | 1 - milli/src/search/new/query_graph.rs | 4 +- milli/src/search/new/query_term/mod.rs | 4 - .../src/search/new/query_term/parse_query.rs | 3 - .../new/ranking_rule_graph/cheapest_paths.rs | 109 +++++++++++++++--- .../condition_docids_cache.rs | 6 +- .../new/ranking_rule_graph/dead_ends_cache.rs | 3 +- .../search/new/ranking_rule_graph/fid/mod.rs | 9 +- .../new/ranking_rule_graph/position/mod.rs | 7 +- .../proximity/compute_docids.rs | 16 +-- milli/src/search/new/resolve_query_graph.rs | 10 -- milli/src/search/new/sort.rs | 4 - milli/src/search/new/tests/distinct.rs | 2 +- milli/src/search/new/tests/proximity.rs | 16 +-- milli/src/search/new/tests/proximity_typo.rs | 7 +- milli/src/search/new/tests/typo.rs | 4 +- 19 files changed, 117 insertions(+), 93 deletions(-)
diff --git a/milli/src/search/new/distinct.rs b/milli/src/search/new/distinct.rs index fbb7550a9..fff96bd5d 100644 --- a/milli/src/search/new/distinct.rs +++ b/milli/src/search/new/distinct.rs @@ -26,7 +26,6 @@ pub fn apply_distinct_rule( ctx: &mut SearchContext, field_id: u16, candidates: &RoaringBitmap, - // TODO: add a universe here, such that the `excluded` are a subset of the universe? ) -> Result { let mut excluded = RoaringBitmap::new(); let mut remaining = RoaringBitmap::new();
diff --git a/milli/src/search/new/exact_attribute.rs b/milli/src/search/new/exact_attribute.rs index dc9c95d3d..6e0381295 100644 --- a/milli/src/search/new/exact_attribute.rs +++ b/milli/src/search/new/exact_attribute.rs @@ -206,7 +206,7 @@ impl State { )?; intersection &= &candidates; if !intersection.is_empty() { - // TODO: although not really worth it in terms of performance, + // Although not really worth it in terms of performance, // it would be good to put this in cache for the sake of consistency let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize { ctx.index
diff --git a/milli/src/search/new/interner.rs b/milli/src/search/new/interner.rs index ebf18f38c..c2d325a86 100644 --- a/milli/src/search/new/interner.rs +++ b/milli/src/search/new/interner.rs @@ -32,7 +32,7 @@ impl Interned { #[derive(Clone)] pub struct DedupInterner { stable_store: Vec, - lookup: FxHashMap>, // TODO: Arc + lookup: FxHashMap>, } impl Default for DedupInterner { fn default() -> Self {
diff --git a/milli/src/search/new/limits.rs b/milli/src/search/new/limits.rs index 33a5a4a6c..d08946424 100644 --- a/milli/src/search/new/limits.rs +++ b/milli/src/search/new/limits.rs @@ -1,5 +1,4 @@ /// Maximum number of tokens we consider in a single search. -// TODO: Loic, find proper value here so we don't overflow the interner. pub const MAX_TOKEN_COUNT: usize = 1_000; /// Maximum number of prefixes that can be derived from a single word.
diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index dc25d1bc3..114eb8c4e 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -92,7 +92,7 @@ impl QueryGraph { /// which contains ngrams.
pub fn from_query( ctx: &mut SearchContext, - // NOTE: the terms here must be consecutive + // The terms here must be consecutive terms: &[LocatedQueryTerm], ) -> Result<(QueryGraph, Vec)> { let mut new_located_query_terms = terms.to_vec(); @@ -103,7 +103,7 @@ impl QueryGraph { let root_node = 0; let end_node = 1; - // TODO: we could consider generalizing to 4,5,6,7,etc. ngrams + // We could consider generalizing to 4,5,6,7,etc. ngrams let (mut prev2, mut prev1, mut prev0): (Vec, Vec, Vec) = (vec![], vec![], vec![root_node]);
diff --git a/milli/src/search/new/query_term/mod.rs b/milli/src/search/new/query_term/mod.rs index fb749a797..8db843037 100644 --- a/milli/src/search/new/query_term/mod.rs +++ b/milli/src/search/new/query_term/mod.rs @@ -132,7 +132,6 @@ impl QueryTermSubset { if full_query_term.ngram_words.is_some() { return None; } - // TODO: included in subset if let Some(phrase) = full_query_term.zero_typo.phrase { self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase)) } else if let Some(word) = full_query_term.zero_typo.exact { @@ -182,7 +181,6 @@ impl QueryTermSubset { let word = match &self.zero_typo_subset { NTypoTermSubset::All => Some(use_prefix_db), NTypoTermSubset::Subset { words, phrases: _ } => { - // TODO: use a subset of prefix words instead if words.contains(&use_prefix_db) { Some(use_prefix_db) } else { @@ -204,7 +202,6 @@ impl QueryTermSubset { ctx: &mut SearchContext, ) -> Result> { let mut result = BTreeSet::default(); - // TODO: a compute_partially funtion if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() { self.original.compute_fully_if_needed(ctx)?; } @@ -300,7 +297,6 @@ impl QueryTermSubset { let mut result = BTreeSet::default(); if !self.one_typo_subset.is_empty() { - // TODO: compute less than fully if possible self.original.compute_fully_if_needed(ctx)?; } let original = ctx.term_interner.get_mut(self.original);
diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 6f146b208..5e97d6578 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -139,7 +139,6 @@ pub fn number_of_typos_allowed<'ctx>( let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?; let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?; - // TODO: should `exact_words` also disable prefix search, ngrams, split words, or synonyms? let exact_words = ctx.index.exact_words(ctx.txn)?; Ok(Box::new(move |word: &str| { @@ -250,8 +249,6 @@ impl PhraseBuilder { } else { // token has kind Word let word = ctx.word_interner.insert(token.lemma().to_string()); - // TODO: in a phrase, check that every word exists - // otherwise return an empty term self.words.push(Some(word)); } }
diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 8fd943e6e..e93a91d29 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -1,5 +1,48 @@ -#![allow(clippy::too_many_arguments)] +/** Implements a "PathVisitor" which finds all paths of a certain cost from the START to END node of a ranking rule graph. A path is a list of conditions. A condition is the data associated with an edge, given by the ranking rule. Some edges don't have a condition associated with them; they are "unconditional". These kinds of edges are used to "skip" a node.
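For example (purely illustrative, not the output of any real ranking rule): in the proximity graph, a path from START to END could be the list of conditions [`"sun" within proximity 1 of "flower"`, `"flower" within proximity 2 of "wilting"`], and the cost of that path would be the sum of the costs of its edges, here 1 + 2 = 3.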
+
The algorithm uses a depth-first search. It benefits from two main optimisations: - The list of all possible costs to go from any node to the END node is precomputed - The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges untraversable depending on what other edges were selected.
These two optimisations are meant to avoid traversing edges that wouldn't lead to a valid path. In practically all cases, we avoid the exponential complexity that is inherent to depth-first search in a large ranking rule graph.
The DeadEndsCache is a sort of prefix tree which associates a list of forbidden conditions to a list of traversed conditions. For example, the DeadEndsCache could say the following: - Immediately, from the start, the conditions `[a,b]` are forbidden - if we take the condition `c`, then the conditions `[e]` are also forbidden - and if after that, we take `f`, then `[h,i]` are also forbidden - etc. - if we take `g`, then `[f]` is also forbidden - etc. - etc. As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden.
When a path is found from START to END, we give it to the `visit` closure. This closure takes a mutable reference to the `DeadEndsCache`. This means that the caller can update this cache. Therefore, we must handle the case where the DeadEndsCache has been updated. This means potentially backtracking up to the point where the traversed conditions are all allowed by the new DeadEndsCache.
The algorithm also implements the `TermsMatchingStrategy` logic. Some edges are augmented with a list of "nodes_to_skip". Skipping a node means "reaching this node through an unconditional edge". If we have already traversed (i.e. not skipped) a node that is in this list, then we know that we can't traverse this edge. Otherwise, we traverse the edge but make sure to skip any future node that was present in the "nodes_to_skip" list.
The caller can decide to stop the path finding algorithm by returning a `ControlFlow::Break` from the `visit` closure. */ use std::collections::{BTreeSet, VecDeque}; use std::iter::FromIterator; use std::ops::ControlFlow; @@ -12,30 +55,41 @@ use crate::search::new::query_graph::QueryNode; use crate::search::new::small_bitmap::SmallBitmap; use crate::Result;
+/// Closure which processes a path found by the `PathVisitor` type VisitFn<'f, G> = &'f mut dyn FnMut( + // the path as a list of conditions &[Interned<::Condition>], &mut RankingRuleGraph, + // a mutable reference to the DeadEndsCache, to update it in case the given + // path doesn't resolve to any valid document ids &mut DeadEndsCache<::Condition>, ) -> Result>;
+/// A structure which is kept but not updated during the traversal of the graph. +/// It can however be updated by the `visit` closure once a valid path has been found. struct VisitorContext<'a, G: RankingRuleGraphTrait> { graph: &'a mut RankingRuleGraph, all_costs_from_node: &'a MappedInterner>, dead_ends_cache: &'a mut DeadEndsCache, }
+/// The internal state of the traversal algorithm struct VisitorState { + /// Budget from the current node to the end node remaining_cost: u64, - + /// Previously visited conditions, in order. path: Vec>, - + /// Previously visited conditions, as an efficient and compact set. visited_conditions: SmallBitmap, + /// Previously visited (i.e. not skipped) nodes, as an efficient and compact set.
visited_nodes: SmallBitmap, - + /// The conditions that cannot be visited anymore forbidden_conditions: SmallBitmap, - forbidden_conditions_to_nodes: SmallBitmap, + /// The nodes that cannot be visited anymore (they must be skipped) + nodes_to_skip: SmallBitmap, }
+/// See module documentation pub struct PathVisitor<'a, G: RankingRuleGraphTrait> { state: VisitorState, ctx: VisitorContext<'a, G>, } @@ -56,14 +110,13 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> { forbidden_conditions: SmallBitmap::for_interned_values_in( &graph.conditions_interner, ), - forbidden_conditions_to_nodes: SmallBitmap::for_interned_values_in( - &graph.query_graph.nodes, - ), + nodes_to_skip: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes), }, ctx: VisitorContext { graph, all_costs_from_node, dead_ends_cache }, } }
+ /// See module documentation pub fn visit_paths(mut self, visit: VisitFn) -> Result<()> { let _ = self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?; @@ -72,22 +125,31 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> { } impl VisitorState { + /// Visits a node: traverses all its valid conditional and unconditional edges. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. + /// Returns whether a valid path was found from this node otherwise. fn visit_node( &mut self, from_node: Interned, visit: VisitFn, ctx: &mut VisitorContext, ) -> Result> { + // `any_valid` records whether any valid path was found from this point + // if a valid path was found, then we know that the DeadEndsCache may have been updated, + // and we will need to do more work to potentially backtrack let mut any_valid = false; let edges = ctx.graph.edges_of_node.get(from_node).clone(); for edge_idx in edges.iter() { + // could be None if the edge was deleted let Some(edge) = ctx.graph.edges_store.get(edge_idx).clone() else { continue }; if self.remaining_cost < edge.cost as u64 { continue; } self.remaining_cost -= edge.cost as u64; + let cf = match edge.condition { Some(condition) => self.visit_condition( condition, @@ -119,6 +181,10 @@ impl VisitorState { Ok(ControlFlow::Continue(any_valid)) }
+ /// Visits an unconditional edge. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. + /// Returns whether a valid path was found from this node otherwise. fn visit_no_condition( &mut self, dest_node: Interned, @@ -134,20 +200,29 @@ impl VisitorState { { return Ok(ControlFlow::Continue(false)); } + // We've reached the END node! if dest_node == ctx.graph.query_graph.end_node { let control_flow = visit(&self.path, ctx.graph, ctx.dead_ends_cache)?; + // We could change the return type of the visit closure such that the caller + // tells us whether the dead ends cache was updated or not. + // Alternatively, maybe the DeadEndsCache should have a generation number + // attached to it, so that we don't need to play with these booleans at all. match control_flow { ControlFlow::Continue(_) => Ok(ControlFlow::Continue(true)), ControlFlow::Break(_) => Ok(ControlFlow::Break(())), } } else { - let old_fbct = self.forbidden_conditions_to_nodes.clone(); - self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip); + let old_nodes_to_skip = self.nodes_to_skip.clone(); + self.nodes_to_skip.union(edge_new_nodes_to_skip); let cf = self.visit_node(dest_node, visit, ctx)?; - self.forbidden_conditions_to_nodes = old_fbct; + self.nodes_to_skip = old_nodes_to_skip; Ok(cf) } }
+ /// Visits a conditional edge. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop.
+ /// Returns whether a valid path was found from this node otherwise. fn visit_condition( &mut self, condition: Interned, @@ -159,7 +234,7 @@ impl VisitorState { assert!(dest_node != ctx.graph.query_graph.end_node); if self.forbidden_conditions.contains(condition) - || self.forbidden_conditions_to_nodes.contains(dest_node) + || self.nodes_to_skip.contains(dest_node) || edge_new_nodes_to_skip.intersects(&self.visited_nodes) { return Ok(ControlFlow::Continue(false)); @@ -180,19 +255,19 @@ impl VisitorState { self.visited_nodes.insert(dest_node); self.visited_conditions.insert(condition); - let old_fc = self.forbidden_conditions.clone(); + let old_forb_cond = self.forbidden_conditions.clone(); if let Some(next_forbidden) = ctx.dead_ends_cache.forbidden_conditions_after_prefix(self.path.iter().copied()) { self.forbidden_conditions.union(&next_forbidden); } - let old_fctn = self.forbidden_conditions_to_nodes.clone(); - self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip); + let old_nodes_to_skip = self.nodes_to_skip.clone(); + self.nodes_to_skip.union(edge_new_nodes_to_skip); let cf = self.visit_node(dest_node, visit, ctx)?; - self.forbidden_conditions_to_nodes = old_fctn; - self.forbidden_conditions = old_fc; + self.nodes_to_skip = old_nodes_to_skip; + self.forbidden_conditions = old_forb_cond; self.visited_conditions.remove(condition); self.visited_nodes.remove(dest_node);
diff --git a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs b/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs index d0fcd8bd8..5d199c82a 100644 --- a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs @@ -9,12 +9,8 @@ use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::SearchContext; use crate::Result; -// TODO: give a generation to each universe, then be able to get the exact -// delta of docids between two universes of different generations! - /// A cache storing the document ids associated with each ranking rule edge pub struct ConditionDocIdsCache { - // TOOD: should be a mapped interner? pub cache: FxHashMap, ComputedCondition>, _phantom: PhantomData, } @@ -54,7 +50,7 @@ impl ConditionDocIdsCache { } let condition = graph.conditions_interner.get_mut(interned_condition); let computed = G::resolve_condition(ctx, condition, universe)?; - // TODO: if computed.universe_len != universe.len() ? + // Can we put an assert here for computed.universe_len == universe.len()?
let _ = self.cache.insert(interned_condition, computed); let computed = &self.cache[&interned_condition]; Ok(computed) diff --git a/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs b/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs index 4bbf91fcd..bac25da82 100644 --- a/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs @@ -2,6 +2,7 @@ use crate::search::new::interner::{FixedSizeInterner, Interned}; use crate::search::new::small_bitmap::SmallBitmap; pub struct DeadEndsCache { + // conditions and next could/should be part of the same vector conditions: Vec>, next: Vec, pub forbidden: SmallBitmap, @@ -27,7 +28,7 @@ impl DeadEndsCache { self.forbidden.insert(condition); } - pub fn advance(&mut self, condition: Interned) -> Option<&mut Self> { + fn advance(&mut self, condition: Interned) -> Option<&mut Self> { if let Some(idx) = self.conditions.iter().position(|c| *c == condition) { Some(&mut self.next[idx]) } else { diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index 0f2cceaec..e3ccf23fa 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -69,14 +69,9 @@ impl RankingRuleGraphTrait for FidGraph { let mut edges = vec![]; for fid in all_fields { - // TODO: We can improve performances and relevancy by storing - // the term subsets associated to each field ids fetched. edges.push(( - fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. - conditions_interner.insert(FidCondition { - term: term.clone(), // TODO remove this ugly clone - fid, - }), + fid as u32 * term.term_ids.len() as u32, + conditions_interner.insert(FidCondition { term: term.clone(), fid }), )); } diff --git a/milli/src/search/new/ranking_rule_graph/position/mod.rs b/milli/src/search/new/ranking_rule_graph/position/mod.rs index 9b0b6478f..c2e3b9012 100644 --- a/milli/src/search/new/ranking_rule_graph/position/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/position/mod.rs @@ -94,14 +94,9 @@ impl RankingRuleGraphTrait for PositionGraph { let mut edges = vec![]; for (cost, positions) in positions_for_costs { - // TODO: We can improve performances and relevancy by storing - // the term subsets associated to each position fetched edges.push(( cost, - conditions_interner.insert(PositionCondition { - term: term.clone(), // TODO remove this ugly clone - positions, - }), + conditions_interner.insert(PositionCondition { term: term.clone(), positions }), )); } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 057779a22..29a1876b4 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -65,13 +65,6 @@ pub fn compute_docids( } } - // TODO: add safeguard in case the cartesian product is too large! - // even if we restrict the word derivations to a maximum of 100, the size of the - // caterisan product could reach a maximum of 10_000 derivations, which is way too much. - // Maybe prioritise the product of zero typo derivations, then the product of zero-typo/one-typo - // + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been - // reached - for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)? 
{ // Before computing the edges, check that the left word and left phrase // aren't disjoint with the universe, but only do it if there is more than @@ -111,8 +104,6 @@ pub fn compute_docids( Ok(ComputedCondition { docids, universe_len: universe.len(), - // TODO: think about whether we want to reduce the subset, - // we probably should! start_term_subset: Some(left_term.clone()), end_term_subset: right_term.clone(), }) @@ -203,12 +194,7 @@ fn compute_non_prefix_edges( *docids |= new_docids; } } - if backward_proximity >= 1 - // TODO: for now, we don't do any swapping when either term is a phrase - // but maybe we should. We'd need to look at the first/last word of the phrase - // depending on the context. - && left_phrase.is_none() && right_phrase.is_none() - { + if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() { if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)? { diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index 797db5875..d992cd22f 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -33,8 +33,6 @@ pub fn compute_query_term_subset_docids( ctx: &mut SearchContext, term: &QueryTermSubset, ) -> Result { - // TODO Use the roaring::MultiOps trait - let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_docids) = ctx.word_docids(word)? { @@ -59,8 +57,6 @@ pub fn compute_query_term_subset_docids_within_field_id( term: &QueryTermSubset, fid: u16, ) -> Result { - // TODO Use the roaring::MultiOps trait - let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(word.interned(), fid)? { @@ -71,7 +67,6 @@ pub fn compute_query_term_subset_docids_within_field_id( for phrase in term.all_phrases(ctx)? { // There may be false positives when resolving a phrase, so we're not // guaranteed that all of its words are within a single fid. - // TODO: fix this? if let Some(word) = phrase.words(ctx).iter().flatten().next() { if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(*word, fid)? { docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids; @@ -95,7 +90,6 @@ pub fn compute_query_term_subset_docids_within_position( term: &QueryTermSubset, position: u16, ) -> Result { - // TODO Use the roaring::MultiOps trait let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_position_docids) = @@ -108,7 +102,6 @@ pub fn compute_query_term_subset_docids_within_position( for phrase in term.all_phrases(ctx)? { // It's difficult to know the expected position of the words in the phrase, // so instead we just check the first one. - // TODO: fix this? if let Some(word) = phrase.words(ctx).iter().flatten().next() { if let Some(word_position_docids) = ctx.get_db_word_position_docids(*word, position)? { docids |= ctx.get_phrase_docids(phrase)? 
& word_position_docids @@ -132,9 +125,6 @@ pub fn compute_query_graph_docids( q: &QueryGraph, universe: &RoaringBitmap, ) -> Result { - // TODO: there must be a faster way to compute this big - // roaring bitmap expression - let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes); let mut path_nodes_docids = q.nodes.map(|_| RoaringBitmap::new()); diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index 53144d00d..3f57b2aa5 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -141,10 +141,6 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, universe: &RoaringBitmap, ) -> Result>> { let iter = self.iter.as_mut().unwrap(); - // TODO: we should make use of the universe in the function below - // good for correctness, but ideally iter.next_bucket would take the current universe into account, - // as right now it could return buckets that don't intersect with the universe, meaning we will make many - // unneeded calls. if let Some(mut bucket) = iter.next_bucket()? { bucket.candidates &= universe; Ok(Some(bucket)) diff --git a/milli/src/search/new/tests/distinct.rs b/milli/src/search/new/tests/distinct.rs index 2c147d514..c54600f27 100644 --- a/milli/src/search/new/tests/distinct.rs +++ b/milli/src/search/new/tests/distinct.rs @@ -527,7 +527,7 @@ fn test_distinct_all_candidates() { let SearchResult { documents_ids, candidates, .. } = s.execute().unwrap(); let candidates = candidates.iter().collect::>(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); - // TODO: this is incorrect! + // This is incorrect, but unfortunately impossible to do better efficiently. insta::assert_snapshot!(format!("{candidates:?}"), @"[1, 4, 7, 8, 14, 17, 19, 20, 23, 24, 25, 26]"); } diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs index 401508866..6e4181a95 100644 --- a/milli/src/search/new/tests/proximity.rs +++ b/milli/src/search/new/tests/proximity.rs @@ -122,11 +122,11 @@ fn create_edge_cases_index() -> TempIndex { sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz " }, - // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. - // If the search query is "sunflower", the split word "Sun Flower" will match some documents. + // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. + // If the search query is "sunflower", the split word "Sun Flower" will match some documents. // If the query is `sunflower wilting`, then we should make sure that - // the sprximity condition `flower wilting: sprx N` also comes with the condition - // `sun wilting: sprx N+1`. TODO: this is not the exact condition we use for now. + // the proximity condition `flower wilting: sprx N` also comes with the condition + // `sun wilting: sprx N+1`, but this is not the exact condition we use for now. // We only check that the phrase `sun flower` exists and `flower wilting: sprx N`, which // is better than nothing but not the best. 
{ "id": 2, "text": "Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat." }, { "id": 3, - // This document matches the query `sunflower wilting`, but the sprximity condition + // This document matches the query `sunflower wilting`, but the proximity condition // between `sunflower` and `wilting` cannot be through the split-word `Sun Flower` // which would reduce to only `flower` and `wilting` being in sprximity. "text": "A flower wilting under the sun, unlike a sunflower" }, @@ -299,7 +299,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", @@ -316,7 +316,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", @@ -341,7 +341,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"",
diff --git a/milli/src/search/new/tests/proximity_typo.rs b/milli/src/search/new/tests/proximity_typo.rs index ab98f99c0..b459b178b 100644 --- a/milli/src/search/new/tests/proximity_typo.rs +++ b/milli/src/search/new/tests/proximity_typo.rs @@ -2,9 +2,8 @@ This module tests the interactions between the proximity and typo ranking rules. The proximity ranking rule should transform the query graph such that it -only contains the word pairs that it used to compute its bucket. - -TODO: This is not currently implemented. +only contains the word pairs that it used to compute its bucket, but this is not currently +implemented. */ use crate::index::tests::TempIndex; @@ -64,7 +63,7 @@ fn test_trap_basic() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: this is incorrect, 1 should come before 0 + // This is incorrect: 1 should come before 0 insta::assert_debug_snapshot!(texts, @r###" [ "\"summer. holiday. sommer holidty\"",
diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs index 8fd9de5fc..536f6653d 100644 --- a/milli/src/search/new/tests/typo.rs +++ b/milli/src/search/new/tests/typo.rs @@ -571,8 +571,8 @@ fn test_typo_synonyms() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the fast brownish fox jumps over the lackadaisical dog"); - // TODO: is this correct? interaction of ngrams + synonyms means that the - // multi-word synonyms end up having a typo cost.
This is probably not what we want. + // The interaction of ngrams + synonyms means that the multi-word synonyms end up having a typo cost. + // This is probably not what we want. let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
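To make the dead-ends mechanism described in the new `cheapest_paths.rs` module documentation concrete, here is a minimal standalone sketch of the same idea. It is not milli's implementation: `ToyDeadEndsCache`, the plain `usize` condition ids, and the `HashSet`s are illustrative stand-ins for the interned conditions and `SmallBitmap`s used in the patch, but it models the same prefix tree and the same `forbidden_conditions_after_prefix` lookup that `visit_condition` performs while walking the graph.

```rust
use std::collections::HashSet;

/// A simplified model of the `DeadEndsCache`: a prefix tree which maps a
/// sequence of traversed conditions to the set of conditions that become
/// forbidden once that prefix has been traversed.
#[derive(Default)]
struct ToyDeadEndsCache {
    // `conditions[i]` is the condition leading to the subtree `next[i]`.
    conditions: Vec<usize>,
    next: Vec<ToyDeadEndsCache>,
    // Conditions forbidden after traversing the prefix leading to this node.
    forbidden: HashSet<usize>,
}

impl ToyDeadEndsCache {
    /// Record that `condition` is forbidden after traversing `prefix`.
    fn forbid_after_prefix(&mut self, prefix: &[usize], condition: usize) {
        match prefix.split_first() {
            None => {
                self.forbidden.insert(condition);
            }
            Some((&first, rest)) => {
                let idx = match self.conditions.iter().position(|&c| c == first) {
                    Some(idx) => idx,
                    None => {
                        self.conditions.push(first);
                        self.next.push(ToyDeadEndsCache::default());
                        self.conditions.len() - 1
                    }
                };
                self.next[idx].forbid_after_prefix(rest, condition);
            }
        }
    }

    /// The union of all `forbidden` sets found along `prefix`: every condition
    /// that an edge must not carry once this prefix has been traversed.
    fn forbidden_conditions_after_prefix(&self, prefix: &[usize]) -> HashSet<usize> {
        let mut forbidden = self.forbidden.clone();
        let mut cursor = self;
        for c in prefix {
            match cursor.conditions.iter().position(|x| x == c) {
                Some(idx) => {
                    cursor = &cursor.next[idx];
                    forbidden.extend(cursor.forbidden.iter().copied());
                }
                None => break,
            }
        }
        forbidden
    }
}

fn main() {
    // Encode the example from the module documentation:
    // - immediately, from the start, conditions a=0 and b=1 are forbidden
    // - if we take condition c=2, then e=4 is also forbidden
    // - and if after that we take f=5, then h=7 and i=8 are also forbidden
    let mut cache = ToyDeadEndsCache::default();
    cache.forbid_after_prefix(&[], 0);
    cache.forbid_after_prefix(&[], 1);
    cache.forbid_after_prefix(&[2], 4);
    cache.forbid_after_prefix(&[2, 5], 7);
    cache.forbid_after_prefix(&[2, 5], 8);

    // While the visitor's current path is [2, 5], every condition in this set
    // must be avoided, like the `forbidden_conditions` bitmap in `VisitorState`.
    let forbidden = cache.forbidden_conditions_after_prefix(&[2, 5]);
    assert_eq!(forbidden, HashSet::from([0, 1, 4, 7, 8]));
}
```

Keeping the forbidden sets per prefix rather than in one global set is what makes backtracking cheap: when the visitor pops a condition off its path, it simply restores the previous set, as `visit_condition` does with `old_forb_cond` and `old_nodes_to_skip`.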