diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
index 49259cd64..444c3f7d5 100644
--- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
+++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
@@ -5,8 +5,8 @@ use std::ops::DerefMut as _;

 use bumpalo::collections::vec::Vec as BumpVec;
 use bumpalo::Bump;
-use heed::RoTxn;

+use super::match_searchable_field;
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::extract::perm_json_p::contained_in;
@@ -17,8 +17,7 @@ use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
 use crate::update::new::DocumentChange;
-use crate::update::GrenadParameters;
-use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
+use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};

 const MAX_COUNTED_WORDS: usize = 30;

@@ -207,9 +206,10 @@ impl<'extractor> WordDocidsCaches<'extractor> {
 }

 pub struct WordDocidsExtractorData<'a> {
-    tokenizer: &'a DocumentTokenizer<'a>,
-    grenad_parameters: &'a GrenadParameters,
+    tokenizer: DocumentTokenizer<'a>,
+    max_memory_by_thread: Option<usize>,
     buckets: usize,
+    searchable_attributes: Option<Vec<&'a str>>,
 }

 impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
@@ -218,7 +218,7 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
     fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
         Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
             self.buckets,
-            self.grenad_parameters.max_memory_by_thread(),
+            self.max_memory_by_thread,
             extractor_alloc,
         ))))
     }
@@ -230,7 +230,12 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
     ) -> Result<()> {
         for change in changes {
             let change = change?;
-            WordDocidsExtractors::extract_document_change(context, self.tokenizer, change)?;
+            WordDocidsExtractors::extract_document_change(
+                context,
+                &self.tokenizer,
+                self.searchable_attributes.as_deref(),
+                change,
+            )?;
         }
         Ok(())
     }
@@ -248,52 +253,42 @@ impl WordDocidsExtractors {
     where
         MSP: Fn() -> bool + Sync,
     {
-        let index = indexing_context.index;
-        let rtxn = index.read_txn()?;
-
-        let stop_words = index.stop_words(&rtxn)?;
-        let allowed_separators = index.allowed_separators(&rtxn)?;
+        // Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
+        let rtxn = indexing_context.index.read_txn()?;
+        let stop_words = indexing_context.index.stop_words(&rtxn)?;
+        let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
         let allowed_separators: Option<Vec<_>> =
             allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
-        let dictionary = index.dictionary(&rtxn)?;
+        let dictionary = indexing_context.index.dictionary(&rtxn)?;
         let dictionary: Option<Vec<_>> =
             dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-        let builder = tokenizer_builder(
+        let mut builder = tokenizer_builder(
             stop_words.as_ref(),
             allowed_separators.as_deref(),
             dictionary.as_deref(),
         );
-        let tokenizer = builder.into_tokenizer();
-
-        let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
-        let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?;
+        let tokenizer = builder.build();
         let localized_attributes_rules =
-            index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
-
+            indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
         let document_tokenizer = DocumentTokenizer {
             tokenizer: &tokenizer,
-            attribute_to_extract: attributes_to_extract.as_deref(),
-            attribute_to_skip: attributes_to_skip.as_slice(),
             localized_attributes_rules: &localized_attributes_rules,
             max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
         };
-
+        let extractor_data = WordDocidsExtractorData {
+            tokenizer: document_tokenizer,
+            max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
+            buckets: rayon::current_num_threads(),
+            searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?,
+        };
         let datastore = ThreadLocal::new();
-
         {
             let span =
                 tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
             let _entered = span.enter();
-
-            let extractor = WordDocidsExtractorData {
-                tokenizer: &document_tokenizer,
-                grenad_parameters: indexing_context.grenad_parameters,
-                buckets: rayon::current_num_threads(),
-            };
-
             extract(
                 document_changes,
-                &extractor,
+                &extractor_data,
                 indexing_context,
                 extractor_allocs,
                 &datastore,
@@ -312,6 +307,7 @@ impl WordDocidsExtractors {
     fn extract_document_change(
         context: &DocumentChangeContext<RefCell<Option<WordDocidsBalancedCaches>>>,
         document_tokenizer: &DocumentTokenizer,
+        searchable_attributes: Option<&[&str]>,
         document_change: DocumentChange,
     ) -> Result<()> {
         let index = &context.index;
@@ -345,7 +341,9 @@ impl WordDocidsExtractors {
             }
             DocumentChange::Update(inner) => {
                 if !inner.has_changed_for_fields(
-                    document_tokenizer.attribute_to_extract,
+                    &mut |field_name: &str| {
+                        match_searchable_field(field_name, searchable_attributes)
+                    },
                     &context.rtxn,
                     context.index,
                     context.db_fields_ids_map,
@@ -408,15 +406,4 @@ impl WordDocidsExtractors {
         let mut buffer = BumpVec::with_capacity_in(buffer_size, &context.doc_alloc);
         cached_sorter.flush_fid_word_count(&mut buffer)
     }
-
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
-
-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(Vec::new())
-    }
 }
diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
index e58c0efd2..0724b0513 100644
--- a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
+++ b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
@@ -2,30 +2,114 @@ use std::cell::RefCell;
 use std::collections::VecDeque;
 use std::rc::Rc;

-use heed::RoTxn;
+use bumpalo::Bump;

-use super::tokenize_document::DocumentTokenizer;
-use super::SearchableExtractor;
+use super::match_searchable_field;
+use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use crate::proximity::{index_proximity, MAX_DISTANCE};
 use crate::update::new::document::Document;
 use crate::update::new::extract::cache::BalancedCaches;
-use crate::update::new::indexer::document_changes::DocumentChangeContext;
+use crate::update::new::indexer::document_changes::{
+    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
+};
 use crate::update::new::ref_cell_ext::RefCellExt as _;
+use crate::update::new::steps::IndexingStep;
+use crate::update::new::thread_local::{FullySend, ThreadLocal};
 use crate::update::new::DocumentChange;
-use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
+use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
+
+pub struct WordPairProximityDocidsExtractorData<'a> {
+    tokenizer: DocumentTokenizer<'a>,
+    searchable_attributes: Option<Vec<&'a str>>,
+    max_memory_by_thread: Option<usize>,
+    buckets: usize,
+}
+
+impl<'a, 'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractorData<'a> {
+    type Data = RefCell<BalancedCaches<'extractor>>;
+
+    fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
+        Ok(RefCell::new(BalancedCaches::new_in(
+            self.buckets,
+            self.max_memory_by_thread,
+            extractor_alloc,
+        )))
+    }
+
+    fn process<'doc>(
+        &self,
+        changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
+        context: &DocumentChangeContext<Self::Data>,
+    ) -> Result<()> {
+        for change in changes {
+            let change = change?;
+            WordPairProximityDocidsExtractor::extract_document_change(
+                context,
+                &self.tokenizer,
+                self.searchable_attributes.as_deref(),
+                change,
+            )?;
+        }
+        Ok(())
+    }
+}

 pub struct WordPairProximityDocidsExtractor;

-impl SearchableExtractor for WordPairProximityDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
+impl WordPairProximityDocidsExtractor {
+    pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
+        document_changes: &DC,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
+        extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
+        step: IndexingStep,
+    ) -> Result<Vec<BalancedCaches<'extractor>>>
+    where
+        MSP: Fn() -> bool + Sync,
+    {
+        // Warning: this is duplicated code from extract_word_docids.rs
+        let rtxn = indexing_context.index.read_txn()?;
+        let stop_words = indexing_context.index.stop_words(&rtxn)?;
+        let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
+        let allowed_separators: Option<Vec<_>> =
+            allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let dictionary = indexing_context.index.dictionary(&rtxn)?;
+        let dictionary: Option<Vec<_>> =
+            dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
+        let mut builder = tokenizer_builder(
+            stop_words.as_ref(),
+            allowed_separators.as_deref(),
+            dictionary.as_deref(),
+        );
+        let tokenizer = builder.build();
+        let localized_attributes_rules =
+            indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
+        let document_tokenizer = DocumentTokenizer {
+            tokenizer: &tokenizer,
+            localized_attributes_rules: &localized_attributes_rules,
+            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
+        };
+        let extractor_data = WordPairProximityDocidsExtractorData {
+            tokenizer: document_tokenizer,
+            searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?,
+            max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
+            buckets: rayon::current_num_threads(),
+        };
+        let datastore = ThreadLocal::new();
+        {
+            let span =
+                tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
+            let _entered = span.enter();
+            extract(
+                document_changes,
+                &extractor_data,
+                indexing_context,
+                extractor_allocs,
+                &datastore,
+                step,
+            )?;
+        }

-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(Vec::new())
+        Ok(datastore.into_iter().map(RefCell::into_inner).collect())
     }

     // This method is reimplemented to count the number of words in the document in each field
@@ -34,6 +118,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
     fn extract_document_change(
         context: &DocumentChangeContext<RefCell<BalancedCaches>>,
         document_tokenizer: &DocumentTokenizer,
+        searchable_attributes: Option<&[&str]>,
         document_change: DocumentChange,
     ) -> Result<()> {
         let doc_alloc = &context.doc_alloc;
@@ -71,7 +156,9 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
             }
             DocumentChange::Update(inner) => {
                 if !inner.has_changed_for_fields(
-                    document_tokenizer.attribute_to_extract,
+                    &mut |field_name: &str| {
+                        match_searchable_field(field_name, searchable_attributes)
+                    },
                     rtxn,
                     index,
                     context.db_fields_ids_map,
diff --git a/crates/milli/src/update/new/extract/searchable/mod.rs b/crates/milli/src/update/new/extract/searchable/mod.rs
index 7c949a3ce..79a6fae87 100644
--- a/crates/milli/src/update/new/extract/searchable/mod.rs
+++ b/crates/milli/src/update/new/extract/searchable/mod.rs
@@ -2,145 +2,28 @@ mod extract_word_docids;
 mod extract_word_pair_proximity_docids;
 mod tokenize_document;

-use std::cell::RefCell;
-use std::marker::PhantomData;
-
-use bumpalo::Bump;
 pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
 pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
-use heed::RoTxn;
-use tokenize_document::{tokenizer_builder, DocumentTokenizer};

-use super::cache::BalancedCaches;
-use super::DocidsExtractor;
-use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
-};
-use crate::update::new::steps::IndexingStep;
-use crate::update::new::thread_local::{FullySend, ThreadLocal};
-use crate::update::new::DocumentChange;
-use crate::update::GrenadParameters;
-use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE};
+use crate::attribute_patterns::{match_field_legacy, PatternMatch};

-pub struct SearchableExtractorData<'a, EX: SearchableExtractor> {
-    tokenizer: &'a DocumentTokenizer<'a>,
-    grenad_parameters: &'a GrenadParameters,
-    buckets: usize,
-    _ex: PhantomData<EX>,
-}
+pub fn match_searchable_field(
+    field_name: &str,
+    searchable_fields: Option<&[&str]>,
+) -> PatternMatch {
+    let Some(searchable_fields) = searchable_fields else {
+        // If no searchable fields are provided, consider all fields as searchable
+        return PatternMatch::Match;
+    };

-impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
-    for SearchableExtractorData<'a, EX>
-{
-    type Data = RefCell<BalancedCaches<'extractor>>;
-
-    fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
-        Ok(RefCell::new(BalancedCaches::new_in(
-            self.buckets,
-            self.grenad_parameters.max_memory_by_thread(),
-            extractor_alloc,
-        )))
-    }
-
-    fn process<'doc>(
-        &self,
-        changes: impl Iterator<Item = Result<Option<DocumentChange<'doc>>>>,
-        context: &DocumentChangeContext<Self::Data>,
-    ) -> Result<()> {
-        for change in changes {
-            let change = change?;
-            EX::extract_document_change(context, self.tokenizer, change)?;
+    let mut selection = PatternMatch::NoMatch;
+    for pattern in searchable_fields {
+        match match_field_legacy(pattern, field_name) {
+            PatternMatch::Match => return PatternMatch::Match,
+            PatternMatch::Parent => selection = PatternMatch::Parent,
+            PatternMatch::NoMatch => (),
         }
-        Ok(())
-    }
-}
-
-pub trait SearchableExtractor: Sized + Sync {
-    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
-        document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
-        extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-        step: IndexingStep,
-    ) -> Result<Vec<BalancedCaches<'extractor>>>
-    where
-        MSP: Fn() -> bool + Sync,
-    {
-        let rtxn = indexing_context.index.read_txn()?;
-        let stop_words = indexing_context.index.stop_words(&rtxn)?;
-        let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
-        let allowed_separators: Option<Vec<_>> =
-            allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
-        let dictionary = indexing_context.index.dictionary(&rtxn)?;
-        let dictionary: Option<Vec<_>> =
-            dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-        let mut builder = tokenizer_builder(
-            stop_words.as_ref(),
-            allowed_separators.as_deref(),
-            dictionary.as_deref(),
-        );
-        let tokenizer = builder.build();
-
-        let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
-        let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
-        let localized_attributes_rules =
-            indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
-
-        let document_tokenizer = DocumentTokenizer {
-            tokenizer: &tokenizer,
-            attribute_to_extract: attributes_to_extract.as_deref(),
-            attribute_to_skip: attributes_to_skip.as_slice(),
-            localized_attributes_rules: &localized_attributes_rules,
-            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
-        };
-
-        let extractor_data: SearchableExtractorData<Self> = SearchableExtractorData {
-            tokenizer: &document_tokenizer,
-            grenad_parameters: indexing_context.grenad_parameters,
-            buckets: rayon::current_num_threads(),
-            _ex: PhantomData,
-        };
-
-        let datastore = ThreadLocal::new();
-
-        {
-            let span =
-                tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
-            let _entered = span.enter();
-            extract(
-                document_changes,
-                &extractor_data,
-                indexing_context,
-                extractor_allocs,
-                &datastore,
-                step,
-            )?;
-        }
-
-        Ok(datastore.into_iter().map(RefCell::into_inner).collect())
     }

-    fn extract_document_change(
-        context: &DocumentChangeContext<RefCell<BalancedCaches>>,
-        document_tokenizer: &DocumentTokenizer,
-        document_change: DocumentChange,
-    ) -> Result<()>;
-
-    fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index)
-        -> Result<Option<Vec<&'a str>>>;
-
-    fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
-}
-
-impl<T: SearchableExtractor> DocidsExtractor for T {
-    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
-        document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
-        extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-        step: IndexingStep,
-    ) -> Result<Vec<BalancedCaches<'extractor>>>
-    where
-        MSP: Fn() -> bool + Sync,
-    {
-        Self::run_extraction(document_changes, indexing_context, extractor_allocs, step)
-    }
+    selection
 }
diff --git a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs
index 1c1605b66..dda46f24c 100644
--- a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs
+++ b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs
@@ -3,9 +3,10 @@ use std::collections::HashMap;
 use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
 use serde_json::Value;

+use crate::attribute_patterns::PatternMatch;
 use crate::update::new::document::Document;
 use crate::update::new::extract::perm_json_p::{
-    seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, Selection,
+    seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
 };
 use crate::{
     FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
@@ -17,8 +18,6 @@ const MAX_DISTANCE: u32 = 8;

 pub struct DocumentTokenizer<'a> {
     pub tokenizer: &'a Tokenizer<'a>,
-    pub attribute_to_extract: Option<&'a [&'a str]>,
-    pub attribute_to_skip: &'a [&'a str],
     pub localized_attributes_rules: &'a [LocalizedAttributesRule],
     pub max_positions_per_attributes: u32,
 }
@@ -31,87 +30,94 @@ impl<'a> DocumentTokenizer<'a> {
         token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
     ) -> Result<()> {
         let mut field_position = HashMap::new();
+        let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
+            let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
+                return Err(UserError::AttributeLimitReached.into());
+            };
+
+            if meta.is_searchable() {
+                self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
+            }
+
+            // todo: should be a match on the field_name using `match_field_legacy` function,
+            // but for legacy reasons we iterate over all the fields to fill the field_id_map.
+            Ok(PatternMatch::Match)
+        };

         for entry in document.iter_top_level_fields() {
             let (field_name, value) = entry?;

-            let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
-                let Some(field_id) = field_id_map.id_or_insert(field_name) else {
-                    return Err(UserError::AttributeLimitReached.into());
-                };
-
-                if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
-                    != Selection::Select
-                {
-                    return Ok(());
-                }
-
-                let position = field_position
-                    .entry(field_id)
-                    .and_modify(|counter| *counter += MAX_DISTANCE)
-                    .or_insert(0);
-                if *position >= self.max_positions_per_attributes {
-                    return Ok(());
-                }
-
-                let text;
-                let tokens = match value {
-                    Value::Number(n) => {
-                        text = n.to_string();
-                        self.tokenizer.tokenize(text.as_str())
-                    }
-                    Value::Bool(b) => {
-                        text = b.to_string();
-                        self.tokenizer.tokenize(text.as_str())
-                    }
-                    Value::String(text) => {
-                        let locales = self
-                            .localized_attributes_rules
-                            .iter()
-                            .find(|rule| rule.match_str(field_name))
-                            .map(|rule| rule.locales());
-                        self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
-                    }
-                    _ => return Ok(()),
-                };
-
-                // create an iterator of token with their positions.
-                let tokens = process_tokens(*position, tokens)
-                    .take_while(|(p, _)| *p < self.max_positions_per_attributes);
-
-                for (index, token) in tokens {
-                    // keep a word only if it is not empty and fit in a LMDB key.
-                    let token = token.lemma().trim();
-                    if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
-                        *position = index;
-                        if let Ok(position) = (*position).try_into() {
-                            token_fn(field_name, field_id, position, token)?;
-                        }
-                    }
-                }
-
-                Ok(())
-            };
-
             // parse json.
             match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
                 Value::Object(object) => seek_leaf_values_in_object(
                     &object,
-                    None,
-                    &[],
                     field_name,
                     Depth::OnBaseKey,
                     &mut tokenize_field,
                 )?,
                 Value::Array(array) => seek_leaf_values_in_array(
                     &array,
-                    None,
-                    &[],
                     field_name,
                     Depth::OnBaseKey,
                     &mut tokenize_field,
                 )?,
-                value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
+                value => {
+                    tokenize_field(field_name, Depth::OnBaseKey, &value)?;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn tokenize_field(
+        &self,
+        field_id: FieldId,
+        field_name: &str,
+        value: &Value,
+        token_fn: &mut impl FnMut(&str, u16, u16, &str) -> std::result::Result<(), crate::Error>,
+        field_position: &mut HashMap<FieldId, u32>,
+    ) -> Result<()> {
+        let position = field_position
+            .entry(field_id)
+            .and_modify(|counter| *counter += MAX_DISTANCE)
+            .or_insert(0);
+        if *position >= self.max_positions_per_attributes {
+            return Ok(());
+        }
+
+        let text;
+        let tokens = match value {
+            Value::Number(n) => {
+                text = n.to_string();
+                self.tokenizer.tokenize(text.as_str())
+            }
+            Value::Bool(b) => {
+                text = b.to_string();
+                self.tokenizer.tokenize(text.as_str())
+            }
+            Value::String(text) => {
+                let locales = self
+                    .localized_attributes_rules
+                    .iter()
+                    .find(|rule| rule.match_str(field_name) == PatternMatch::Match)
+                    .map(|rule| rule.locales());
+                self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
+            }
+            _ => return Ok(()),
+        };
+
+        // create an iterator of token with their positions.
+        let tokens = process_tokens(*position, tokens)
+            .take_while(|(p, _)| *p < self.max_positions_per_attributes);
+
+        for (index, token) in tokens {
+            // keep a word only if it is not empty and fit in a LMDB key.
+            let token = token.lemma().trim();
+            if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
+                *position = index;
+                if let Ok(position) = (*position).try_into() {
+                    token_fn(field_name, field_id, position, token)?;
+                }
             }
         }

@@ -215,15 +221,20 @@ mod test {
         let mut tb = TokenizerBuilder::default();
         let document_tokenizer = DocumentTokenizer {
             tokenizer: &tb.build(),
-            attribute_to_extract: None,
-            attribute_to_skip: &["not-me", "me-nether.nope"],
             localized_attributes_rules: &[],
             max_positions_per_attributes: 1000,
         };

         let fields_ids_map = FieldIdMapWithMetadata::new(
             fields_ids_map,
-            MetadataBuilder::new(Default::default(), Default::default(), Default::default(), None),
+            MetadataBuilder::new(
+                Default::default(),
+                Default::default(),
+                Default::default(),
+                None,
+                None,
+                Default::default(),
+            ),
         );

         let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);
@@ -265,6 +276,10 @@ mod test {
                 2,
                 16,
             ]: "catto",
+            [
+                3,
+                0,
+            ]: "unsearchable",
             [
                 5,
                 0,
@@ -277,6 +292,10 @@ mod test {
                 8,
                 0,
             ]: "23",
+            [
+                9,
+                0,
+            ]: "unsearchable",
         }
         "###);
     }
diff --git a/crates/milli/src/update/new/indexer/extract.rs b/crates/milli/src/update/new/indexer/extract.rs
index f49cd834d..907a4d1df 100644
--- a/crates/milli/src/update/new/indexer/extract.rs
+++ b/crates/milli/src/update/new/indexer/extract.rs
@@ -199,7 +199,7 @@ where
             let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
             let _entered = span.enter();

-            <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
+            WordPairProximityDocidsExtractor::run_extraction(
                 document_changes,
                 indexing_context,
                 extractor_allocs,
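
A note on the selection logic introduced in mod.rs: `match_searchable_field` scans every user-defined pattern and keeps the strongest result, returning `Match` as soon as any pattern matches the field, remembering `Parent` when a pattern only relates to an ancestor of the field (so nested objects are still traversed), and falling back to `NoMatch`. The standalone sketch below reproduces that precedence; the local `match_field_legacy` is only a stand-in whose dotted-path prefix rules are an assumption, the real one lives in `crate::attribute_patterns` and may differ.

    #[derive(Debug, PartialEq, Clone, Copy)]
    enum PatternMatch {
        Match,
        Parent,
        NoMatch,
    }

    // Stand-in for crate::attribute_patterns::match_field_legacy (assumed behaviour):
    // a pattern matches the field itself or any of its sub-fields, and "parent-matches"
    // a field that is an ancestor of the pattern (e.g. pattern "doggo.name", field "doggo").
    fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
        if pattern == field || (field.starts_with(pattern) && field[pattern.len()..].starts_with('.')) {
            PatternMatch::Match
        } else if pattern.starts_with(field) && pattern[field.len()..].starts_with('.') {
            PatternMatch::Parent
        } else {
            PatternMatch::NoMatch
        }
    }

    // Same precedence as the helper added in mod.rs: Match wins immediately,
    // Parent is only kept as a fallback, NoMatch means the field is ignored.
    fn match_searchable_field(field_name: &str, searchable_fields: Option<&[&str]>) -> PatternMatch {
        let Some(searchable_fields) = searchable_fields else {
            return PatternMatch::Match; // no explicit setting: everything is searchable
        };
        let mut selection = PatternMatch::NoMatch;
        for pattern in searchable_fields {
            match match_field_legacy(pattern, field_name) {
                PatternMatch::Match => return PatternMatch::Match,
                PatternMatch::Parent => selection = PatternMatch::Parent,
                PatternMatch::NoMatch => (),
            }
        }
        selection
    }

    fn main() {
        let fields = ["title", "doggo.name"];
        assert_eq!(match_searchable_field("title", Some(&fields[..])), PatternMatch::Match);
        assert_eq!(match_searchable_field("doggo", Some(&fields[..])), PatternMatch::Parent);
        assert_eq!(match_searchable_field("doggo.name.first", Some(&fields[..])), PatternMatch::Match);
        assert_eq!(match_searchable_field("price", Some(&fields[..])), PatternMatch::NoMatch);
        assert_eq!(match_searchable_field("anything", None), PatternMatch::Match);
    }

This is also why both extractors now pass a closure to `has_changed_for_fields`: an update is only re-tokenized when one of its changed fields resolves to `Match` or `Parent` under these rules.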