diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 1295c4384..64497bc13 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -18,6 +18,7 @@ flatten-serde-json = { path = "../flatten-serde-json" } grenad = { version = "0.4.1", default-features = false, features = ["tempfile"] } geoutils = "0.4.1" heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] } +json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.9" } memmap2 = "0.5.3" diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index cbb6ed428..e94eb170b 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -286,9 +286,11 @@ impl<'a, 'i> Transform<'a, 'i> { })?; self.original_sorter.insert(&docid.to_be_bytes(), base_obkv)?; - let buffer = self.flatten_from_fields_ids_map(KvReader::new(&base_obkv))?; - - self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?; + if let Some(buffer) = self.flatten_from_fields_ids_map(KvReader::new(&base_obkv))? { + self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?; + } else { + self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?; + } } else { self.new_documents_ids.insert(docid); } @@ -300,8 +302,13 @@ impl<'a, 'i> Transform<'a, 'i> { if let Some(flatten) = flattened_document { self.flattened_sorter.insert(docid.to_be_bytes(), &flatten)?; } else { - let buffer = self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))?; - self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?; + if let Some(buffer) = + self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? 
+ { + self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?; + } else { + self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?; + } } progress_callback(UpdateIndexingStep::RemapDocumentAddition { @@ -326,8 +333,15 @@ impl<'a, 'i> Transform<'a, 'i> { } // Flatten a document from the fields ids map contained in self and insert the new - // created fields. - fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Vec<u8>> { + // created fields. Returns `None` if the document doesn't need to be flattened. + fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> { + if obkv + .iter() + .all(|(_, value)| !json_depth_checker::should_flatten_from_unchecked_slice(value)) + { + return Ok(None); + } + let mut doc = serde_json::Map::new(); for (k, v) in obkv.iter() { @@ -357,7 +371,7 @@ impl<'a, 'i> Transform<'a, 'i> { writer.insert(fid, &value)?; } - Ok(buffer) + Ok(Some(buffer)) } // Flatten a document from a field mapping generated by [create_fields_mapping]