diff --git a/Cargo.lock b/Cargo.lock index c3222c7fd..30b1102b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2623,6 +2623,7 @@ dependencies = [ "meilisearch-types", "memmap2", "page_size", + "raw-collections", "rayon", "roaring", "serde", diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index 4a2913083..deaded910 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -22,6 +22,7 @@ flate2 = "1.0.30" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } page_size = "0.6.0" +raw-collections = { git = "https://github.com/dureuill/raw-collections.git", version = "0.1.0" } rayon = "1.10.0" roaring = { version = "0.10.6", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index bd307b19e..4ae8c7d46 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -43,6 +43,7 @@ use meilisearch_types::milli::{self, Filter}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; +use raw_collections::RawMap; use roaring::RoaringBitmap; use time::macros::format_description; use time::OffsetDateTime; @@ -1318,7 +1319,12 @@ impl IndexScheduler { index, &mut new_fields_ids_map, primary_key.as_deref(), - first_document.as_ref(), + first_document + .map(|raw| RawMap::from_raw_value(raw, &indexer_alloc)) + .transpose() + .map_err(|error| { + milli::Error::UserError(milli::UserError::SerdeJson(error)) + })?, )? .map_err(milli::Error::from)?; diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 3b66c2ec0..ca61a9b7b 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -12,6 +12,7 @@ use heed::{RoTxn, RwTxn}; use itertools::{merge_join_by, EitherOrBoth}; pub use partial_dump::PartialDump; use rand::SeedableRng as _; +use raw_collections::RawMap; use rayon::ThreadPool; use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; @@ -24,7 +25,7 @@ use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; use super::words_prefix_docids::{ compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids, }; -use super::{StdResult, TopLevelMap}; +use super::StdResult; use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; use crate::facet::FacetType; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; @@ -733,7 +734,7 @@ pub fn retrieve_or_guess_primary_key<'a>( index: &Index, new_fields_ids_map: &mut FieldsIdsMap, primary_key_from_op: Option<&'a str>, - first_document: Option<&'a TopLevelMap<'a>>, + first_document: Option>, ) -> Result, bool), UserError>> { // make sure that we have a declared primary key, either fetching it from the index or attempting to guess it. @@ -769,12 +770,18 @@ pub fn retrieve_or_guess_primary_key<'a>( None => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), }; - let mut guesses: Vec<&str> = first_document + let guesses: Result> = first_document .keys() - .map(AsRef::as_ref) - .filter(|name| name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY)) + .filter_map(|name| { + let Some(_) = new_fields_ids_map.insert(name) else { + return Some(Err(UserError::AttributeLimitReached.into())); + }; + name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY).then_some(Ok(name)) + }) .collect(); + let mut guesses = guesses?; + // sort the keys in lexicographical order, so that fields are always in the same order. guesses.sort_unstable();