diff --git a/Cargo.lock b/Cargo.lock
index 3965ff9b4..cd4e09605 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4483,7 +4483,7 @@ dependencies = [
 [[package]]
 name = "raw-collections"
 version = "0.1.0"
-source = "git+https://github.com/dureuill/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
+source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
 dependencies = [
  "allocator-api2",
  "bitpacking",
diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml
index deaded910..657dd6dfe 100644
--- a/crates/index-scheduler/Cargo.toml
+++ b/crates/index-scheduler/Cargo.toml
@@ -22,7 +22,7 @@ flate2 = "1.0.30"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.6.0"
-raw-collections = { git = "https://github.com/dureuill/raw-collections.git", version = "0.1.0" }
+raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 rayon = "1.10.0"
 roaring = { version = "0.10.6", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs
index 1b30c6d48..41e1da840 100644
--- a/crates/index-scheduler/src/batch.rs
+++ b/crates/index-scheduler/src/batch.rs
@@ -1411,17 +1411,6 @@ impl IndexScheduler {
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }
-                // else if primary_key_has_been_set {
-                //     // Everything failed but we've set a primary key.
-                //     // We need to remove it.
-                //     let mut builder =
-                //         milli::update::Settings::new(index_wtxn, index, indexer_config);
-                //     builder.reset_primary_key();
-                //     builder.execute(
-                //         |indexing_step| tracing::trace!(update = ?indexing_step),
-                //         || must_stop_processing.clone().get(),
-                //     )?;
-                // }
 
                 Ok(tasks)
             }
diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs
index 39434fa6d..cef24c1ea 100644
--- a/crates/index-scheduler/src/lib.rs
+++ b/crates/index-scheduler/src/lib.rs
@@ -1365,6 +1365,7 @@ impl IndexScheduler {
         let ProcessingTasks { batch, processing, progress } =
             self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone();
 
+        // ignored for now, might be added to batch details later
        let _ = progress;
 
        let ret = tasks.into_iter();
@@ -5198,11 +5199,9 @@ mod tests {
         handle.advance_one_successful_batch();
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed");
 
-        // The second batch should fail.
         handle.advance_one_successful_batch();
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails");
 
-        // The second batch should fail.
         handle.advance_one_successful_batch();
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_fails");
 
@@ -5263,7 +5262,6 @@ mod tests {
         handle.advance_one_successful_batch();
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed");
 
-        // The second batch should fail and contains two tasks.
         handle.advance_one_successful_batch();
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_and_third_tasks_fails");
diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml
index 3bd368e7c..349c06080 100644
--- a/crates/meilisearch-types/Cargo.toml
+++ b/crates/meilisearch-types/Cargo.toml
@@ -24,7 +24,7 @@ flate2 = "1.0.30"
 fst = "0.4.7"
 memmap2 = "0.9.4"
 milli = { path = "../milli" }
-raw-collections = { git = "https://github.com/dureuill/raw-collections.git", version = "0.1.0" }
+raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 roaring = { version = "0.10.6", features = ["serde"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde-cs = "0.2.4"
diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs
index 311fcccf4..aec236949 100644
--- a/crates/meilisearch-types/src/document_formats.rs
+++ b/crates/meilisearch-types/src/document_formats.rs
@@ -128,7 +128,6 @@ impl ErrorCode for DocumentFormatError {
     }
 }
 
-// TODO remove that from the place I've borrowed it
 #[derive(Debug)]
 enum AllowedType {
     String,
@@ -213,7 +212,7 @@ pub fn read_csv(input: &File, output: impl io::Write, delimiter: u8) -> Result<u64>
 pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
-    // We memory map to be able to deserailize into a TopLevelMap<'pl> that
+    // We memory map to be able to deserialize into a TopLevelMap<'pl> that
     // does not allocate when possible and only materialize the first/top level.
     let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
     let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB
@@ -254,7 +253,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
 /// Reads NDJSON from file and write it in NDJSON in a file checking it along the way.
 pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
-    // We memory map to be able to deserailize into a TopLevelMap<'pl> that
+    // We memory map to be able to deserialize into a TopLevelMap<'pl> that
     // does not allocate when possible and only materialize the first/top level.
     let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
     let mut output = BufWriter::new(output);
diff --git a/crates/meilisearch/tests/vector/openai.rs b/crates/meilisearch/tests/vector/openai.rs
index c4cf12a2f..99aa1f710 100644
--- a/crates/meilisearch/tests/vector/openai.rs
+++ b/crates/meilisearch/tests/vector/openai.rs
@@ -172,7 +172,7 @@ async fn create_mock_with_template(
         }))
     }
 }
-    // 3. check API key
+    // 2. check API key
     match req.headers.get("Authorization") {
         Some(api_key) if api_key == API_KEY_BEARER => {
             {}
diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml
index 07e18ef4d..c47a0a354 100644
--- a/crates/milli/Cargo.toml
+++ b/crates/milli/Cargo.toml
@@ -95,7 +95,7 @@ ureq = { version = "2.10.0", features = ["json"] }
 url = "2.5.2"
 rayon-par-bridge = "0.1.0"
 hashbrown = "0.15.0"
-raw-collections = { git = "https://github.com/dureuill/raw-collections.git", version = "0.1.0" }
+raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
 bumpalo = "3.16.0"
 thread_local = "1.1.8"
 allocator-api2 = "0.2.18"
diff --git a/crates/milli/src/documents/builder.rs b/crates/milli/src/documents/builder.rs
index 1cf90447e..52b656290 100644
--- a/crates/milli/src/documents/builder.rs
+++ b/crates/milli/src/documents/builder.rs
@@ -2,7 +2,7 @@ use std::io::{self, Write};
 
 use grenad::{CompressionType, WriterBuilder};
 use serde::de::Deserializer;
-use serde_json::{to_writer, Value};
+use serde_json::to_writer;
 
 use super::{DocumentsBatchIndex, Error, DOCUMENTS_BATCH_INDEX_KEY};
 use crate::documents::serde_impl::DocumentVisitor;
@@ -87,95 +87,6 @@ impl<W: Write> DocumentsBatchBuilder<W> {
         de.deserialize_any(&mut visitor)?
     }
 
-    /// Appends a new CSV file into the batch and updates the `DocumentsBatchIndex` accordingly.
-    pub fn append_csv<R: io::Read>(&mut self, mut reader: csv::Reader<R>) -> Result<(), Error> {
-        // Make sure that we insert the fields ids in order as the obkv writer has this requirement.
-        let mut typed_fields_ids: Vec<_> = reader
-            .headers()?
-            .into_iter()
-            .map(parse_csv_header)
-            .map(|(k, t)| (self.fields_index.insert(k), t))
-            .enumerate()
-            .collect();
-        // Make sure that we insert the fields ids in order as the obkv writer has this requirement.
-        typed_fields_ids.sort_unstable_by_key(|(_, (fid, _))| *fid);
-
-        let mut record = csv::StringRecord::new();
-        let mut line = 0;
-        while reader.read_record(&mut record)? {
-            // We increment here and not at the end of the while loop to take
-            // the header offset into account.
-            line += 1;
-
-            self.obkv_buffer.clear();
-            let mut writer = obkv::KvWriter::new(&mut self.obkv_buffer);
-
-            for (i, (field_id, type_)) in typed_fields_ids.iter() {
-                self.value_buffer.clear();
-
-                let value = &record[*i];
-                let trimmed_value = value.trim();
-                match type_ {
-                    AllowedType::Number => {
-                        if trimmed_value.is_empty() {
-                            to_writer(&mut self.value_buffer, &Value::Null)?;
-                        } else if let Ok(integer) = trimmed_value.parse::<i64>() {
-                            to_writer(&mut self.value_buffer, &integer)?;
-                        } else {
-                            match trimmed_value.parse::<f64>() {
-                                Ok(float) => {
-                                    to_writer(&mut self.value_buffer, &float)?;
-                                }
-                                Err(error) => {
-                                    return Err(Error::ParseFloat {
-                                        error,
-                                        line,
-                                        value: value.to_string(),
-                                    });
-                                }
-                            }
-                        }
-                    }
-                    AllowedType::Boolean => {
-                        if trimmed_value.is_empty() {
-                            to_writer(&mut self.value_buffer, &Value::Null)?;
-                        } else {
-                            match trimmed_value.parse::<bool>() {
-                                Ok(bool) => {
-                                    to_writer(&mut self.value_buffer, &bool)?;
-                                }
-                                Err(error) => {
-                                    return Err(Error::ParseBool {
-                                        error,
-                                        line,
-                                        value: value.to_string(),
-                                    });
-                                }
-                            }
-                        }
-                    }
-                    AllowedType::String => {
-                        if value.is_empty() {
-                            to_writer(&mut self.value_buffer, &Value::Null)?;
-                        } else {
-                            to_writer(&mut self.value_buffer, value)?;
-                        }
-                    }
-                }
-
-                // We insert into the obkv writer the value buffer that has been filled just above.
-                writer.insert(*field_id, &self.value_buffer)?;
-            }
-
-            let internal_id = self.documents_count.to_be_bytes();
-            let document_bytes = writer.into_inner()?;
-            self.writer.insert(internal_id, &document_bytes)?;
-            self.documents_count += 1;
-        }
-
-        Ok(())
-    }
-
     /// Flushes the content on disk and stores the final version of the `DocumentsBatchIndex`.
     pub fn into_inner(mut self) -> io::Result<W> {
         let DocumentsBatchBuilder { mut writer, fields_index, .. } = self;
@@ -189,35 +100,12 @@
     }
 }
 
-#[derive(Debug)]
-enum AllowedType {
-    String,
-    Boolean,
-    Number,
-}
-
-fn parse_csv_header(header: &str) -> (&str, AllowedType) {
-    // if there are several separators we only split on the last one.
-    match header.rsplit_once(':') {
-        Some((field_name, field_type)) => match field_type {
-            "string" => (field_name, AllowedType::String),
-            "boolean" => (field_name, AllowedType::Boolean),
-            "number" => (field_name, AllowedType::Number),
-            // if the pattern isn't recognized, we keep the whole field.
-            _otherwise => (header, AllowedType::String),
-        },
-        None => (header, AllowedType::String),
-    }
-}
-
 #[cfg(test)]
 mod test {
     use std::io::Cursor;
 
-    use serde_json::json;
-
     use super::*;
-    use crate::documents::{obkv_to_object, DocumentsBatchReader};
+    use crate::documents::DocumentsBatchReader;
 
     #[test]
     fn add_single_documents_json() {
@@ -253,348 +141,4 @@
 
         assert!(cursor.next_document().unwrap().is_none());
     }
-
-    #[test]
-    fn add_documents_csv() {
-        let csv_content = "id:number,field:string\n1,hello!\n2,blabla";
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        assert_eq!(builder.documents_count(), 2);
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-        assert_eq!(index.len(), 2);
-
-        let document = cursor.next_document().unwrap().unwrap();
-        assert_eq!(document.iter().count(), 2);
-
-        let document = cursor.next_document().unwrap().unwrap();
-        assert_eq!(document.iter().count(), 2);
-
-        assert!(cursor.next_document().unwrap().is_none());
-    }
-
-    #[test]
-    fn simple_csv_document() {
-        let csv_content = r#"city,country,pop
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city": "Boston",
-                "country": "United States",
-                "pop": "4628910",
-            })
-        );
-
-        assert!(cursor.next_document().unwrap().is_none());
-    }
-
-    #[test]
-    fn coma_in_field() {
-        let csv_content = r#"city,country,pop
-"Boston","United, States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city": "Boston",
-                "country": "United, States",
-                "pop": "4628910",
-            })
-        );
-    }
-
-    #[test]
-    fn quote_in_field() {
-        let csv_content = r#"city,country,pop
-"Boston","United"" States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city": "Boston",
-                "country": "United\" States",
-                "pop": "4628910",
-            })
-        );
-    }
-
-    #[test]
-    fn integer_in_field() {
-        let csv_content = r#"city,country,pop:number
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city": "Boston",
-                "country": "United States",
-                "pop": 4628910,
-            })
-        );
-    }
-
-    #[test]
-    fn integer_as_id() {
-        let csv_content = r#""id:number","title:string","comment:string"
-"1239","Pride and Prejudice","A great book""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "id": 1239,
-                "title": "Pride and Prejudice",
-                "comment": "A great book",
-            })
-        );
-    }
-
-    #[test]
-    fn float_in_field() {
-        let csv_content = r#"city,country,pop:number
-"Boston","United States","4628910.01""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city": "Boston",
-                "country": "United States",
-                "pop": 4628910.01,
-            })
-        );
-    }
-
-    #[test]
-    fn several_colon_in_header() {
-        let csv_content = r#"city:love:string,country:state,pop
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city:love": "Boston",
-                "country:state": "United States",
-                "pop": "4628910",
-            })
-        );
-    }
-
-    #[test]
-    fn ending_by_colon_in_header() {
-        let csv_content = r#"city:,country,pop
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city:": "Boston",
-                "country": "United States",
-                "pop": "4628910",
-            })
-        );
-    }
-
-    #[test]
-    fn starting_by_colon_in_header() {
-        let csv_content = r#":city,country,pop
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                ":city": "Boston",
-                "country": "United States",
-                "pop": "4628910",
-            })
-        );
-    }
-
-    #[ignore]
-    #[test]
-    fn starting_by_colon_in_header2() {
-        let csv_content = r#":string,country,pop
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, _) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        assert!(cursor.next_document().is_err());
-    }
-
-    #[test]
-    fn double_colon_in_header() {
-        let csv_content = r#"city::string,country,pop
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        builder.append_csv(csv).unwrap();
-        let vector = builder.into_inner().unwrap();
-
-        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
-            .unwrap()
-            .into_cursor_and_fields_index();
-
-        let doc = cursor.next_document().unwrap().unwrap();
-        let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
-
-        assert_eq!(
-            val,
-            json!({
-                "city:": "Boston",
-                "country": "United States",
-                "pop": "4628910",
-            })
-        );
-    }
-
-    #[test]
-    fn bad_type_in_header() {
-        let csv_content = r#"city,country:number,pop
-"Boston","United States","4628910""#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        assert!(builder.append_csv(csv).is_err());
-    }
-
-    #[test]
-    fn bad_column_count1() {
-        let csv_content = r#"city,country,pop
-"Boston","United States","4628910", "too much
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content"#;
-        let csv = csv::Reader::from_reader(Cursor::new(csv_content));
-
-        let mut builder = DocumentsBatchBuilder::new(Vec::new());
-        assert!(builder.append_csv(csv).is_err());
-    }
-
-    #[test]
-    fn bad_column_count2() {
-        let csv_content = r#"city,country,pop
csv_content = r#"city,country,pop -"Boston","United States""#; - let csv = csv::Reader::from_reader(Cursor::new(csv_content)); - - let mut builder = DocumentsBatchBuilder::new(Vec::new()); - assert!(builder.append_csv(csv).is_err()); - } } diff --git a/crates/milli/src/documents/mod.rs b/crates/milli/src/documents/mod.rs index 5f6235f9e..88fa38d30 100644 --- a/crates/milli/src/documents/mod.rs +++ b/crates/milli/src/documents/mod.rs @@ -253,33 +253,4 @@ mod test { {"id": 2,"a": 0,"b": 0}, ]); } - - #[test] - fn csv_types_dont_panic() { - let csv1_content = - "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5"; - let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content)); - - let mut builder = DocumentsBatchBuilder::new(Vec::new()); - builder.append_csv(csv1).unwrap(); - let vector = builder.into_inner().unwrap(); - - DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap(); - } - - #[test] - fn out_of_order_csv_fields() { - let csv1_content = "id:number,b\n1,0"; - let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content)); - - let csv2_content = "id:number,a,b\n2,0,0"; - let csv2 = csv::Reader::from_reader(Cursor::new(csv2_content)); - - let mut builder = DocumentsBatchBuilder::new(Vec::new()); - builder.append_csv(csv1).unwrap(); - builder.append_csv(csv2).unwrap(); - let vector = builder.into_inner().unwrap(); - - DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap(); - } } diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index 54fdc7b4b..65a1111fa 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -102,6 +102,7 @@ impl Metadata { rules: &'rules [LocalizedAttributesRule], ) -> Option<&'rules [Language]> { let localized_attributes_rule_id = self.localized_attributes_rule_id?.get(); + // - 1: `localized_attributes_rule_id` is NonZero let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap(); Some(rule.locales()) } @@ -160,6 +161,7 @@ impl MetadataBuilder { .iter() .flat_map(|v| v.iter()) .position(|rule| rule.match_str(field)) + // saturating_add(1): make `id` `NonZero` .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); Metadata { searchable, filterable, sortable, localized_attributes_rule_id } diff --git a/crates/milli/src/heed_codec/facet/ordered_f64_codec.rs b/crates/milli/src/heed_codec/facet/ordered_f64_codec.rs index 19ba7a460..d198eae38 100644 --- a/crates/milli/src/heed_codec/facet/ordered_f64_codec.rs +++ b/crates/milli/src/heed_codec/facet/ordered_f64_codec.rs @@ -46,6 +46,7 @@ fn encode_f64_into_ordered_bytes( f: f64, buffer: &mut [u8; 16], ) -> Result<(), InvalidGloballyOrderedFloatError> { + // write the globally ordered float let bytes = f64_into_bytes(f).ok_or(InvalidGloballyOrderedFloatError { float: f })?; buffer[..8].copy_from_slice(&bytes[..]); // Then the f64 value just to be able to read it back diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 25f5fc05c..3afcd3e4b 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -87,23 +87,10 @@ pub enum WriterOperation { } pub enum ArroyOperation { - /// TODO: call when deleting regular documents - DeleteVectors { - docid: DocumentId, - }, - SetVectors { - docid: DocumentId, - embedder_id: u8, - embeddings: Vec, - }, - SetVector { - docid: DocumentId, - embedder_id: u8, - embedding: Embedding, - }, - 
-    Finish {
-        configs: Vec<IndexEmbeddingConfig>,
-    },
+    DeleteVectors { docid: DocumentId },
+    SetVectors { docid: DocumentId, embedder_id: u8, embeddings: Vec<Embedding> },
+    SetVector { docid: DocumentId, embedder_id: u8, embedding: Embedding },
+    Finish { configs: Vec<IndexEmbeddingConfig> },
 }
 
 pub struct DbOperation {
@@ -334,7 +321,6 @@ impl DocidsSender for FacetDocidsSender<'_> {
     fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
         let (facet_kind, key) = FacetKind::extract_from_key(key);
         let database = Database::from(facet_kind);
-        // let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
         let entry = match facet_kind {
             // skip level group size
             FacetKind::String | FacetKind::Number => {
diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs
index 23d93a2c2..aeb1d5694 100644
--- a/crates/milli/src/update/new/extract/documents.rs
+++ b/crates/milli/src/update/new/extract/documents.rs
@@ -140,7 +140,6 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
                     )?;
                     document_extractor_data.docids_delta.insert_add_u32(docid);
                     self.document_sender.uncompressed(docid, external_docid, content).unwrap();
-                    // extracted_dictionary_sender.send(self, dictionary: &[u8]);
                 }
             }
         }
diff --git a/crates/milli/src/update/new/extract/geo/mod.rs b/crates/milli/src/update/new/extract/geo/mod.rs
index c3ea76c42..9a6077318 100644
--- a/crates/milli/src/update/new/extract/geo/mod.rs
+++ b/crates/milli/src/update/new/extract/geo/mod.rs
@@ -137,7 +137,6 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
     fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
         Ok(RefCell::new(GeoExtractorData {
             removed: bumpalo::collections::Vec::new_in(extractor_alloc),
-            // inserted: Uell::new_in(extractor_alloc),
             inserted: bumpalo::collections::Vec::new_in(extractor_alloc),
             spilled_inserted: None,
             spilled_removed: None,
@@ -242,7 +241,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
     }
 }
 
-/// Extracts and validate the latitude and latitude from a document geo field.
+/// Extracts and validates the latitude and longitude from a document geo field.
 ///
 /// It can be of the form `{ "lat": 0.0, "lng": "1.0" }`.
 pub fn extract_geo_coordinates(
diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
index f3d4afcb8..eef9b322a 100644
--- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
+++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs
@@ -35,7 +35,6 @@ pub struct WordDocidsBalancedCaches<'extractor> {
 unsafe impl<'extractor> MostlySend for WordDocidsBalancedCaches<'extractor> {}
 
 impl<'extractor> WordDocidsBalancedCaches<'extractor> {
-    /// TODO Make sure to give the same max_memory to all of them, without splitting it
     pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
         Self {
             word_fid_docids: BalancedCaches::new_in(buckets, max_memory, alloc),
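
Reviewer's note on the `ordered_f64_codec.rs` hunk above: the codec fills a 16-byte key whose first 8 bytes are an order-preserving encoding of the f64 (so the store's bytewise key order matches numeric order) and whose last 8 bytes hold the raw value, kept only so the float can be read back. The standalone sketch below illustrates that layout; `to_ordered_bytes` is a hypothetical stand-in for milli's `f64_into_bytes` (whose exact transform is not shown in this patch), using the classic sign-flip trick.

// Sketch only: not part of the patch, and not milli's exact implementation.
fn to_ordered_bytes(f: f64) -> [u8; 8] {
    let bits = f.to_bits();
    // Positive floats get their sign bit set; negative floats are fully
    // inverted. The big-endian bytes of the result then sort numerically.
    let ordered = if bits >> 63 == 0 { bits ^ (1u64 << 63) } else { !bits };
    ordered.to_be_bytes()
}

fn encode(f: f64) -> [u8; 16] {
    let mut buffer = [0u8; 16];
    buffer[..8].copy_from_slice(&to_ordered_bytes(f)); // globally ordered part
    buffer[8..].copy_from_slice(&f.to_be_bytes()); // raw value, to read it back
    buffer
}

fn main() {
    let mut keys: Vec<[u8; 16]> = [-2.5f64, -0.0, 1.0, 10.0].map(encode).to_vec();
    keys.sort(); // plain bytewise sort, as an LMDB-style store would do
    let smallest = f64::from_be_bytes(keys[0][8..].try_into().unwrap());
    assert_eq!(smallest, -2.5);
}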