diff --git a/Cargo.lock b/Cargo.lock
index dd51e7b07..0021c2be8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -469,7 +469,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"

 [[package]]
 name = "benchmarks"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "anyhow",
  "bytes",
@@ -1199,7 +1199,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1413,7 +1413,7 @@ dependencies = [

 [[package]]
 name = "file-store"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "faux",
  "tempfile",
@@ -1435,7 +1435,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "insta",
  "nom",
@@ -1454,7 +1454,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1572,7 +1572,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "arbitrary",
  "clap",
@@ -1894,7 +1894,7 @@ dependencies = [

 [[package]]
 name = "index-scheduler"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "anyhow",
  "big_s",
@@ -2082,7 +2082,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "insta",
  "md5",
@@ -2509,7 +2509,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -2600,7 +2600,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "base64 0.21.2",
  "enum-iterator",
@@ -2619,7 +2619,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -2673,7 +2673,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "big_s",
  "bimap",
@@ -3004,7 +3004,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.3.0"
+version = "1.3.1"
 dependencies = [
  "big_s",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index 2099483ee..714f7b289 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@ members = [
 ]

 [workspace.package]
-version = "1.3.0"
+version = "1.3.1"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
new file mode 100644
index 000000000..92fc61d72
--- /dev/null
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
@@ -0,0 +1,24 @@
+---
+source: dump/src/reader/mod.rs
+expression: spells.settings().unwrap()
+---
+{
+  "displayedAttributes": [
+    "*"
+  ],
+  "searchableAttributes": [
+    "*"
+  ],
+  "filterableAttributes": [],
+  "sortableAttributes": [],
+  "rankingRules": [
+    "typo",
+    "words",
+    "proximity",
+    "attribute",
+    "exactness"
+  ],
+  "stopWords": [],
+  "synonyms": {},
+  "distinctAttribute": null
+}
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
new file mode 100644
index 000000000..b0b54c136
--- /dev/null
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
@@ -0,0 +1,38 @@
+---
+source: dump/src/reader/mod.rs
+expression: products.settings().unwrap()
+---
+{
+  "displayedAttributes": [
+    "*"
+  ],
+  "searchableAttributes": [
+    "*"
+  ],
+  "filterableAttributes": [],
+  "sortableAttributes": [],
+  "rankingRules": [
+    "typo",
+    "words",
+    "proximity",
+    "attribute",
+    "exactness"
+  ],
+  "stopWords": [],
+  "synonyms": {
+    "android": [
+      "phone",
+      "smartphone"
+    ],
+    "iphone": [
+      "phone",
+      "smartphone"
+    ],
+    "phone": [
+      "android",
+      "iphone",
+      "smartphone"
+    ]
+  },
+  "distinctAttribute": null
+}
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap
new file mode 100644
index 000000000..5c12a0438
--- /dev/null
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap
@@ -0,0 +1,31 @@
+---
+source: dump/src/reader/mod.rs
+expression: movies.settings().unwrap()
+---
+{
+  "displayedAttributes": [
+    "*"
+  ],
+  "searchableAttributes": [
+    "*"
+  ],
+  "filterableAttributes": [
+    "genres",
+    "id"
+  ],
+  "sortableAttributes": [
+    "genres",
+    "id"
+  ],
+  "rankingRules": [
+    "typo",
+    "words",
+    "proximity",
+    "attribute",
+    "exactness",
+    "release_date:asc"
+  ],
+  "stopWords": [],
+  "synonyms": {},
+  "distinctAttribute": null
+}
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index b7b8685aa..df87d868d 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -790,10 +790,19 @@ impl IndexScheduler {

         let mut res = BTreeMap::new();

+        let processing_tasks = { self.processing_tasks.read().unwrap().processing.len() };
+
         res.insert(
             "statuses".to_string(),
             enum_iterator::all::<Status>()
-                .map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
+                .map(|s| {
+                    let tasks = self.get_status(&rtxn, s)?.len();
+                    match s {
+                        Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
+                        Status::Processing => Ok((s.to_string(), processing_tasks)),
+                        s => Ok((s.to_string(), tasks)),
+                    }
+                })
                 .collect::<Result<BTreeMap<String, u64>>>()?,
         );
         res.insert(
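
Note on the `get_stats` change above: while a batch runs, its tasks are still persisted with status `enqueued`; only the scheduler's in-memory `processing_tasks` set knows they are being worked on. The new code therefore reports that set's size as `processing` and subtracts it from `enqueued`. A minimal standalone sketch of this accounting, using illustrative names (`adjusted_statuses`, `db_counts`) rather than the scheduler's real API:

    use std::collections::BTreeMap;

    // `db_counts` stands in for per-status task counts read from the database,
    // where tasks currently inside a batch are still stored as "enqueued".
    fn adjusted_statuses(db_counts: &BTreeMap<String, u64>, processing: u64) -> BTreeMap<String, u64> {
        let mut out = db_counts.clone();
        if let Some(enqueued) = out.get_mut("enqueued") {
            *enqueued -= processing; // move the running tasks out of "enqueued"...
        }
        out.insert("processing".to_string(), processing); // ...and report them here
        out
    }

    fn main() {
        let db = BTreeMap::from([("enqueued".to_string(), 3), ("succeeded".to_string(), 0)]);
        // One task is inside a batch: stats should show 2 enqueued / 1 processing.
        let stats = adjusted_statuses(&db, 1);
        assert_eq!(stats["enqueued"], 2);
        assert_eq!(stats["processing"], 1);
    }
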
@@ -4131,4 +4140,154 @@ mod tests {
         snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
         drop(rtxn);
     }
+
+    #[test]
+    fn basic_get_stats() {
+        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+        let kind = index_creation_task("catto", "mouse");
+        let _task = index_scheduler.register(kind).unwrap();
+        let kind = index_creation_task("doggo", "sheep");
+        let _task = index_scheduler.register(kind).unwrap();
+        let kind = index_creation_task("whalo", "fish");
+        let _task = index_scheduler.register(kind).unwrap();
+
+        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
+        {
+          "indexes": {
+            "catto": 1,
+            "doggo": 1,
+            "whalo": 1
+          },
+          "statuses": {
+            "canceled": 0,
+            "enqueued": 3,
+            "failed": 0,
+            "processing": 0,
+            "succeeded": 0
+          },
+          "types": {
+            "documentAdditionOrUpdate": 0,
+            "documentDeletion": 0,
+            "dumpCreation": 0,
+            "indexCreation": 3,
+            "indexDeletion": 0,
+            "indexSwap": 0,
+            "indexUpdate": 0,
+            "settingsUpdate": 0,
+            "snapshotCreation": 0,
+            "taskCancelation": 0,
+            "taskDeletion": 0
+          }
+        }
+        "###);
+
+        handle.advance_till([Start, BatchCreated]);
+        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
+        {
+          "indexes": {
+            "catto": 1,
+            "doggo": 1,
+            "whalo": 1
+          },
+          "statuses": {
+            "canceled": 0,
+            "enqueued": 2,
+            "failed": 0,
+            "processing": 1,
+            "succeeded": 0
+          },
+          "types": {
+            "documentAdditionOrUpdate": 0,
+            "documentDeletion": 0,
+            "dumpCreation": 0,
+            "indexCreation": 3,
+            "indexDeletion": 0,
+            "indexSwap": 0,
+            "indexUpdate": 0,
+            "settingsUpdate": 0,
+            "snapshotCreation": 0,
+            "taskCancelation": 0,
+            "taskDeletion": 0
+          }
+        }
+        "###);
+
+        handle.advance_till([
+            InsideProcessBatch,
+            InsideProcessBatch,
+            ProcessBatchSucceeded,
+            AfterProcessing,
+            Start,
+            BatchCreated,
+        ]);
+        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
+        {
+          "indexes": {
+            "catto": 1,
+            "doggo": 1,
+            "whalo": 1
+          },
+          "statuses": {
+            "canceled": 0,
+            "enqueued": 1,
+            "failed": 0,
+            "processing": 1,
+            "succeeded": 1
+          },
+          "types": {
+            "documentAdditionOrUpdate": 0,
+            "documentDeletion": 0,
+            "dumpCreation": 0,
+            "indexCreation": 3,
+            "indexDeletion": 0,
+            "indexSwap": 0,
+            "indexUpdate": 0,
+            "settingsUpdate": 0,
+            "snapshotCreation": 0,
+            "taskCancelation": 0,
+            "taskDeletion": 0
+          }
+        }
+        "###);
+
+        // now we start one more batch: its tasks move from enqueued to processing
+        handle.advance_till([
+            InsideProcessBatch,
+            InsideProcessBatch,
+            ProcessBatchSucceeded,
+            AfterProcessing,
+            Start,
+            BatchCreated,
+        ]);
+        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
+        {
+          "indexes": {
+            "catto": 1,
+            "doggo": 1,
+            "whalo": 1
+          },
+          "statuses": {
+            "canceled": 0,
+            "enqueued": 0,
+            "failed": 0,
+            "processing": 1,
+            "succeeded": 2
+          },
+          "types": {
+            "documentAdditionOrUpdate": 0,
+            "documentDeletion": 0,
+            "dumpCreation": 0,
+            "indexCreation": 3,
+            "indexDeletion": 0,
+            "indexSwap": 0,
+            "indexUpdate": 0,
+            "settingsUpdate": 0,
+            "snapshotCreation": 0,
+            "taskCancelation": 0,
+            "taskDeletion": 0
+          }
+        }
+        "###);
+    }
 }
diff --git a/meili-snap/src/lib.rs b/meili-snap/src/lib.rs
index 32d3385d9..c467aef49 100644
--- a/meili-snap/src/lib.rs
+++ b/meili-snap/src/lib.rs
@@ -167,7 +167,9 @@ macro_rules! snapshot {
         let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
         settings.bind(|| {
             let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            insta::allow_duplicates! {
+                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            }
         });
     };
     ($value:expr, @$inline:literal) => {
@@ -176,7 +178,9 @@ macro_rules! snapshot {
         let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
         settings.bind(|| {
             let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(snap, @$inline);
+            insta::allow_duplicates! {
+                meili_snap::insta::assert_snapshot!(snap, @$inline);
+            }
         });
     };
     ($value:expr) => {
@@ -194,7 +198,9 @@ macro_rules! snapshot {
         let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
         settings.bind(|| {
             let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            insta::allow_duplicates! {
+                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            }
         });
     };
 }
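
The `insta::allow_duplicates!` wrapper makes it legal for one snapshot assertion to execute several times within a single test, provided every execution produces the same value; presumably this is what lets a `snapshot!` call site sit inside loops or repeatedly invoked helpers. A small illustrative sketch (not taken from the suite; assumes `insta` as a dev-dependency):

    #[test]
    fn snapshot_in_a_loop() {
        for name in ["catto", "doggo"] {
            let normalized = format!("hello {name}").replace(name, "[name]");
            // Without allow_duplicates!, insta errors when one inline snapshot
            // assertion is hit more than once in the same test run.
            insta::allow_duplicates! {
                insta::assert_snapshot!(normalized, @"hello [name]");
            }
        }
    }
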
diff --git a/meilisearch/tests/search/geo.rs b/meilisearch/tests/search/geo.rs
index 4253bc210..5eec5857a 100644
--- a/meilisearch/tests/search/geo.rs
+++ b/meilisearch/tests/search/geo.rs
@@ -1,3 +1,4 @@
+use meili_snap::{json_string, snapshot};
 use once_cell::sync::Lazy;
 use serde_json::{json, Value};

@@ -60,3 +61,59 @@ async fn geo_sort_with_geo_strings() {
         )
         .await;
 }
+
+#[actix_rt::test]
+async fn geo_bounding_box_with_string_and_number() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.update_settings_filterable_attributes(json!(["_geo"])).await;
+    index.update_settings_sortable_attributes(json!(["_geo"])).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(2).await;
+
+    index
+        .search(
+            json!({
+                "filter": "_geoBoundingBox([89, 179], [-89, -179])",
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
+                {
+                  "hits": [
+                    {
+                      "id": 1,
+                      "name": "Taco Truck",
+                      "address": "444 Salsa Street, Burritoville",
+                      "type": "Mexican",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": 34.0522,
+                        "lng": -118.2437
+                      }
+                    },
+                    {
+                      "id": 2,
+                      "name": "La Bella Italia",
+                      "address": "456 Elm Street, Townsville",
+                      "type": "Italian",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": "45.4777599",
+                        "lng": "9.1967508"
+                      }
+                    }
+                  ],
+                  "query": "",
+                  "processingTimeMs": "[time]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 2
+                }
+                "###);
+            },
+        )
+        .await;
+}
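
For context, a `_geoBoundingBox` filter takes the top-right corner first and the bottom-left corner second, each as a `[lat, lng]` pair, so the filter in this test covers nearly the whole globe and should match both the numeric and the string coordinates. A sketch of building such a filter string; `geo_bounding_box` is a hypothetical helper, not part of any SDK:

    /// Hypothetical helper: renders a Meilisearch `_geoBoundingBox` filter.
    /// Corners are (lat, lng); the first is the top-right, the second the bottom-left.
    fn geo_bounding_box(top_right: (f64, f64), bottom_left: (f64, f64)) -> String {
        format!(
            "_geoBoundingBox([{}, {}], [{}, {}])",
            top_right.0, top_right.1, bottom_left.0, bottom_left.1
        )
    }

    fn main() {
        // The near-global box used by the test above.
        assert_eq!(
            geo_bounding_box((89.0, 179.0), (-89.0, -179.0)),
            "_geoBoundingBox([89, 179], [-89, -179])"
        );
    }
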
diff --git a/milli/src/index.rs b/milli/src/index.rs
index f3432628f..25fd6609a 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1820,11 +1820,11 @@ pub(crate) mod tests {
             .unwrap();
         index
             .add_documents(documents!([
-                { "id": 0, "_geo": { "lat": 0, "lng": 0 } },
-                { "id": 1, "_geo": { "lat": 0, "lng": -175 } },
-                { "id": 2, "_geo": { "lat": 0, "lng": 175 } },
+                { "id": 0, "_geo": { "lat": "0", "lng": "0" } },
+                { "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
+                { "id": 2, "_geo": { "lat": "0", "lng": 175 } },
                 { "id": 3, "_geo": { "lat": 85, "lng": 0 } },
-                { "id": 4, "_geo": { "lat": -85, "lng": 0 } },
+                { "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
             ]))
             .unwrap();

diff --git a/milli/src/lib.rs b/milli/src/lib.rs
index 3e5f63fd5..cd97d6192 100644
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500;
 ///
 /// This number is determined by the keys of the different facet databases
 /// and adding a margin of safety.
-pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20;
+pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;

 /// The maximum length a word can be
 pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs
index 16fc1cd2f..15776a709 100644
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@@ -94,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue
 use crate::heed_codec::ByteSliceRefCodec;
 use crate::update::index_documents::create_sorter;
 use crate::update::merge_btreeset_string;
-use crate::{BEU16StrCodec, Index, Result, BEU16};
+use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};

 pub mod bulk;
 pub mod delete;
@@ -191,7 +191,16 @@ impl<'i> FacetsUpdate<'i> {
         for result in database.iter(wtxn)? {
             let (facet_group_key, ()) = result?;
             if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
-                let normalized_facet = left_bound.normalize(&options);
+                let mut normalized_facet = left_bound.normalize(&options);
+                let normalized_truncated_facet: String;
+                if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
+                    normalized_truncated_facet = normalized_facet
+                        .char_indices()
+                        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+                        .map(|(_, c)| c)
+                        .collect();
+                    normalized_facet = normalized_truncated_facet.into();
+                }
                 let set = BTreeSet::from_iter(std::iter::once(left_bound));
                 let key = (field_id, normalized_facet.as_ref());
                 let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
index e5e864b66..0035f54e1 100644
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@@ -46,7 +46,7 @@ pub fn extract_facet_string_docids(
         if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
             normalised_truncated_value = normalised_value
                 .char_indices()
-                .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
+                .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
                 .map(|(_, c)| c)
                 .collect();
             normalised_value = normalised_truncated_value.as_str();
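
Both truncation sites now iterate `char_indices` and compare the byte offset at which each character starts, so the cut always lands on a UTF-8 character boundary (plain byte slicing such as `&s[..n]` panics when `n` falls inside a multi-byte character). A character that starts just below the limit is kept whole, so the output can overshoot the limit by up to three bytes; the removed `idx + 4` guard prevented that overshoot, and shrinking `MAX_FACET_VALUE_LENGTH` to `MAX_LMDB_KEY_LENGTH - 32` in this same diff plausibly restores the margin instead. A standalone sketch with a deliberately tiny limit so the effect is visible:

    const MAX: usize = 8; // stand-in for MAX_FACET_VALUE_LENGTH (500 - 32 in milli)

    fn truncate_on_char_boundary(s: &str) -> String {
        s.char_indices()
            .take_while(|(idx, _)| *idx < MAX) // idx = byte offset where the char starts
            .map(|(_, c)| c)
            .collect()
    }

    fn main() {
        // 'é' is two bytes in UTF-8, so `&s[..MAX]` could panic here.
        assert_eq!(truncate_on_char_boundary("héllo wörld"), "héllo w");
        // '€' (three bytes) starts at byte 7 (< MAX) and is kept whole,
        // so the output is 10 bytes: the overshoot described above.
        assert_eq!(truncate_on_char_boundary("aaaaaaa€x").len(), 10);
    }
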
diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 882d7779f..5496a071b 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -28,11 +28,13 @@ pub struct ExtractedFacetValues {
 ///
 /// Returns the generated grenad reader containing the docid, the fid and the original value as key
 /// and the normalized value as value extracted from the given chunk of documents.
+/// We need the fids of the geo fields to correctly parse them as numbers when they were initially sent as strings.
 #[logging_timer::time]
 pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
     faceted_fields: &HashSet<FieldId>,
+    geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
     puffin::profile_function!();
@@ -84,7 +86,10 @@ pub fn extract_fid_docid_facet_values(

                 let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;

-                match extract_facet_values(&value) {
+                match extract_facet_values(
+                    &value,
+                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
+                ) {
                     FilterableValues::Null => {
                         facet_is_null_docids.entry(field_id).or_default().insert(document);
                     }
@@ -177,12 +182,13 @@ enum FilterableValues {
     Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
 }

-fn extract_facet_values(value: &Value) -> FilterableValues {
+fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
     fn inner_extract_facet_values(
         value: &Value,
         can_recurse: bool,
         output_numbers: &mut Vec<f64>,
         output_strings: &mut Vec<(String, String)>,
+        geo_field: bool,
     ) {
         match value {
             Value::Null => (),
@@ -193,13 +199,30 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
                 }
             }
             Value::String(original) => {
+                // If we're working on a geofield, this string MUST parse as a number, or else the enrich
+                // pipeline let an invalid value through; since that pipeline already ran, we log rather than crash.
+                if geo_field {
+                    if let Ok(float) = original.parse() {
+                        output_numbers.push(float);
+                    } else {
+                        log::warn!(
+                            "Internal error, could not parse a geofield that has been validated. Please open an issue."
+                        )
+                    }
+                }
                 let normalized = crate::normalize_facet(original);
                 output_strings.push((normalized, original.clone()));
             }
             Value::Array(values) => {
                 if can_recurse {
                     for value in values {
-                        inner_extract_facet_values(value, false, output_numbers, output_strings);
+                        inner_extract_facet_values(
+                            value,
+                            false,
+                            output_numbers,
+                            output_strings,
+                            geo_field,
+                        );
                     }
                 }
             }
@@ -215,7 +238,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
         otherwise => {
             let mut numbers = Vec::new();
             let mut strings = Vec::new();
-            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
+            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
             FilterableValues::Values { numbers, strings }
         }
     }
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 4e174631c..605ef8dd8 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -378,6 +378,7 @@ fn send_and_extract_flattened_documents_data(
             flattened_documents_chunk.clone(),
             indexer,
             faceted_fields,
+            geo_fields_ids,
         )?;

         // send docid_fid_facet_numbers_chunk to DB writer
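
With `geo_fields_ids` threaded down to the extractor, a coordinate sent as a string (such as `"lat": "45.4777599"`) is now also parsed into the numeric facet data that geo sorting and geo filtering read. A reduced sketch of the parse-or-warn pattern; `push_coordinate` is a hypothetical name, and `eprintln!` stands in for `log::warn!` to keep the sketch dependency-free:

    fn push_coordinate(original: &str, geo_field: bool, output_numbers: &mut Vec<f64>) {
        if geo_field {
            match original.parse::<f64>() {
                Ok(float) => output_numbers.push(float),
                // The enrich pipeline already validated this value, so failing to
                // parse here signals an internal inconsistency, not a user error.
                Err(_) => eprintln!("could not parse a geofield that has been validated: {original:?}"),
            }
        }
    }

    fn main() {
        let mut numbers = Vec::new();
        push_coordinate("45.4777599", true, &mut numbers); // string latitude from a document
        push_coordinate("-175", true, &mut numbers);
        assert_eq!(numbers, vec![45.4777599, -175.0]);
    }
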