diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 030390822..8898e5dac 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -1,10 +1,12 @@ +use std::error::Error; use std::io::stdin; +use std::path::Path; use std::time::Instant; -use std::{error::Error, path::Path}; use heed::EnvOpenOptions; use milli::{ - execute_search, DefaultSearchLogger, Index, SearchContext, SearchLogger, TermsMatchingStrategy, + execute_search, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger, + TermsMatchingStrategy, }; #[global_allocator] @@ -54,6 +56,7 @@ fn main() -> Result<(), Box> { false, &None, &None, + GeoSortStrategy::default(), 0, 20, None, diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 868df74e8..48699e76f 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -79,7 +79,8 @@ pub use filter_parser::{Condition, FilterCondition, Span, Token}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; pub use search::new::{ - execute_search, DefaultSearchLogger, SearchContext, SearchLogger, VisualSearchLogger, + execute_search, DefaultSearchLogger, GeoSortStrategy, SearchContext, SearchLogger, + VisualSearchLogger, }; use serde_json::Value; pub use {charabia as tokenizer, heed}; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index a0bf272dd..97725b9bf 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -27,6 +27,7 @@ pub struct Search<'a> { offset: usize, limit: usize, sort_criteria: Option>, + geo_strategy: new::GeoSortStrategy, terms_matching_strategy: TermsMatchingStrategy, words_limit: usize, exhaustive_number_hits: bool, @@ -42,6 +43,7 @@ impl<'a> Search<'a> { offset: 0, limit: 20, sort_criteria: None, + geo_strategy: new::GeoSortStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(), exhaustive_number_hits: false, words_limit: 10, @@ -85,6 +87,12 @@ impl<'a> Search<'a> { self } + #[cfg(test)] + pub fn geo_sort_strategy(&mut self, 
strategy: new::GeoSortStrategy) -> &mut Search<'a> { + self.geo_strategy = strategy; + self + } + /// Force the search to exhastivelly compute the number of candidates, /// this will increase the search time but allows finite pagination. pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> { @@ -102,6 +110,7 @@ impl<'a> Search<'a> { self.exhaustive_number_hits, &self.filter, &self.sort_criteria, + self.geo_strategy, self.offset, self.limit, Some(self.words_limit), @@ -127,6 +136,7 @@ impl fmt::Debug for Search<'_> { offset, limit, sort_criteria, + geo_strategy: _, terms_matching_strategy, words_limit, exhaustive_number_hits, diff --git a/milli/src/search/new/geo_sort.rs b/milli/src/search/new/geo_sort.rs new file mode 100644 index 000000000..b841dfe9c --- /dev/null +++ b/milli/src/search/new/geo_sort.rs @@ -0,0 +1,261 @@ +use std::collections::VecDeque; +use std::iter::FromIterator; + +use heed::types::{ByteSlice, Unit}; +use heed::{RoPrefix, RoTxn}; +use roaring::RoaringBitmap; +use rstar::RTree; + +use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; +use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec}; +use crate::{ + distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext, + SearchLogger, +}; + +const FID_SIZE: usize = 2; +const DOCID_SIZE: usize = 4; + +#[allow(clippy::drop_non_drop)] +fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] { + concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes()) +} + +/// Return an iterator over each number value in the given field of the given document. +fn facet_number_values<'a>( + docid: u32, + field_id: u16, + index: &Index, + txn: &'a RoTxn, +) -> Result, Unit>> { + let key = facet_values_prefix_key(field_id, docid); + + let iter = index + .field_id_docid_facet_f64s + .remap_key_type::() + .prefix_iter(txn, &key)? 
+ .remap_key_type(); + + Ok(iter) +} + +/// Define the strategy used by the geo sort. +/// The parameter represents the cache size, and, in the case of the Dynamic strategy, +/// the point where we move from using the iterative strategy to the rtree. +#[derive(Debug, Clone, Copy)] +pub enum Strategy { + AlwaysIterative(usize), + AlwaysRtree(usize), + Dynamic(usize), +} + +impl Default for Strategy { + fn default() -> Self { + Strategy::Dynamic(1000) + } +} + +impl Strategy { + pub fn use_rtree(&self, candidates: usize) -> bool { + match self { + Strategy::AlwaysIterative(_) => false, + Strategy::AlwaysRtree(_) => true, + Strategy::Dynamic(i) => candidates >= *i, + } + } + + pub fn cache_size(&self) -> usize { + match self { + Strategy::AlwaysIterative(i) | Strategy::AlwaysRtree(i) | Strategy::Dynamic(i) => *i, + } + } +} + +pub struct GeoSort { + query: Option, + + strategy: Strategy, + ascending: bool, + point: [f64; 2], + field_ids: Option<[u16; 2]>, + rtree: Option>, + + cached_sorted_docids: VecDeque, + geo_candidates: RoaringBitmap, +} + +impl GeoSort { + pub fn new( + strategy: Strategy, + geo_faceted_docids: RoaringBitmap, + point: [f64; 2], + ascending: bool, + ) -> Result { + Ok(Self { + query: None, + strategy, + ascending, + point, + geo_candidates: geo_faceted_docids, + field_ids: None, + rtree: None, + cached_sorted_docids: VecDeque::new(), + }) + } + + /// Refill the internal buffer of cached docids based on the strategy. + /// Drop the rtree if we don't need it anymore. 
+ fn fill_buffer<'ctx>(&mut self, ctx: &mut SearchContext<'ctx>) -> Result<()> { + debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng"); + debug_assert!(self.cached_sorted_docids.is_empty()); + + // if we had an rtree and the strategy doesn't require one anymore we can drop it + let use_rtree = self.strategy.use_rtree(self.geo_candidates.len() as usize); + if !use_rtree && self.rtree.is_some() { + self.rtree = None; + } + + let cache_size = self.strategy.cache_size(); + if let Some(ref mut rtree) = self.rtree { + let point = lat_lng_to_xyz(&self.point); + + if self.ascending { + for point in rtree.nearest_neighbor_iter(&point) { + if self.geo_candidates.contains(point.data.0) { + self.cached_sorted_docids.push_back(point.data.0); + if self.cached_sorted_docids.len() >= cache_size { + break; + } + } + } + } else { + // in the case of the desc geo sort we have to scan the whole database + // and only keep the latest candidates. + for point in rtree.nearest_neighbor_iter(&point) { + if self.geo_candidates.contains(point.data.0) { + self.cached_sorted_docids.pop_front(); + self.cached_sorted_docids.push_back(point.data.0); + } + } + } + } else { + // the iterative version + let [lat, lng] = self.field_ids.unwrap(); + + let mut documents = self + .geo_candidates + .iter() + .map(|id| -> Result<_> { + Ok(( + id, + [ + facet_number_values(id, lat, ctx.index, ctx.txn)? + .next() + .expect("A geo faceted document doesn't contain any lat")? + .0 + .2, + facet_number_values(id, lng, ctx.index, ctx.txn)? + .next() + .expect("A geo faceted document doesn't contain any lng")? 
+ .0 + .2, + ], + )) + }) + .collect::>>()?; + documents.sort_by_key(|(_, p)| distance_between_two_points(&self.point, &p) as usize); + self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id)); + }; + + if self.cached_sorted_docids.is_empty() && matches!(self.strategy, Strategy::AlwaysRtree(_)) + { + // this shouldn't be possible + self.rtree = None; + } + Ok(()) + } +} + +impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { + fn id(&self) -> String { + "geo_sort".to_owned() + } + + fn start_iteration( + &mut self, + ctx: &mut SearchContext<'ctx>, + _logger: &mut dyn SearchLogger, + universe: &RoaringBitmap, + query: &Q, + ) -> Result<()> { + assert!(self.query.is_none()); + + self.query = Some(query.clone()); + self.geo_candidates &= universe; + + if self.geo_candidates.len() == 0 { + return Ok(()); + } + + let fid_map = ctx.index.fields_ids_map(ctx.txn)?; + let lat = fid_map.id("_geo.lat").expect("geo candidates but no fid for lat"); + let lng = fid_map.id("_geo.lng").expect("geo candidates but no fid for lng"); + self.field_ids = Some([lat, lng]); + + if self.strategy.use_rtree(self.geo_candidates.len() as usize) { + self.rtree = Some(ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree")); + } + + self.fill_buffer(ctx)?; + Ok(()) + } + + fn next_bucket( + &mut self, + ctx: &mut SearchContext<'ctx>, + logger: &mut dyn SearchLogger, + universe: &RoaringBitmap, + ) -> Result>> { + assert!(universe.len() > 1); + let query = self.query.as_ref().unwrap().clone(); + self.geo_candidates &= universe; + + if self.geo_candidates.is_empty() { + return Ok(Some(RankingRuleOutput { query, candidates: universe.clone() })); + } + + let ascending = self.ascending; + let next = |cache: &mut VecDeque<_>| { + if ascending { + cache.pop_front() + } else { + cache.pop_back() + } + }; + while let Some(id) = next(&mut self.cached_sorted_docids) { + if self.geo_candidates.contains(id) { + return Ok(Some(RankingRuleOutput { + 
query, + candidates: RoaringBitmap::from_iter([id]), + })); + } + } + + // if we got out of this loop it means we've exhausted our cache. + + if self.rtree.is_none() { + // with no rtree it means all geo candidates have been returned. We can return all the non geo-faceted documents + Ok(Some(RankingRuleOutput { query, candidates: universe.clone() })) + } else { + // else, we need to refill our bucket and run the function again + self.fill_buffer(ctx)?; + self.next_bucket(ctx, logger, universe) + } + } + + fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger) { + self.query = None; + self.rtree = None; + self.cached_sorted_docids.clear(); + } +} diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 246745678..eb006fbf3 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -1,6 +1,7 @@ mod bucket_sort; mod db_cache; mod distinct; +mod geo_sort; mod graph_based_ranking_rule; mod interner; mod limits; @@ -25,32 +26,30 @@ mod tests; use std::collections::HashSet; -use bucket_sort::bucket_sort; +use bucket_sort::{bucket_sort, BucketSortOutput}; use charabia::TokenizerBuilder; use db_cache::DatabaseCache; -use graph_based_ranking_rule::{Fid, Position, Proximity, Typo}; +use exact_attribute::ExactAttribute; +use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo}; use heed::RoTxn; -use interner::DedupInterner; +use interner::{DedupInterner, Interner}; pub use logger::visual::VisualSearchLogger; pub use logger::{DefaultSearchLogger, SearchLogger}; use query_graph::{QueryGraph, QueryNode}; use query_term::{located_query_terms_from_string, LocatedQueryTerm, Phrase, QueryTerm}; -use ranking_rules::{PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait}; -use resolve_query_graph::PhraseDocIdsCache; +use ranking_rules::{ + BoxRankingRule, PlaceholderQuery, RankingRule, RankingRuleOutput, RankingRuleQueryTrait, +}; +use resolve_query_graph::{compute_query_graph_docids, 
PhraseDocIdsCache}; use roaring::RoaringBitmap; +use sort::Sort; use words::Words; +use self::geo_sort::GeoSort; +pub use self::geo_sort::Strategy as GeoSortStrategy; +use self::interner::Interned; use crate::search::new::distinct::apply_distinct_rule; use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; -use bucket_sort::BucketSortOutput; -use exact_attribute::ExactAttribute; -use graph_based_ranking_rule::Exactness; -use interner::Interner; -use ranking_rules::{BoxRankingRule, RankingRule}; -use resolve_query_graph::compute_query_graph_docids; -use sort::Sort; - -use self::interner::Interned; /// A structure used throughout the execution of a search query. pub struct SearchContext<'ctx> { @@ -139,10 +138,11 @@ fn resolve_universe( fn get_ranking_rules_for_placeholder_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, + geo_strategy: geo_sort::Strategy, ) -> Result>> { let mut sort = false; - let mut asc = HashSet::new(); - let mut desc = HashSet::new(); + let mut sorted_fields = HashSet::new(); + let mut geo_sorted = false; let mut ranking_rules: Vec> = vec![]; let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; for rr in settings_ranking_rules { @@ -157,21 +157,28 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( if sort { continue; } - resolve_sort_criteria(sort_criteria, ctx, &mut ranking_rules, &mut asc, &mut desc)?; + resolve_sort_criteria( + sort_criteria, + ctx, + &mut ranking_rules, + &mut sorted_fields, + &mut geo_sorted, + geo_strategy, + )?; sort = true; } crate::Criterion::Asc(field_name) => { - if asc.contains(&field_name) { + if sorted_fields.contains(&field_name) { continue; } - asc.insert(field_name.clone()); + sorted_fields.insert(field_name.clone()); ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?)); } crate::Criterion::Desc(field_name) => { - if desc.contains(&field_name) { + if sorted_fields.contains(&field_name) { continue; } - 
desc.insert(field_name.clone()); + sorted_fields.insert(field_name.clone()); ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?)); } } @@ -183,6 +190,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( fn get_ranking_rules_for_query_graph_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, + geo_strategy: geo_sort::Strategy, terms_matching_strategy: TermsMatchingStrategy, ) -> Result>> { // query graph search @@ -192,8 +200,8 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( let mut sort = false; let mut attribute = false; let mut exactness = false; - let mut asc = HashSet::new(); - let mut desc = HashSet::new(); + let mut sorted_fields = HashSet::new(); + let mut geo_sorted = false; let mut ranking_rules: Vec> = vec![]; let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; @@ -245,7 +253,14 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( if sort { continue; } - resolve_sort_criteria(sort_criteria, ctx, &mut ranking_rules, &mut asc, &mut desc)?; + resolve_sort_criteria( + sort_criteria, + ctx, + &mut ranking_rules, + &mut sorted_fields, + &mut geo_sorted, + geo_strategy, + )?; sort = true; } crate::Criterion::Exactness => { @@ -257,17 +272,17 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( exactness = true; } crate::Criterion::Asc(field_name) => { - if asc.contains(&field_name) { + if sorted_fields.contains(&field_name) { continue; } - asc.insert(field_name.clone()); + sorted_fields.insert(field_name.clone()); ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?)); } crate::Criterion::Desc(field_name) => { - if desc.contains(&field_name) { + if sorted_fields.contains(&field_name) { continue; } - desc.insert(field_name.clone()); + sorted_fields.insert(field_name.clone()); ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?)); } } @@ -279,33 +294,53 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( sort_criteria: &Option>, ctx: 
&SearchContext<'ctx>, ranking_rules: &mut Vec>, - asc: &mut HashSet, - desc: &mut HashSet, + sorted_fields: &mut HashSet, + geo_sorted: &mut bool, + geo_strategy: geo_sort::Strategy, ) -> Result<()> { let sort_criteria = sort_criteria.clone().unwrap_or_default(); ranking_rules.reserve(sort_criteria.len()); for criterion in sort_criteria { - let sort_ranking_rule = match criterion { + match criterion { AscDesc::Asc(Member::Field(field_name)) => { - if asc.contains(&field_name) { + if sorted_fields.contains(&field_name) { continue; } - asc.insert(field_name.clone()); - Sort::new(ctx.index, ctx.txn, field_name, true)? + sorted_fields.insert(field_name.clone()); + ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?)); } AscDesc::Desc(Member::Field(field_name)) => { - if desc.contains(&field_name) { + if sorted_fields.contains(&field_name) { continue; } - desc.insert(field_name.clone()); - Sort::new(ctx.index, ctx.txn, field_name, false)? + sorted_fields.insert(field_name.clone()); + ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?)); } - // geosearch - _ => { - todo!() + AscDesc::Asc(Member::Geo(point)) => { + if std::mem::replace(geo_sorted, true) { // keep only the first geo sort criterion + continue; + } + let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?; + ranking_rules.push(Box::new(GeoSort::new( + geo_strategy, + geo_faceted_docids, + point, + true, + )?)); + } + AscDesc::Desc(Member::Geo(point)) => { + if std::mem::replace(geo_sorted, true) { // keep only the first geo sort criterion + continue; + } + let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?; + ranking_rules.push(Box::new(GeoSort::new( + geo_strategy, + geo_faceted_docids, + point, + false, + )?)); } }; - ranking_rules.push(Box::new(sort_ranking_rule)); } Ok(()) } @@ -318,6 +353,7 @@ pub fn execute_search( exhaustive_number_hits: bool, filters: &Option, sort_criteria: &Option>, + geo_strategy: geo_sort::Strategy, from: usize, length: usize, words_limit: Option, @@ -373,7 +409,8 @@ pub fn execute_search( bucket_sort(ctx, ranking_rules, 
&graph, &universe, from, length, query_graph_logger)? } else { - let ranking_rules = get_ranking_rules_for_placeholder_search(ctx, sort_criteria)?; + let ranking_rules = + get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; bucket_sort( ctx, ranking_rules, diff --git a/milli/src/search/new/tests/geo_sort.rs b/milli/src/search/new/tests/geo_sort.rs new file mode 100644 index 000000000..e49fd7c99 --- /dev/null +++ b/milli/src/search/new/tests/geo_sort.rs @@ -0,0 +1,273 @@ +/*! +This module tests the `geo_sort` ranking rule: + +1. an ascending geo sort returns the geo documents from the closest to the farthest from the given point +2. a descending geo sort returns the geo documents from the farthest to the closest to the given point +3. documents without a `_geo` field are returned after all the geo-sorted documents +4. the iterative and rtree strategies return the same documents in the same order +5. the results do not depend on the cache size given to the strategy (`Dynamic` included) +6. the latitude doesn't wrap around, while the longitude does +7. the geo sort can be combined with the `words` ranking rule when a query is given +8. the search still returns the matching documents when no document has a `_geo` field +*/ + +use big_s::S; +use heed::RoTxn; +use maplit::hashset; + +use crate::index::tests::TempIndex; +use crate::search::new::tests::collect_field_values; +use crate::{AscDesc, Criterion, GeoSortStrategy, Member, Search, SearchResult}; + +fn create_index() -> TempIndex { + let index = TempIndex::new(); + + index + .update_settings(|s| { + s.set_primary_key("id".to_owned()); + s.set_sortable_fields(hashset! { S("_geo") }); + s.set_criteria(vec![Criterion::Words, Criterion::Sort]); + }) + .unwrap(); + index +} + +#[track_caller] +fn execute_iterative_and_rtree_returns_the_same<'a>( + rtxn: &RoTxn<'a>, + index: &TempIndex, + search: &mut Search<'a>, +) -> Vec { + search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2)); + let SearchResult { documents_ids, .. 
} = search.execute().unwrap(); + let iterative_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids); + + search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000)); + let SearchResult { documents_ids, .. } = search.execute().unwrap(); + let iterative_ids = collect_field_values(&index, rtxn, "id", &documents_ids); + + assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket"); + + search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2)); + let SearchResult { documents_ids, .. } = search.execute().unwrap(); + let rtree_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids); + + search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000)); + let SearchResult { documents_ids, .. } = search.execute().unwrap(); + let rtree_ids = collect_field_values(&index, rtxn, "id", &documents_ids); + + assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket"); + + assert_eq!(iterative_ids, rtree_ids, "iterative vs rtree"); + + iterative_ids.into_iter().map(|id| id.parse().unwrap()).collect() +} + +#[test] +fn test_geo_sort() { + let index = create_index(); + + index + .add_documents(documents!([ + { "id": 2, "_geo": { "lat": 2, "lng": -1 } }, + { "id": 3, "_geo": { "lat": -2, "lng": -2 } }, + { "id": 5, "_geo": { "lat": 6, "lng": -5 } }, + { "id": 4, "_geo": { "lat": 3, "lng": 5 } }, + { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, + { "id": 1, "_geo": { "lat": 1, "lng": 1 } }, + { "id": 6 }, { "id": 8 }, { "id": 7 }, { "id": 10 }, { "id": 9 }, + ])) + .unwrap(); + + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + + // --- asc + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); + + s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); + let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); + + // --- desc + s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]); + + s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); + let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); + let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); + let ids = collect_field_values(&index, &txn, "id", &documents_ids); + insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); +} + +#[test] +fn test_geo_sort_around_the_edge_of_the_flat_earth() { + let index = create_index(); + + index + .add_documents(documents!([ + { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, + { "id": 1, "_geo": { "lat": 88, "lng": 0 } }, + { "id": 2, "_geo": { "lat": -89, "lng": 0 } }, + + { "id": 3, "_geo": { "lat": 0, "lng": 178 } }, + { "id": 4, "_geo": { "lat": 0, "lng": -179 } }, + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + + let mut s = Search::new(&rtxn, &index); + + // --- asc + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4]"); + + // ensuring the lat doesn't wrap around + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([85., 0.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[1, 0, 3, 4, 2]"); + + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([-85., 0.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[2, 0, 3, 4, 1]"); + + // ensuring the lng does wrap around + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 175.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[3, 4, 2, 1, 0]"); + + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., -175.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[4, 3, 2, 1, 0]"); + + // --- desc + s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]); + let ids = 
execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[4, 3, 2, 1, 0]"); + + // ensuring the lat doesn't wrap around + s.sort_criteria(vec![AscDesc::Desc(Member::Geo([85., 0.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[2, 4, 3, 0, 1]"); + + s.sort_criteria(vec![AscDesc::Desc(Member::Geo([-85., 0.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[1, 4, 3, 0, 2]"); + + // ensuring the lng does wrap around + s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 175.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 4, 3]"); + + s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., -175.]))]); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4]"); +} + +#[test] +fn geo_sort_mixed_with_words() { + let index = create_index(); + + index + .add_documents(documents!([ + { "id": 0, "doggo": "jean", "_geo": { "lat": 0, "lng": 0 } }, + { "id": 1, "doggo": "intel", "_geo": { "lat": 88, "lng": 0 } }, + { "id": 2, "doggo": "jean bob", "_geo": { "lat": -89, "lng": 0 } }, + { "id": 3, "doggo": "jean michel", "_geo": { "lat": 0, "lng": 178 } }, + { "id": 4, "doggo": "bob marley", "_geo": { "lat": 0, "lng": -179 } }, + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + + let mut s = Search::new(&rtxn, &index); + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); + + s.query("jean"); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[0, 2, 3]"); + + s.query("bob"); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + 
insta::assert_snapshot!(format!("{ids:?}"), @"[2, 4]"); + + s.query("intel"); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[1]"); +} + +#[test] +fn geo_sort_without_any_geo_faceted_documents() { + let index = create_index(); + + index + .add_documents(documents!([ + { "id": 0, "doggo": "jean" }, + { "id": 1, "doggo": "intel" }, + { "id": 2, "doggo": "jean bob" }, + { "id": 3, "doggo": "jean michel" }, + { "id": 4, "doggo": "bob marley" }, + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + + let mut s = Search::new(&rtxn, &index); + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); + + s.query("jean"); + let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[0, 2, 3]"); +} diff --git a/milli/src/search/new/tests/mod.rs b/milli/src/search/new/tests/mod.rs index cdcdb5936..2ad806a87 100644 --- a/milli/src/search/new/tests/mod.rs +++ b/milli/src/search/new/tests/mod.rs @@ -2,6 +2,7 @@ pub mod attribute_fid; pub mod attribute_position; pub mod distinct; pub mod exactness; +pub mod geo_sort; #[cfg(feature = "default")] pub mod language; pub mod ngram_split_words;