322: Geosearch r=ManyTheFish a=irevoire

This PR introduces [basic geo-search functionalities](https://github.com/meilisearch/specifications/pull/59): it makes the engine able to index, filter, and sort by geo-points. We decided to use [the rstar library](https://docs.rs/rstar) and to save the points in [an RTree](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html) that we de/serialize in the index database [with serde](https://serde.rs/) and [bincode](https://docs.rs/bincode). Querying the tree this way is not efficient, since deserializing it consumes a lot of CPU and memory on every search, but at least it is an easy first implementation.
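
To make the shape of the data concrete, here is a minimal sketch of that representation, assuming `rstar = { version = "0.9", features = ["serde"] }` and `bincode = "1"` as dependencies; `DocumentId` stands in for the `u32` alias defined in milli:

```rust
use rstar::primitives::GeomWithData;
use rstar::RTree;

type DocumentId = u32;
// Each tree entry is a (lat, lng) position tagged with the id of its document.
type GeoPoint = GeomWithData<[f64; 2], DocumentId>;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let rtree = RTree::bulk_load(vec![
        GeoPoint::new([48.8566, 2.3522], 0), // Paris
        GeoPoint::new([50.6292, 3.0573], 1), // Lille
    ]);

    // The whole tree is stored in the index as a single bincode blob...
    let bytes = bincode::serialize(&rtree)?;
    // ...and must be deserialized in full before it can be queried,
    // which is where the CPU and memory cost mentioned above comes from.
    let rtree: RTree<GeoPoint> = bincode::deserialize(&bytes)?;

    let nearest = rtree.nearest_neighbor_iter(&[50.63, 3.06]).next().unwrap();
    assert_eq!(nearest.data, 1);
    Ok(())
}
```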

### What we will have to do on the indexing part:
 - [x] Index the `_geo` fields from the documents.
  - [x] Create a new module with an extractor in the `extract` module that takes the `obkv_documents`, retrieves the latitude and longitude coordinates, and outputs them in a `grenad::Reader` for further processing (see the encoding sketch after this list).
   - [x] Call the extractor in the `extract::extract_documents_data` function and send the result to the `TypedChunk` module.
  - [x] Get the `grenad::Reader` in the `typed_chunk::write_typed_chunk_into_index` function and store all the points in the `rtree`.
- [x] Delete the documents from the `RTree` when deleting documents from the database. All this can be done in the `delete_documents.rs` file by getting the data structure and removing the points from it, inserting it back after the modification.
- [x] Clearing the `RTree` entirely when we clear the documents from the database, everything happens in the `clear_documents.rs` file.
- [x] Save a `RoaringBitmap` of all the documents containing the `_geo` field.
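
The extractor and the `TypedChunk` module communicate through a tiny binary layout: each grenad entry maps a document id to 16 bytes, the latitude followed by the longitude as native-endian `f64`s (the real code builds the value with `concat_arrays!` and splits it back with `helpers::try_split_array_at`). A standalone sketch of that encoding, with hypothetical `encode`/`decode` helpers:

```rust
use std::convert::TryInto;

/// Pack a (lat, lng) pair the way the extractor does: two native-endian f64s.
fn encode(lat: f64, lng: f64) -> [u8; 16] {
    let mut bytes = [0u8; 16];
    bytes[..8].copy_from_slice(&lat.to_ne_bytes());
    bytes[8..].copy_from_slice(&lng.to_ne_bytes());
    bytes
}

/// Unpack the 16-byte value back into (lat, lng), as done in `typed_chunk.rs`.
fn decode(bytes: &[u8; 16]) -> (f64, f64) {
    let lat = f64::from_ne_bytes(bytes[..8].try_into().unwrap());
    let lng = f64::from_ne_bytes(bytes[8..].try_into().unwrap());
    (lat, lng)
}

fn main() {
    let bytes = encode(48.8566, 2.3522);
    assert_eq!(decode(&bytes), (48.8566, 2.3522));
}
```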

### What we will have to do on the query part:
- [x] Filter the documents at a certain distance around a point; this is done by [collecting the documents from the searched point](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html#method.nearest_neighbor_iter) while they are in range (a sketch follows this list).
  - [x] We must introduce new `geoLowerThan` and `geoGreaterThan` variants to the `Operator` filter enum.
  - [x] Implement the `negative` method on both variants; `geoGreaterThan` is implemented by executing `geoLowerThan` and removing the results from the whole list of geo-faceted documents.
  - [x] Add the `_geoRadius` function in the pest parser.
- [x] Introduce a `_geo` ascending ranking function that takes a point as a parameter. ~~This function must keep the iterator on the `RTree` and make it peekable.~~ This was not possible for now; we had to collect the whole iterator. Only the documents that are part of the candidates must be returned.
  - [x] This ascending ranking rule will only be active if the search is set up with the `_geoPoint` parameter that indicates the center point of the ascending ranking rule.
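
As a rough model of how the `_geoRadius` filter walks the tree, here is a sketch reusing the `GeoPoint` alias from above; it hand-rolls the haversine formula where the PR uses the `geoutils` crate, and like the real code it relies on `nearest_neighbor_iter` yielding points by increasing distance so it can stop at the first out-of-range one:

```rust
use rstar::primitives::GeomWithData;
use rstar::RTree;

type GeoPoint = GeomWithData<[f64; 2], u32>;

/// Haversine distance in meters between two (lat, lng) pairs.
fn haversine_distance_m(a: &[f64; 2], b: &[f64; 2]) -> f64 {
    let (lat1, lng1) = (a[0].to_radians(), a[1].to_radians());
    let (lat2, lng2) = (b[0].to_radians(), b[1].to_radians());
    let h = ((lat2 - lat1) / 2.0).sin().powi(2)
        + lat1.cos() * lat2.cos() * ((lng2 - lng1) / 2.0).sin().powi(2);
    2.0 * 6_371_000.0 * h.sqrt().asin()
}

/// Collect the ids of every document lying within `radius_m` meters of `center`.
fn geo_radius(rtree: &RTree<GeoPoint>, center: [f64; 2], radius_m: f64) -> Vec<u32> {
    rtree
        .nearest_neighbor_iter(&center)
        .take_while(|p| haversine_distance_m(&center, p.geom()) < radius_m)
        .map(|p| p.data)
        .collect()
}
```

`geoGreaterThan` then falls out for free: run the same collection and subtract the result from the set of all geo-faceted documents.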

-----------

- On the Meilisearch side: we must introduce a new concept, returning the documents with a new `_geoDistance` field when the query goes through the `_geo` ranking rule; this has never been done before. We could maybe just do it afterward, once the documents have been retrieved from the database, by computing the distance between the `_geoPoint` and each document to be returned.
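
A possible shape for that post-processing step, reusing the `haversine_distance_m` helper sketched above; the field handling and the rounding to whole meters are assumptions, not settled behavior:

```rust
use serde_json::{json, Map, Value};

/// Annotate retrieved documents with their distance in meters to the
/// `_geoPoint` the ranking rule was centered on (hypothetical helper).
fn add_geo_distance(documents: &mut [Map<String, Value>], center: [f64; 2]) {
    for doc in documents.iter_mut() {
        let point = doc
            .get("_geo")
            .and_then(|geo| Some([geo.get("lat")?.as_f64()?, geo.get("lng")?.as_f64()?]));
        if let Some(point) = point {
            let meters = haversine_distance_m(&center, &point);
            doc.insert("_geoDistance".to_string(), json!(meters.round() as u64));
        }
    }
}
```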

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: cvermand <33010418+bidoubiwa@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Merged by bors[bot] on 2021-09-20, committed by GitHub (commit 31c8de1cca). 23 changed files with 896 additions and 69 deletions.


@@ -60,7 +60,13 @@ $('#query, #filters').on('input', function () {
const content = document.createElement('div');
content.classList.add("content");
// Stringify Objects and Arrays to avoid [Object object]
if (typeof element[prop] === 'object' && element[prop] !== null) {
content.innerHTML = JSON.stringify(element[prop]);
} else {
content.innerHTML = element[prop];
}
field.appendChild(attribute);
field.appendChild(content);


@@ -695,6 +695,7 @@ async fn main() -> anyhow::Result<()> {
struct QueryBody {
query: Option<String>,
filters: Option<String>,
sort: Option<String>,
facet_filters: Option<Vec<UntaggedEither<Vec<String>, String>>>,
facet_distribution: Option<bool>,
limit: Option<usize>,
@@ -754,6 +755,10 @@ async fn main() -> anyhow::Result<()> {
search.limit(limit);
}
if let Some(sort) = query.sort {
search.sort_criteria(vec![sort.parse().unwrap()]);
}
let SearchResult { matching_words, candidates, documents_ids } =
search.execute().unwrap();


@@ -16,6 +16,7 @@ flate2 = "1.0.20"
fst = "0.4.5"
fxhash = "0.2.1"
grenad = { version = "0.3.1", default-features = false, features = ["tempfile"] }
geoutils = "0.4.1"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
@@ -27,6 +28,7 @@ once_cell = "1.5.2"
ordered-float = "2.1.1"
rayon = "1.5.0"
roaring = "0.6.6"
rstar = { version = "0.9.1", features = ["serde"] }
serde = { version = "1.0.123", features = ["derive"] }
serde_json = { version = "1.0.62", features = ["preserve_order"] }
slice-group-by = "0.2.6"


@@ -3,7 +3,7 @@ use std::str::FromStr;
use serde::{Deserialize, Serialize};
use crate::error::{is_reserved_keyword, Error, UserError};
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
@@ -50,32 +50,101 @@ impl FromStr for Criterion {
"sort" => Ok(Criterion::Sort),
"exactness" => Ok(Criterion::Exactness),
text => match AscDesc::from_str(text) {
Ok(AscDesc::Asc(Member::Field(field))) => Ok(Criterion::Asc(field)),
Ok(AscDesc::Desc(Member::Field(field))) => Ok(Criterion::Desc(field)),
Ok(AscDesc::Asc(Member::Geo(_))) | Ok(AscDesc::Desc(Member::Geo(_))) => {
Err(UserError::InvalidRankingRuleName { name: text.to_string() })?
}
Err(UserError::InvalidAscDescSyntax { name }) => {
Err(UserError::InvalidRankingRuleName { name }.into())
}
Err(error) => {
Err(UserError::InvalidRankingRuleName { name: error.to_string() }.into())
}
},
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub enum Member {
Field(String),
Geo([f64; 2]),
}
impl FromStr for Member {
type Err = UserError;
fn from_str(text: &str) -> Result<Member, Self::Err> {
match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(")")) {
Some(point) => {
let (lat, long) = point
.split_once(',')
.ok_or_else(|| UserError::InvalidRankingRuleName { name: text.to_string() })
.and_then(|(lat, long)| {
lat.trim()
.parse()
.and_then(|lat| long.trim().parse().map(|long| (lat, long)))
.map_err(|_| UserError::InvalidRankingRuleName {
name: text.to_string(),
})
})?;
Ok(Member::Geo([lat, long]))
}
None => {
if is_reserved_keyword(text) {
return Err(UserError::InvalidReservedRankingRuleName {
name: text.to_string(),
})?;
}
Ok(Member::Field(text.to_string()))
}
}
}
}
impl fmt::Display for Member {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Member::Field(name) => f.write_str(name),
Member::Geo([lat, lng]) => write!(f, "_geoPoint({}, {})", lat, lng),
}
}
}
impl Member {
pub fn field(&self) -> Option<&str> {
match self {
Member::Field(field) => Some(field),
Member::Geo(_) => None,
}
}
pub fn geo_point(&self) -> Option<&[f64; 2]> {
match self {
Member::Geo(point) => Some(point),
Member::Field(_) => None,
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub enum AscDesc {
Asc(Member),
Desc(Member),
}
impl AscDesc {
pub fn member(&self) -> &Member {
match self {
AscDesc::Asc(member) => member,
AscDesc::Desc(member) => member,
}
}
pub fn field(&self) -> Option<&str> {
self.member().field()
}
}
impl FromStr for AscDesc {
@@ -85,9 +154,9 @@ impl FromStr for AscDesc {
/// string and let the caller create his own error
fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
match text.rsplit_once(':') {
Some((left, "asc")) => Ok(AscDesc::Asc(left.parse()?)),
Some((left, "desc")) => Ok(AscDesc::Desc(left.parse()?)),
_ => Err(UserError::InvalidRankingRuleName { name: text.to_string() }),
}
}
}
@@ -119,3 +188,63 @@ impl fmt::Display for Criterion {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_asc_desc() {
use big_s::S;
use AscDesc::*;
use Member::*;
let valid_req = [
("truc:asc", Asc(Field(S("truc")))),
("bidule:desc", Desc(Field(S("bidule")))),
("a-b:desc", Desc(Field(S("a-b")))),
("a:b:desc", Desc(Field(S("a:b")))),
("a12:asc", Asc(Field(S("a12")))),
("42:asc", Asc(Field(S("42")))),
("_geoPoint(42, 59):asc", Asc(Geo([42., 59.]))),
("_geoPoint(42.459, 59):desc", Desc(Geo([42.459, 59.]))),
("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))),
("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))),
("_geoPoint(42.0002, 59.895):desc", Desc(Geo([42.0002, 59.895]))),
("_geoPoint(42., 59.):desc", Desc(Geo([42., 59.]))),
("truc(12, 13):desc", Desc(Field(S("truc(12, 13)")))),
];
for (req, expected) in valid_req {
let res = req.parse();
assert!(res.is_ok(), "Failed to parse `{}`, was expecting `{:?}`", req, expected);
assert_eq!(expected, res.unwrap());
}
let invalid_req = [
"truc:machin",
"truc:deesc",
"truc:asc:deesc",
"42desc",
"_geoPoint:asc",
"_geoDistance:asc",
"_geoPoint(42.12 , 59.598)",
"_geoPoint(42.12 , 59.598):deesc",
"_geoPoint(42.12 , 59.598):machin",
"_geoPoint(42.12 , 59.598):asc:aasc",
"_geoPoint(42,12 , 59,598):desc",
"_geoPoint(35, 85, 75):asc",
"_geoPoint(18):asc",
];
for req in invalid_req {
let res = req.parse::<AscDesc>();
assert!(
res.is_err(),
"Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`",
req,
res,
);
}
}
}


@@ -12,6 +12,10 @@ use crate::{DocumentId, FieldId};
pub type Object = Map<String, Value>;
pub fn is_reserved_keyword(keyword: &str) -> bool {
["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword)
}
#[derive(Debug)]
pub enum Error {
InternalError(InternalError),
@@ -54,12 +58,14 @@ pub enum UserError {
Csv(csv::Error),
DocumentLimitReached,
InvalidAscDescSyntax { name: String },
InvalidDocumentId { document_id: Value },
InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
InvalidFilter(pest::error::Error<ParserRule>),
InvalidFilterAttribute(pest::error::Error<ParserRule>),
InvalidSortName { name: String },
InvalidGeoField { document_id: Value, object: Value },
InvalidRankingRuleName { name: String },
InvalidReservedRankingRuleName { name: String },
InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
SortRankingRuleMissing,
InvalidStoreFile,
@@ -221,7 +227,15 @@ impl fmt::Display for UserError {
Self::InvalidAscDescSyntax { name } => {
write!(f, "invalid asc/desc syntax for {}", name)
}
Self::InvalidGeoField { document_id, object } => write!(
f,
"the document with the id: {} contains an invalid _geo field: {}",
document_id, object
),
Self::InvalidRankingRuleName { name } => write!(f, "invalid criterion {}", name),
Self::InvalidReservedRankingRuleName { name } => {
write!(f, "{} is a reserved keyword and thus can't be used as a ranking rule", name)
}
Self::InvalidDocumentId { document_id } => {
let json = serde_json::to_string(document_id).unwrap();
write!(


@@ -8,6 +8,7 @@ use heed::flags::Flags;
use heed::types::*;
use heed::{Database, PolyDatabase, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
use crate::error::{InternalError, UserError};
use crate::fields_ids_map::FieldsIdsMap;
@@ -18,8 +19,8 @@ use crate::heed_codec::facet::{
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, StrLevelPositionCodec, StrStrU8Codec, BEU32,
};
pub mod main_key {
@@ -31,6 +32,8 @@ pub mod main_key {
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
pub const GEO_RTREE_KEY: &str = "geo-rtree";
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
pub const PRIMARY_KEY_KEY: &str = "primary-key";
@@ -294,6 +297,64 @@ impl Index {
.unwrap_or_default())
}
/* geo rtree */
/// Writes the provided `rtree` which associates coordinates to documents ids.
pub(crate) fn put_geo_rtree(
&self,
wtxn: &mut RwTxn,
rtree: &RTree<GeoPoint>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<RTree<GeoPoint>>>(wtxn, main_key::GEO_RTREE_KEY, rtree)
}
/// Delete the `rtree` which associates coordinates to documents ids.
pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY)
}
/// Returns the `rtree` which associates coordinates to documents ids.
pub fn geo_rtree<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<RTree<GeoPoint>>> {
match self
.main
.get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)?
{
Some(rtree) => Ok(Some(rtree)),
None => Ok(None),
}
}
/* geo faceted */
/// Writes the documents ids that are faceted with a _geo field.
pub(crate) fn put_geo_faceted_documents_ids(
&self,
wtxn: &mut RwTxn,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.put::<_, Str, RoaringBitmapCodec>(
wtxn,
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
docids,
)
}
/// Delete the documents ids that are faceted with a _geo field.
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
}
/// Retrieve all the documents ids that are faceted with a _geo field.
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
match self
.main
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
{
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
}
}
/* field distribution */
/// Writes the field distribution which associates every field name with


@@ -21,7 +21,7 @@ use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType;
use serde_json::{Map, Value};
pub use self::criterion::{default_criteria, AscDesc, Criterion, Member};
pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
@@ -51,6 +51,7 @@ pub type DocumentId = u32;
pub type FieldId = u16;
pub type Position = u32;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>;
/// Transform a raw obkv store into a JSON Object.
pub fn obkv_to_json(
@@ -141,6 +142,15 @@ where
Some((head, tail))
}
/// Return the distance between two points in meters. Each points are composed of two f64,
/// one latitude and one longitude.
pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
let a = geoutils::Location::new(a[0], a[1]);
let b = geoutils::Location::new(b[0], b[1]);
a.haversine_distance_to(&b).meters()
}
#[cfg(test)]
mod tests {
use serde_json::json;


@@ -0,0 +1,150 @@
use std::iter;
use roaring::RoaringBitmap;
use rstar::RTree;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::{GeoPoint, Index, Result};
pub struct Geo<'t> {
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
ascending: bool,
parent: Box<dyn Criterion + 't>,
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
allowed_candidates: RoaringBitmap,
bucket_candidates: RoaringBitmap,
rtree: Option<RTree<GeoPoint>>,
point: [f64; 2],
}
impl<'t> Geo<'t> {
pub fn asc(
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
parent: Box<dyn Criterion + 't>,
point: [f64; 2],
) -> Result<Self> {
Self::new(index, rtxn, parent, point, true)
}
pub fn desc(
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
parent: Box<dyn Criterion + 't>,
point: [f64; 2],
) -> Result<Self> {
Self::new(index, rtxn, parent, point, false)
}
fn new(
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
parent: Box<dyn Criterion + 't>,
point: [f64; 2],
ascending: bool,
) -> Result<Self> {
let candidates = Box::new(iter::empty());
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
let bucket_candidates = RoaringBitmap::new();
let rtree = index.geo_rtree(rtxn)?;
Ok(Self {
index,
rtxn,
ascending,
parent,
candidates,
allowed_candidates,
bucket_candidates,
rtree,
point,
})
}
}
impl Criterion for Geo<'_> {
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
let rtree = self.rtree.as_ref();
loop {
match self.candidates.next() {
Some(mut candidates) => {
candidates -= params.excluded_candidates;
self.allowed_candidates -= &candidates;
return Ok(Some(CriterionResult {
query_tree: None,
candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(self.bucket_candidates.clone()),
}));
}
None => match self.parent.next(params)? {
Some(CriterionResult {
query_tree,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
let mut candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
resolve_query_tree(&context, qt, params.wdcache)?
}
(None, None) => self.index.documents_ids(self.rtxn)?,
};
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
if candidates.is_empty() {
continue;
}
self.allowed_candidates = &candidates - params.excluded_candidates;
self.candidates = match rtree {
Some(rtree) => geo_point(
rtree,
self.allowed_candidates.clone(),
self.point,
self.ascending,
),
None => Box::new(std::iter::empty()),
};
}
None => return Ok(None),
},
}
}
}
}
fn geo_point(
rtree: &RTree<GeoPoint>,
mut candidates: RoaringBitmap,
point: [f64; 2],
ascending: bool,
) -> Box<dyn Iterator<Item = RoaringBitmap>> {
let mut results = Vec::new();
for point in rtree.nearest_neighbor_iter(&point) {
if candidates.remove(point.data) {
results.push(std::iter::once(point.data).collect());
if candidates.is_empty() {
break;
}
}
}
if ascending {
Box::new(results.into_iter())
} else {
Box::new(results.into_iter().rev())
}
}


@@ -12,7 +12,8 @@ use self::r#final::Final;
use self::typo::Typo;
use self::words::Words;
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use crate::criterion::{AscDesc as AscDescName, Member};
use crate::search::criteria::geo::Geo;
use crate::search::{word_derivations, WordDerivationsCache};
use crate::{DocumentId, FieldId, Index, Result, TreeLevel};
@@ -20,6 +21,7 @@ mod asc_desc;
mod attribute;
mod exactness;
pub mod r#final;
mod geo;
mod initial;
mod proximity;
mod typo;
@@ -290,18 +292,30 @@ impl<'t> CriteriaBuilder<'t> {
Some(ref sort_criteria) => {
for asc_desc in sort_criteria {
criterion = match asc_desc {
AscDescName::Asc(Member::Field(field)) => Box::new(AscDesc::asc(
&self.index,
&self.rtxn,
criterion,
field.to_string(),
)?),
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
&self.index,
&self.rtxn,
criterion,
field.to_string(),
)?),
AscDescName::Asc(Member::Geo(point)) => Box::new(Geo::asc(
&self.index,
&self.rtxn,
criterion,
point.clone(),
)?),
AscDescName::Desc(Member::Geo(point)) => Box::new(Geo::desc(
&self.index,
&self.rtxn,
criterion,
point.clone(),
)?),
};
}
criterion


@@ -21,7 +21,9 @@ use crate::error::UserError;
use crate::heed_codec::facet::{
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
};
use crate::{
distance_between_two_points, CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result,
};
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
@@ -32,6 +34,8 @@ pub enum Operator {
LowerThan(f64),
LowerThanOrEqual(f64),
Between(f64, f64),
GeoLowerThan([f64; 2], f64),
GeoGreaterThan([f64; 2], f64),
}
impl Operator {
@@ -46,6 +50,8 @@ impl Operator {
LowerThan(n) => (GreaterThanOrEqual(n), None),
LowerThanOrEqual(n) => (GreaterThan(n), None),
Between(n, m) => (LowerThan(n), Some(GreaterThan(m))),
GeoLowerThan(point, distance) => (GeoGreaterThan(point, distance), None),
GeoGreaterThan(point, distance) => (GeoLowerThan(point, distance), None),
}
}
}
@@ -131,6 +137,7 @@ impl FilterCondition {
Rule::leq => Ok(Self::lower_than_or_equal(fim, ff, pair)?),
Rule::less => Ok(Self::lower_than(fim, ff, pair)?),
Rule::between => Ok(Self::between(fim, ff, pair)?),
Rule::geo_radius => Ok(Self::geo_radius(fim, ff, pair)?),
Rule::not => Ok(Self::from_pairs(fim, ff, pair.into_inner())?.negate()),
Rule::prgm => Self::from_pairs(fim, ff, pair.into_inner()),
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
@@ -156,6 +163,65 @@ impl FilterCondition {
}
}
fn geo_radius(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<String>,
item: Pair<Rule>,
) -> Result<FilterCondition> {
if !filterable_fields.contains("_geo") {
return Err(UserError::InvalidFilterAttribute(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `_geo` is not filterable, available filterable attributes are: {}",
filterable_fields.iter().join(", "),
),
},
item.as_span(),
)))?;
}
let mut items = item.into_inner();
let fid = match fields_ids_map.id("_geo") {
Some(fid) => fid,
None => return Ok(Empty),
};
let parameters_item = items.next().unwrap();
// We don't need more than 3 parameters, but to handle errors correctly we are still going
// to extract the first 4 parameters
let param_span = parameters_item.as_span();
let parameters = parameters_item
.into_inner()
.take(4)
.map(|param| (param.clone(), param.as_span()))
.map(|(param, span)| pest_parse(param).0.map(|arg| (arg, span)))
.collect::<StdResult<Vec<(f64, _)>, _>>()
.map_err(UserError::InvalidFilter)?;
if parameters.len() != 3 {
return Err(UserError::InvalidFilter(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"),
},
// we want to point to the last parameters and if there was no parameters we
// point to the parenthesis
parameters.last().map(|param| param.1.clone()).unwrap_or(param_span),
)))?;
}
let (lat, lng, distance) = (&parameters[0], &parameters[1], parameters[2].0);
if let Some(span) = (!(-181.0..181.).contains(&lat.0))
.then(|| &lat.1)
.or((!(-181.0..181.).contains(&lng.0)).then(|| &lng.1))
{
return Err(UserError::InvalidFilter(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"Latitude and longitude must be contained between -180 to 180 degrees."
),
},
span.clone(),
)))?;
}
Ok(Operator(fid, GeoLowerThan([lat.0, lng.0], distance)))
}
fn between(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<String>,
@@ -440,6 +506,34 @@ impl FilterCondition {
LowerThan(val) => (Included(f64::MIN), Excluded(*val)),
LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)),
Between(left, right) => (Included(*left), Included(*right)),
GeoLowerThan(base_point, distance) => {
let rtree = match index.geo_rtree(rtxn)? {
Some(rtree) => rtree,
None => return Ok(RoaringBitmap::new()),
};
let result = rtree
.nearest_neighbor_iter(base_point)
.take_while(|point| {
distance_between_two_points(base_point, point.geom()) < *distance
})
.map(|point| point.data)
.collect();
return Ok(result);
}
GeoGreaterThan(point, distance) => {
let result = Self::evaluate_operator(
rtxn,
index,
numbers_db,
strings_db,
field_id,
&GeoLowerThan(point.clone(), *distance),
)?;
let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?;
return Ok(geo_faceted_doc_ids - result);
}
};
// Ask for the biggest value that can exist for this specific field, if it exists
@@ -505,6 +599,19 @@ fn field_id(
) -> StdResult<Option<FieldId>, PestError<Rule>> {
// lexing ensures that we at least have a key
let key = items.next().unwrap();
if key.as_rule() == Rule::reserved {
return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"`{}` is a reserved keyword and therefore can't be used as a filter expression. \
Available filterable attributes are: {}",
key.as_str(),
filterable_fields.iter().join(", "),
),
},
key.as_span(),
));
}
if !filterable_fields.contains(key.as_str()) {
return Err(PestError::new_from_span(
@@ -581,6 +688,13 @@ mod tests {
let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
assert_eq!(condition, expected);
let result = FilterCondition::from_str(&rtxn, &index, "_geo = France");
assert!(result.is_err());
let error = result.unwrap_err();
assert!(error.to_string().contains(
"`_geo` is a reserved keyword and therefore can't be used as a filter expression."
));
}
#[test]
@@ -663,6 +777,92 @@ mod tests {
assert_eq!(condition, expected);
}
#[test]
fn geo_radius() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
// Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
builder.set_filterable_fields(hashset! { S("_geo"), S("price") });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
// basic test
let condition =
FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap();
let expected = Operator(0, GeoLowerThan([12., 13.0005], 2000.));
assert_eq!(condition, expected);
// test the negation of the GeoLowerThan
let condition =
FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap();
let expected = Operator(0, GeoGreaterThan([50., 18.], 2000.500));
assert_eq!(condition, expected);
// composition of multiple operations
let condition = FilterCondition::from_str(
&rtxn,
&index,
"(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10",
)
.unwrap();
let expected = Or(
Box::new(And(
Box::new(Operator(0, GeoGreaterThan([1., 2.], 300.))),
Box::new(Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))),
)),
Box::new(Operator(1, LowerThanOrEqual(10.))),
);
assert_eq!(condition, expected);
// georadius don't have any parameters
let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius");
assert!(result.is_err());
let error = result.unwrap_err();
assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"));
// georadius don't have any parameters
let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()");
assert!(result.is_err());
let error = result.unwrap_err();
assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"));
// georadius don't have enough parameters
let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)");
assert!(result.is_err());
let error = result.unwrap_err();
assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"));
// georadius have too many parameters
let result =
FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)");
assert!(result.is_err());
let error = result.unwrap_err();
assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"));
// georadius have a bad latitude
let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-200, 150, 10)");
assert!(result.is_err());
let error = result.unwrap_err();
assert!(error
.to_string()
.contains("Latitude and longitude must be contained between -180 to 180 degrees."));
// georadius have a bad longitude
let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 181, 10)");
assert!(result.is_err());
let error = result.unwrap_err();
assert!(error
.to_string()
.contains("Latitude and longitude must be contained between -180 to 180 degrees."));
}
#[test]
fn from_array() {
let path = tempfile::tempdir().unwrap();


@@ -1,4 +1,4 @@
key = _{reserved | quoted | word }
value = _{quoted | word }
quoted = _{ (PUSH("'") | PUSH("\"")) ~ string ~ POP }
string = {char*}
@@ -8,6 +8,9 @@ char = _{ !(PEEK | "\\") ~ ANY
| "\\" ~ (PEEK | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
| "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})}
reserved = { "_geo" | "_geoDistance" | "_geoPoint" | ("_geoPoint" ~ parameters) }
// we deliberately choose to allow empty parameters to generate more specific error message later
parameters = {("(" ~ (value ~ ",")* ~ value? ~ ")") | ""}
condition = _{between | eq | greater | less | geq | leq | neq}
between = {key ~ value ~ "TO" ~ value}
geq = {key ~ ">=" ~ value}
@@ -16,10 +19,11 @@ neq = {key ~ "!=" ~ value}
eq = {key ~ "=" ~ value}
greater = {key ~ ">" ~ value}
less = {key ~ "<" ~ value}
geo_radius = {"_geoRadius" ~ parameters }
prgm = {SOI ~ expr ~ EOI}
expr = _{ ( term ~ (operation ~ term)* ) }
term = { ("(" ~ expr ~ ")") | condition | not | geo_radius }
operation = _{ and | or }
and = {"AND"}
or = {"OR"}


@@ -148,7 +148,8 @@ impl<'a> Search<'a> {
if let Some(sort_criteria) = &self.sort_criteria {
let sortable_fields = self.index.sortable_fields(self.rtxn)?;
for asc_desc in sort_criteria {
// we are not supposed to find any geoPoint in the criterion
if let Some(field) = asc_desc.field() {
if !sortable_fields.contains(field) {
return Err(UserError::InvalidSortableAttribute {
field: field.to_string(),
@@ -158,6 +159,7 @@
}
}
}
}
// We check that the sort ranking rule exists and throw an
// error if we try to use it and that it doesn't.


@@ -48,6 +48,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
self.index.delete_geo_rtree(self.wtxn)?;
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
// We clean all the faceted documents ids.
let empty = RoaringBitmap::default();
@@ -93,7 +95,7 @@ mod tests {
let content = &br#"[
{ "id": 0, "name": "kevin", "age": 20 },
{ "id": 1, "name": "kevina" },
{ "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
@@ -107,13 +109,15 @@ mod tests {
let rtxn = index.read_txn().unwrap();
assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 5);
assert!(index.words_fst(&rtxn).unwrap().is_empty());
assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
assert!(index.geo_rtree(&rtxn).unwrap().is_none());
assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());
assert!(index.word_docids.is_empty(&rtxn).unwrap());
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());


@@ -380,6 +380,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
drop(iter);
if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?;
let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree
.iter()
.filter(|&point| self.documents_ids.contains(point.data))
.cloned()
.map(|point| (point, point.data))
.unzip();
points_to_remove.iter().for_each(|point| {
rtree.remove(&point);
});
geo_faceted_doc_ids -= docids_to_remove;
self.index.put_geo_rtree(self.wtxn, &rtree)?;
self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
}
// We delete the documents ids that are under the facet field id values.
remove_docids_from_facet_field_id_number_docids(
self.wtxn,
@@ -542,6 +560,8 @@ where
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
@@ -657,4 +677,95 @@ mod tests {
wtxn.commit().unwrap();
}
#[test]
fn delete_documents_with_geo_points() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_primary_key(S("id"));
builder.set_filterable_fields(hashset!(S("_geo")));
builder.set_sortable_fields(hashset!(S("_geo")));
builder.execute(|_, _| ()).unwrap();
let content = &r#"[
{"id":"1","city":"Lille", "_geo": { "lat": 50.629973371633746, "lng": 3.0569447399419570 } },
{"id":"2","city":"Mons-en-Barœul", "_geo": { "lat": 50.641586120121050, "lng": 3.1106593480348670 } },
{"id":"3","city":"Hellemmes", "_geo": { "lat": 50.631220965518080, "lng": 3.1106399673339933 } },
{"id":"4","city":"Villeneuve-d'Ascq", "_geo": { "lat": 50.622468098014565, "lng": 3.1476425513437140 } },
{"id":"5","city":"Hem", "_geo": { "lat": 50.655250871381355, "lng": 3.1897297266244130 } },
{"id":"6","city":"Roubaix", "_geo": { "lat": 50.692473451896710, "lng": 3.1763326737747650 } },
{"id":"7","city":"Tourcoing", "_geo": { "lat": 50.726397466736480, "lng": 3.1541653659578670 } },
{"id":"8","city":"Mouscron", "_geo": { "lat": 50.745325554908610, "lng": 3.2206407854429853 } },
{"id":"9","city":"Tournai", "_geo": { "lat": 50.605342528602630, "lng": 3.3758586941351414 } },
{"id":"10","city":"Ghent", "_geo": { "lat": 51.053777403679035, "lng": 3.6957733119926930 } },
{"id":"11","city":"Brussels", "_geo": { "lat": 50.846640974544690, "lng": 4.3370663564281840 } },
{"id":"12","city":"Charleroi", "_geo": { "lat": 50.409570138889480, "lng": 4.4347354315085520 } },
{"id":"13","city":"Mons", "_geo": { "lat": 50.450294178855420, "lng": 3.9623722870904690 } },
{"id":"14","city":"Valenciennes", "_geo": { "lat": 50.351817774473545, "lng": 3.5326283646928800 } },
{"id":"15","city":"Arras", "_geo": { "lat": 50.284487528579950, "lng": 2.7637515844478160 } },
{"id":"16","city":"Cambrai", "_geo": { "lat": 50.179340577906700, "lng": 3.2189409952502930 } },
{"id":"17","city":"Bapaume", "_geo": { "lat": 50.111276127236400, "lng": 2.8547894666083120 } },
{"id":"18","city":"Amiens", "_geo": { "lat": 49.931472529669996, "lng": 2.2710499758317080 } },
{"id":"19","city":"Compiègne", "_geo": { "lat": 49.444980887725656, "lng": 2.7913841281529015 } },
{"id":"20","city":"Paris", "_geo": { "lat": 48.902100060895480, "lng": 2.3708400867406930 } }
]"#[..];
let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content.as_bytes(), |_, _| ()).unwrap();
let external_document_ids = index.external_documents_ids(&wtxn).unwrap();
let ids_to_delete: Vec<u32> = external_ids_to_delete
.iter()
.map(|id| external_document_ids.get(id.as_bytes()).unwrap())
.collect();
// Delete some documents.
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
external_ids_to_delete.iter().for_each(|id| drop(builder.delete_external_id(id)));
builder.execute().unwrap();
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
let rtree = index.geo_rtree(&rtxn).unwrap().unwrap();
let geo_faceted_doc_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
let all_geo_documents = index
.documents(&rtxn, all_geo_ids.iter().copied())
.unwrap()
.iter()
.map(|(id, _)| *id)
.collect::<HashSet<_>>();
let all_geo_faceted_ids = geo_faceted_doc_ids.iter().collect::<Vec<_>>();
let all_geo_faceted_documents = index
.documents(&rtxn, all_geo_faceted_ids.iter().copied())
.unwrap()
.iter()
.map(|(id, _)| *id)
.collect::<HashSet<_>>();
assert_eq!(
all_geo_documents, all_geo_faceted_documents,
"There is an inconsistency between the geo_faceted database and the rtree"
);
for id in all_geo_documents.iter() {
assert!(!ids_to_delete.contains(&id), "The document {} was supposed to be deleted", id);
}
assert_eq!(
all_geo_ids.len(),
all_geo_documents.len(),
"We deleted documents that were not supposed to be deleted"
);
}
}


@@ -0,0 +1,44 @@
use std::fs::File;
use std::io;
use concat_arrays::concat_arrays;
use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::{FieldId, InternalError, Result, UserError};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
pub fn extract_geo_points<R: io::Read>(
mut obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
geo_field_id: FieldId,
) -> Result<grenad::Reader<File>> {
let mut writer = tempfile::tempfile().and_then(|file| {
create_writer(indexer.chunk_compression_type, indexer.chunk_compression_level, file)
})?;
while let Some((docid_bytes, value)) = obkv_documents.next()? {
let obkv = obkv::KvReader::new(value);
let point: Value = match obkv.get(geo_field_id) {
Some(point) => serde_json::from_slice(point).map_err(InternalError::SerdeJson)?,
None => continue,
};
if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) {
// this will create an array of 16 bytes (two 8 bytes floats)
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
writer.insert(docid_bytes, bytes)?;
} else {
// All document must have a primary key so we can unwrap safely here
let primary_key = obkv.get(primary_key_id).unwrap();
let primary_key =
serde_json::from_slice(primary_key).map_err(InternalError::SerdeJson)?;
Err(UserError::InvalidGeoField { document_id: primary_key, object: point })?
}
}
Ok(writer_into_reader(writer)?)
}


@@ -3,6 +3,7 @@ mod extract_facet_number_docids;
mod extract_facet_string_docids;
mod extract_fid_docid_facet_values;
mod extract_fid_word_count_docids;
mod extract_geo_points;
mod extract_word_docids;
mod extract_word_level_position_docids;
mod extract_word_pair_proximity_docids;
@@ -19,6 +20,7 @@ use self::extract_facet_number_docids::extract_facet_number_docids;
use self::extract_facet_string_docids::extract_facet_string_docids;
use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values;
use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
use self::extract_geo_points::extract_geo_points;
use self::extract_word_docids::extract_word_docids;
use self::extract_word_level_position_docids::extract_word_level_position_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
@@ -37,6 +39,8 @@ pub(crate) fn data_from_obkv_documents(
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: Option<HashSet<FieldId>>,
faceted_fields: HashSet<FieldId>,
primary_key_id: FieldId,
geo_field_id: Option<FieldId>,
stop_words: Option<fst::Set<&[u8]>>,
) -> Result<()> {
let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
@@ -48,6 +52,8 @@ pub(crate) fn data_from_obkv_documents(
lmdb_writer_sx.clone(),
&searchable_fields,
&faceted_fields,
primary_key_id,
geo_field_id,
&stop_words,
)
})
@@ -168,6 +174,8 @@ fn extract_documents_data(
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: &Option<HashSet<FieldId>>,
faceted_fields: &HashSet<FieldId>,
primary_key_id: FieldId,
geo_field_id: Option<FieldId>,
stop_words: &Option<fst::Set<&[u8]>>,
) -> Result<(
grenad::Reader<CursorClonableMmap>,
@@ -177,6 +185,19 @@ fn extract_documents_data(
let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
if let Some(geo_field_id) = geo_field_id {
let documents_chunk_cloned = documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result =
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_field_id);
let _ = match result {
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {


@@ -228,11 +228,27 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
Receiver<Result<TypedChunk>>,
) = crossbeam_channel::unbounded();
// get the primary key field id
let primary_key_id = fields_ids_map.id(&primary_key).unwrap();
// get searchable fields for word databases
let searchable_fields =
self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter);
// get filterable fields for facet databases
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
// get the fid of the `_geo` field.
let geo_field_id = match self.index.fields_ids_map(self.wtxn)?.id("_geo") {
Some(gfid) => {
let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
if is_sortable || is_filterable {
Some(gfid)
} else {
None
}
}
None => None,
};
let stop_words = self.index.stop_words(self.wtxn)?;
// let stop_words = stop_words.as_ref();
@@ -261,6 +277,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
lmdb_writer_sx.clone(),
searchable_fields,
faceted_fields,
primary_key_id,
geo_field_id,
stop_words,
)
});
@@ -876,12 +894,12 @@ mod tests {
// First we send 3 documents with an id for only one of them.
let mut wtxn = index.write_txn().unwrap();
let documents = &r#"[
{ "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
{ "id": 456, "title": "Le Petit Prince", "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 },
{ "id": 1, "title": "Alice In Wonderland", "author": "Lewis Carroll", "genre": "fantasy", "price": 25.99 },
{ "id": 1344, "title": "The Hobbit", "author": "J. R. R. Tolkien", "genre": "fantasy" },
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" },
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
@@ -917,7 +935,7 @@ mod tests {
{ "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
{ "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" },
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
@@ -934,7 +952,7 @@ mod tests {
assert!(external_documents_ids.get("30").is_none());
let content = &br#"[
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
@@ -944,7 +962,7 @@ mod tests {
assert!(external_documents_ids.get("30").is_some());
let content = &br#"[
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);


@@ -1,4 +1,5 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::fs::File;
use heed::types::ByteSlice;
@@ -6,11 +7,12 @@ use heed::{BytesDecode, RwTxn};
use roaring::RoaringBitmap;
use super::helpers::{
self, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, valid_lmdb_key,
CursorClonableMmap,
};
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
use crate::update::index_documents::helpers::into_clonable_grenad;
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
pub(crate) enum TypedChunk {
DocidWordPositions(grenad::Reader<CursorClonableMmap>),
@@ -24,6 +26,7 @@ pub(crate) enum TypedChunk {
WordPairProximityDocids(grenad::Reader<File>),
FieldIdFacetStringDocids(grenad::Reader<File>),
FieldIdFacetNumberDocids(grenad::Reader<File>),
GeoPoints(grenad::Reader<File>),
}
/// Write typed chunk in the corresponding LMDB database of the provided index.
@@ -177,6 +180,24 @@ pub(crate) fn write_typed_chunk_into_index(
)?;
is_merged_database = true;
}
TypedChunk::GeoPoints(mut geo_points) => {
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
while let Some((key, value)) = geo_points.next()? {
// convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
// convert the latitude and longitude back to a f64 (8 bytes)
let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
rtree.insert(GeoPoint::new(point, docid));
geo_faceted_docids.insert(docid);
}
index.put_geo_rtree(wtxn, &rtree)?;
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
}
} }
Ok((RoaringBitmap::new(), is_merged_database)) Ok((RoaringBitmap::new(), is_merged_database))
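
The new `GeoPoints` arm fixes the on-disk convention for the extractor's `grenad::Reader`: the key is the `DocumentId` as four big-endian bytes, and the value is the latitude followed by the longitude, each as eight native-endian bytes. A hedged sketch of the matching write side (the actual extractor lives in the `extract` module and may differ in detail):

```rust
// Hedged sketch: produce a (key, value) pair that the decoding loop in
// `write_typed_chunk_into_index` above can read back.
fn serialize_geo_entry(docid: u32, lat: f64, lng: f64) -> (Vec<u8>, Vec<u8>) {
    let key = docid.to_be_bytes().to_vec(); // 4 big-endian bytes
    let mut value = Vec::with_capacity(16);
    value.extend_from_slice(&lat.to_ne_bytes()); // 8 native-endian bytes
    value.extend_from_slice(&lng.to_ne_bytes()); // 8 native-endian bytes
    (key, value)
}
```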

View File

@@ -1,17 +1,17 @@
-{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","":""}
+{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":""}
-{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""}
+{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":""}
-{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""}
+{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":""}
-{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""}
+{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":""}
-{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""}
+{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":""}
-{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""}
+{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":""}
-{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""}
+{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"geo_rank":34692,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","_geo": { "lat": 50.78776041427129, "lng": 2.661201766290338 },"":""}
-{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""}
+{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":""}
-{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","":""}
+{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":""}
-{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","":""}
+{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":""}
-{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","":""}
+{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":""}
-{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","":""}
+{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":""}
-{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","":""}
+{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"geo_rank":739203,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","_geo": { "lat": 43.99150729038736, "lng": 3.606143957295055 },"":""}
-{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","":""}
+{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":""}
-{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","":""}
+{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":""}
-{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","":""}
+{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":9422437,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","_geo": { "lat": 35.06462306367058, "lng": 135.8338440354251 },"":""}
-{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","":""}
+{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":9339230,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","_geo": { "lat": 34.39548365683149, "lng": 132.4535960928883 },"":""}

View File

@@ -47,6 +47,11 @@ test_filter!(eq_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank=1")]
 test_filter!(eq_string_or_filter, vec![Left(vec!["tag=red", "tag=green"])]);
 test_filter!(eq_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank=1"])]);
 test_filter!(eq_number_or_filter, vec![Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]);
+test_filter!(geo_radius, vec![Right("_geoRadius(50.630010347667806, 3.086251829166809, 100000)")]);
+test_filter!(
+    not_geo_radius,
+    vec![Right("NOT _geoRadius(50.630010347667806, 3.086251829166809, 1000000)")]
+);
 test_filter!(eq_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")]);
 test_filter!(
     eq_complex_filter_2,
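
These two cases exercise the new `_geoRadius(lat, lng, distance_in_meters)` function added to the pest parser, plus its negation, which is resolved against the set of geo-faceted documents. A hedged usage sketch (assuming the string-based filter API used elsewhere in milli's tests; names and exact signature may differ):

```rust
// Hedged sketch: keep only documents within 100 km of the reference point.
let condition = FilterCondition::from_str(
    &rtxn,
    &index,
    "_geoRadius(50.630010347667806, 3.086251829166809, 100000)",
)
.unwrap();
search.filter(condition);
```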

View File

@@ -6,7 +6,7 @@ use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
 use maplit::{hashmap, hashset};
 use milli::update::{Settings, UpdateBuilder, UpdateFormat};
-use milli::{AscDesc, Criterion, DocumentId, Index};
+use milli::{AscDesc, Criterion, DocumentId, Index, Member};
 use serde::Deserialize;
 use slice_group_by::GroupBy;
@@ -37,6 +37,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
 builder.set_filterable_fields(hashset! {
     S("tag"),
     S("asc_desc_rank"),
+    S("_geo"),
 });
 builder.set_sortable_fields(hashset! {
     S("tag"),
@@ -99,11 +100,11 @@ pub fn expected_order(
     new_groups
         .extend(group.linear_group_by_key(|d| d.proximity_rank).map(Vec::from));
 }
-Criterion::Sort if sort_by == [AscDesc::Asc(S("tag"))] => {
+Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
     group.sort_by_key(|d| d.sort_by_rank);
     new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
 }
-Criterion::Sort if sort_by == [AscDesc::Desc(S("tag"))] => {
+Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
     group.sort_by_key(|d| Reverse(d.sort_by_rank));
     new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
 }
@@ -162,6 +163,10 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
 if document.asc_desc_rank > filter.parse().unwrap() {
     id = Some(document.id.clone())
 }
+} else if filter.starts_with("_geoRadius") {
+    id = (document.geo_rank < 100000).then(|| document.id.clone());
+} else if filter.starts_with("NOT _geoRadius") {
+    id = (document.geo_rank > 1000000).then(|| document.id.clone());
 }
 id
 }
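
Note how the harness evaluates these filters: instead of parsing the coordinates, it compares the document's precomputed `geo_rank` against the hard-coded radii (100 000 m for the positive case, 1 000 000 m for the negated one). This stays correct only as long as the `geo_radius` and `not_geo_radius` tests above keep those exact radii.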
@@ -205,6 +210,7 @@ pub struct TestDocument {
     pub exact_rank: u32,
     pub asc_desc_rank: u32,
     pub sort_by_rank: u32,
+    pub geo_rank: u32,
     pub title: String,
     pub description: String,
     pub tag: String,

View File

@@ -5,7 +5,7 @@ use heed::EnvOpenOptions;
 use itertools::Itertools;
 use maplit::hashset;
 use milli::update::{Settings, UpdateBuilder, UpdateFormat};
-use milli::{AscDesc, Criterion, Index, Search, SearchResult};
+use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
 use rand::Rng;
 use Criterion::*;
@@ -163,28 +163,28 @@ test_criterion!(
     DISALLOW_OPTIONAL_WORDS,
     ALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Asc(S("tag"))]
+    vec![AscDesc::Asc(Member::Field(S("tag")))]
 );
 test_criterion!(
     sort_by_asc_disallow_typo,
     DISALLOW_OPTIONAL_WORDS,
     DISALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Asc(S("tag"))]
+    vec![AscDesc::Asc(Member::Field(S("tag")))]
 );
 test_criterion!(
     sort_by_desc_allow_typo,
     DISALLOW_OPTIONAL_WORDS,
     ALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Desc(S("tag"))]
+    vec![AscDesc::Desc(Member::Field(S("tag")))]
 );
 test_criterion!(
     sort_by_desc_disallow_typo,
     DISALLOW_OPTIONAL_WORDS,
     DISALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Desc(S("tag"))]
+    vec![AscDesc::Desc(Member::Field(S("tag")))]
 );
 test_criterion!(
     default_criteria_order,

View File

@@ -1,6 +1,6 @@
 use big_s::S;
 use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
-use milli::{AscDesc, Error, Search, UserError};
+use milli::{AscDesc, Error, Member, Search, UserError};
 use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
@@ -16,7 +16,7 @@ fn sort_ranking_rule_missing() {
 search.limit(EXTERNAL_DOCUMENTS_IDS.len());
 search.authorize_typos(true);
 search.optional_words(true);
-search.sort_criteria(vec![AscDesc::Asc(S("tag"))]);
+search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
 let result = search.execute();
 assert!(matches!(result, Err(Error::UserError(UserError::SortRankingRuleMissing))));
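
All of the sort-related churn across these test files comes from one API change: `AscDesc` now wraps a `Member` instead of a bare field name, so a sort criterion can designate either a document field or the `_geoPoint` that drives the geo ascending ranking rule. A hedged sketch of the shape implied by these hunks (only `Member::Field` actually appears in the diff; the `Geo` variant is an assumption based on the PR description):

```rust
// Shape implied by the diff; the `Geo` variant is a hedged guess drawn from
// the `_geoPoint` sort feature, not visible in these hunks.
pub enum Member {
    Field(String),
    Geo([f64; 2]),
}

pub enum AscDesc {
    Asc(Member),
    Desc(Member),
}
```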