Mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-27 04:25:06 +08:00)
Merge #322

322: Geosearch r=ManyTheFish a=irevoire

This PR introduces [basic geo-search functionalities](https://github.com/meilisearch/specifications/pull/59): it makes the engine able to index, filter, and sort by geo-point. We decided to use [the rstar library](https://docs.rs/rstar) and to save the points in [an RTree](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html) that we de/serialize in the index database [by using serde](https://serde.rs/) with [bincode](https://docs.rs/bincode). This is not an efficient way to query this tree, as it consumes a lot of CPU and memory every time a search is made, but at least it is an easy first way to do so. (A minimal sketch of this round-trip follows the commit metadata below.)

### What we will have to do on the indexing part:

- [x] Index the `_geo` fields from the documents.
- [x] Create a new module with an extractor in the `extract` module that takes the `obkv_documents`, retrieves the latitude and longitude coordinates, and outputs them in a `grenad::Reader` for further processing.
- [x] Call the extractor in the `extract::extract_documents_data` function and send the result to the `TypedChunk` module.
- [x] Get the `grenad::Reader` in the `typed_chunk::write_typed_chunk_into_index` function and store all the points in the `rtree`.
- [x] Delete the documents from the `RTree` when deleting documents from the database. All this can be done in the `delete_documents.rs` file by getting the data structure, removing the points from it, and inserting it back after the modification.
- [x] Clear the `RTree` entirely when we clear the documents from the database; everything happens in the `clear_documents.rs` file.
- [x] Save a Roaring bitmap of all documents containing the `_geo` field.

### What we will have to do on the query part:

- [x] Filter the documents at a certain distance around a point; this is done by [collecting the documents from the searched point](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html#method.nearest_neighbor_iter) while they are in range.
- [x] We must introduce new `geoLowerThan` and `geoGreaterThan` variants in the `Operator` filter enum.
- [x] Implement the `negative` method on both variants, where the `geoGreaterThan` variant is implemented by executing the `geoLowerThan` and removing the results found from the whole list of geo-faceted documents.
- [x] Add the `_geoRadius` function in the pest parser.
- [x] Introduce a `_geo` ascending ranking function that takes a point as parameter. ~~This function must keep the iterator on the `RTree` and make it peekable.~~ This was not possible for now; we had to collect the whole iterator. Only the documents that are part of the candidates must be sent too!
- [x] This ascending ranking rule will only be active if the search is set up with the `_geoPoint` parameter that indicates the center point of the ascending ranking rule.

-----------

On the Meilisearch side, we must introduce a new concept: returning the documents with a new `_geoDistance` field when they passed through the `_geo` ranking rule; this has never been done before. We could maybe just do it afterward, once the documents have been retrieved from the database, by computing the distance between the `_geoPoint` and each of the documents to be returned.

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: cvermand <33010418+bidoubiwa@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in: 31c8de1cca
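As a rough illustration of the de/serialization strategy described in the PR body (not code from this commit), here is a minimal sketch assuming the `rstar` crate with its `serde` feature as added in Cargo.toml below, a `bincode` dependency as referenced in the PR body, and the `GeoPoint` alias this PR introduces:

```rust
use rstar::primitives::GeomWithData;
use rstar::RTree;

type DocumentId = u32;
type GeoPoint = GeomWithData<[f64; 2], DocumentId>;

fn main() {
    // Build a tree mapping coordinates to document ids.
    let tree: RTree<GeoPoint> = RTree::bulk_load(vec![
        GeoPoint::new([48.8566, 2.3522], 0), // Paris
        GeoPoint::new([50.6292, 3.0573], 1), // Lille
    ]);

    // Serialize the whole tree into the index, and read it back at search time.
    let bytes = bincode::serialize(&tree).unwrap();
    let tree: RTree<GeoPoint> = bincode::deserialize(&bytes).unwrap();

    // Query points from nearest to farthest.
    for point in tree.nearest_neighbor_iter(&[48.9, 2.4]) {
        println!("doc {} at {:?}", point.data, point.geom());
    }
}
```

Storing the whole tree as one `bincode` blob is exactly what makes querying costly: every search deserializes the entire structure, which is the CPU/memory trade-off acknowledged above.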
@@ -60,7 +60,13 @@ $('#query, #filters').on('input', function () {
 
       const content = document.createElement('div');
       content.classList.add("content");
 
+      // Stringify Objects and Arrays to avoid [Object object]
+      if (typeof element[prop] === 'object' && element[prop] !== null) {
+        content.innerHTML = JSON.stringify(element[prop]);
+      } else {
         content.innerHTML = element[prop];
+      }
 
       field.appendChild(attribute);
       field.appendChild(content);
@@ -695,6 +695,7 @@ async fn main() -> anyhow::Result<()> {
 struct QueryBody {
     query: Option<String>,
     filters: Option<String>,
+    sort: Option<String>,
     facet_filters: Option<Vec<UntaggedEither<Vec<String>, String>>>,
     facet_distribution: Option<bool>,
     limit: Option<usize>,
@@ -754,6 +755,10 @@ async fn main() -> anyhow::Result<()> {
     search.limit(limit);
 }
 
+if let Some(sort) = query.sort {
+    search.sort_criteria(vec![sort.parse().unwrap()]);
+}
+
 let SearchResult { matching_words, candidates, documents_ids } =
     search.execute().unwrap();
 
@@ -16,6 +16,7 @@ flate2 = "1.0.20"
 fst = "0.4.5"
 fxhash = "0.2.1"
 grenad = { version = "0.3.1", default-features = false, features = ["tempfile"] }
+geoutils = "0.4.1"
 heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
 human_format = "1.0.3"
 levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
@@ -27,6 +28,7 @@ once_cell = "1.5.2"
 ordered-float = "2.1.1"
 rayon = "1.5.0"
 roaring = "0.6.6"
+rstar = { version = "0.9.1", features = ["serde"] }
 serde = { version = "1.0.123", features = ["derive"] }
 serde_json = { version = "1.0.62", features = ["preserve_order"] }
 slice-group-by = "0.2.6"
@@ -3,7 +3,7 @@ use std::str::FromStr;
 
 use serde::{Deserialize, Serialize};
 
-use crate::error::{Error, UserError};
+use crate::error::{is_reserved_keyword, Error, UserError};
 
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub enum Criterion {
@@ -50,32 +50,101 @@ impl FromStr for Criterion {
             "sort" => Ok(Criterion::Sort),
             "exactness" => Ok(Criterion::Exactness),
             text => match AscDesc::from_str(text) {
-                Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
-                Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
+                Ok(AscDesc::Asc(Member::Field(field))) => Ok(Criterion::Asc(field)),
+                Ok(AscDesc::Desc(Member::Field(field))) => Ok(Criterion::Desc(field)),
+                Ok(AscDesc::Asc(Member::Geo(_))) | Ok(AscDesc::Desc(Member::Geo(_))) => {
+                    Err(UserError::InvalidRankingRuleName { name: text.to_string() })?
+                }
                 Err(UserError::InvalidAscDescSyntax { name }) => {
-                    Err(UserError::InvalidCriterionName { name }.into())
+                    Err(UserError::InvalidRankingRuleName { name }.into())
                 }
                 Err(error) => {
-                    Err(UserError::InvalidCriterionName { name: error.to_string() }.into())
+                    Err(UserError::InvalidRankingRuleName { name: error.to_string() }.into())
                 }
             },
         }
     }
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub enum Member {
+    Field(String),
+    Geo([f64; 2]),
+}
+
+impl FromStr for Member {
+    type Err = UserError;
+
+    fn from_str(text: &str) -> Result<Member, Self::Err> {
+        match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(")")) {
+            Some(point) => {
+                let (lat, long) = point
+                    .split_once(',')
+                    .ok_or_else(|| UserError::InvalidRankingRuleName { name: text.to_string() })
+                    .and_then(|(lat, long)| {
+                        lat.trim()
+                            .parse()
+                            .and_then(|lat| long.trim().parse().map(|long| (lat, long)))
+                            .map_err(|_| UserError::InvalidRankingRuleName {
+                                name: text.to_string(),
+                            })
+                    })?;
+                Ok(Member::Geo([lat, long]))
+            }
+            None => {
+                if is_reserved_keyword(text) {
+                    return Err(UserError::InvalidReservedRankingRuleName {
+                        name: text.to_string(),
+                    })?;
+                }
+                Ok(Member::Field(text.to_string()))
+            }
+        }
+    }
+}
+
+impl fmt::Display for Member {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Member::Field(name) => f.write_str(name),
+            Member::Geo([lat, lng]) => write!(f, "_geoPoint({}, {})", lat, lng),
+        }
+    }
+}
+
+impl Member {
+    pub fn field(&self) -> Option<&str> {
+        match self {
+            Member::Field(field) => Some(field),
+            Member::Geo(_) => None,
+        }
+    }
+
+    pub fn geo_point(&self) -> Option<&[f64; 2]> {
+        match self {
+            Member::Geo(point) => Some(point),
+            Member::Field(_) => None,
+        }
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub enum AscDesc {
-    Asc(String),
-    Desc(String),
+    Asc(Member),
+    Desc(Member),
 }
 
 impl AscDesc {
-    pub fn field(&self) -> &str {
+    pub fn member(&self) -> &Member {
         match self {
-            AscDesc::Asc(field) => field,
-            AscDesc::Desc(field) => field,
+            AscDesc::Asc(member) => member,
+            AscDesc::Desc(member) => member,
         }
     }
+
+    pub fn field(&self) -> Option<&str> {
+        self.member().field()
+    }
 }
 
 impl FromStr for AscDesc {
@@ -85,9 +154,9 @@ impl FromStr for AscDesc {
     /// string and let the caller create his own error
     fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
         match text.rsplit_once(':') {
-            Some((field_name, "asc")) => Ok(AscDesc::Asc(field_name.to_string())),
-            Some((field_name, "desc")) => Ok(AscDesc::Desc(field_name.to_string())),
-            _ => Err(UserError::InvalidAscDescSyntax { name: text.to_string() }),
+            Some((left, "asc")) => Ok(AscDesc::Asc(left.parse()?)),
+            Some((left, "desc")) => Ok(AscDesc::Desc(left.parse()?)),
+            _ => Err(UserError::InvalidRankingRuleName { name: text.to_string() }),
         }
     }
 }
@@ -119,3 +188,63 @@ impl fmt::Display for Criterion {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_asc_desc() {
+        use big_s::S;
+        use AscDesc::*;
+        use Member::*;
+
+        let valid_req = [
+            ("truc:asc", Asc(Field(S("truc")))),
+            ("bidule:desc", Desc(Field(S("bidule")))),
+            ("a-b:desc", Desc(Field(S("a-b")))),
+            ("a:b:desc", Desc(Field(S("a:b")))),
+            ("a12:asc", Asc(Field(S("a12")))),
+            ("42:asc", Asc(Field(S("42")))),
+            ("_geoPoint(42, 59):asc", Asc(Geo([42., 59.]))),
+            ("_geoPoint(42.459, 59):desc", Desc(Geo([42.459, 59.]))),
+            ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))),
+            ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))),
+            ("_geoPoint(42.0002, 59.895):desc", Desc(Geo([42.0002, 59.895]))),
+            ("_geoPoint(42., 59.):desc", Desc(Geo([42., 59.]))),
+            ("truc(12, 13):desc", Desc(Field(S("truc(12, 13)")))),
+        ];
+
+        for (req, expected) in valid_req {
+            let res = req.parse();
+            assert!(res.is_ok(), "Failed to parse `{}`, was expecting `{:?}`", req, expected);
+            assert_eq!(expected, res.unwrap());
+        }
+
+        let invalid_req = [
+            "truc:machin",
+            "truc:deesc",
+            "truc:asc:deesc",
+            "42desc",
+            "_geoPoint:asc",
+            "_geoDistance:asc",
+            "_geoPoint(42.12 , 59.598)",
+            "_geoPoint(42.12 , 59.598):deesc",
+            "_geoPoint(42.12 , 59.598):machin",
+            "_geoPoint(42.12 , 59.598):asc:aasc",
+            "_geoPoint(42,12 , 59,598):desc",
+            "_geoPoint(35, 85, 75):asc",
+            "_geoPoint(18):asc",
+        ];
+
+        for req in invalid_req {
+            let res = req.parse::<AscDesc>();
+            assert!(
+                res.is_err(),
+                "Should not be able to parse `{}`, was expecting an error but instead got: `{:?}`",
+                req,
+                res,
+            );
+        }
+    }
+}
@@ -12,6 +12,10 @@ use crate::{DocumentId, FieldId};
 
 pub type Object = Map<String, Value>;
 
+pub fn is_reserved_keyword(keyword: &str) -> bool {
+    ["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword)
+}
+
 #[derive(Debug)]
 pub enum Error {
     InternalError(InternalError),
@@ -54,12 +58,14 @@ pub enum UserError {
     Csv(csv::Error),
     DocumentLimitReached,
     InvalidAscDescSyntax { name: String },
-    InvalidCriterionName { name: String },
     InvalidDocumentId { document_id: Value },
     InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
     InvalidFilter(pest::error::Error<ParserRule>),
     InvalidFilterAttribute(pest::error::Error<ParserRule>),
     InvalidSortName { name: String },
+    InvalidGeoField { document_id: Value, object: Value },
+    InvalidRankingRuleName { name: String },
+    InvalidReservedRankingRuleName { name: String },
     InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
     SortRankingRuleMissing,
     InvalidStoreFile,
@@ -221,7 +227,15 @@ impl fmt::Display for UserError {
             Self::InvalidAscDescSyntax { name } => {
                 write!(f, "invalid asc/desc syntax for {}", name)
             }
-            Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name),
+            Self::InvalidGeoField { document_id, object } => write!(
+                f,
+                "the document with the id: {} contains an invalid _geo field: {}",
+                document_id, object
+            ),
+            Self::InvalidRankingRuleName { name } => write!(f, "invalid criterion {}", name),
+            Self::InvalidReservedRankingRuleName { name } => {
+                write!(f, "{} is a reserved keyword and thus can't be used as a ranking rule", name)
+            }
             Self::InvalidDocumentId { document_id } => {
                 let json = serde_json::to_string(document_id).unwrap();
                 write!(
@@ -8,6 +8,7 @@ use heed::flags::Flags;
 use heed::types::*;
 use heed::{Database, PolyDatabase, RoTxn, RwTxn};
 use roaring::RoaringBitmap;
+use rstar::RTree;
 
 use crate::error::{InternalError, UserError};
 use crate::fields_ids_map::FieldsIdsMap;
@@ -18,8 +19,8 @@ use crate::heed_codec::facet::{
 use crate::{
     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
     DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
-    FieldIdWordCountCodec, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search,
-    StrLevelPositionCodec, StrStrU8Codec, BEU32,
+    FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
+    Search, StrLevelPositionCodec, StrStrU8Codec, BEU32,
 };
 
 pub mod main_key {
@@ -31,6 +32,8 @@ pub mod main_key {
     pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
     pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
    pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
+    pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
+    pub const GEO_RTREE_KEY: &str = "geo-rtree";
     pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
     pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
     pub const PRIMARY_KEY_KEY: &str = "primary-key";
@@ -294,6 +297,64 @@ impl Index {
             .unwrap_or_default())
     }
 
+    /* geo rtree */
+
+    /// Writes the provided `rtree` which associates coordinates to documents ids.
+    pub(crate) fn put_geo_rtree(
+        &self,
+        wtxn: &mut RwTxn,
+        rtree: &RTree<GeoPoint>,
+    ) -> heed::Result<()> {
+        self.main.put::<_, Str, SerdeBincode<RTree<GeoPoint>>>(wtxn, main_key::GEO_RTREE_KEY, rtree)
+    }
+
+    /// Delete the `rtree` which associates coordinates to documents ids.
+    pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
+        self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY)
+    }
+
+    /// Returns the `rtree` which associates coordinates to documents ids.
+    pub fn geo_rtree<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<RTree<GeoPoint>>> {
+        match self
+            .main
+            .get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)?
+        {
+            Some(rtree) => Ok(Some(rtree)),
+            None => Ok(None),
+        }
+    }
+
+    /* geo faceted */
+
+    /// Writes the documents ids that are faceted with a _geo field.
+    pub(crate) fn put_geo_faceted_documents_ids(
+        &self,
+        wtxn: &mut RwTxn,
+        docids: &RoaringBitmap,
+    ) -> heed::Result<()> {
+        self.main.put::<_, Str, RoaringBitmapCodec>(
+            wtxn,
+            main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
+            docids,
+        )
+    }
+
+    /// Delete the documents ids that are faceted with a _geo field.
+    pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
+        self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
+    }
+
+    /// Retrieve all the documents ids that are faceted with a _geo field.
+    pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
+        match self
+            .main
+            .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
+        {
+            Some(docids) => Ok(docids),
+            None => Ok(RoaringBitmap::new()),
+        }
+    }
+
     /* field distribution */
 
     /// Writes the field distribution which associates every field name with
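A hypothetical crate-internal helper (not part of this diff) showing the intended usage pattern of the accessors above, mirroring how `delete_documents.rs` uses them later in this PR: the rtree and the geo-faceted bitmap are always read, mutated, and written back together so they stay in sync.

```rust
use rstar::RTree;

// `Index`, `GeoPoint`, and `Result` are the crate items introduced or touched
// by this PR; the function itself is illustrative only.
fn index_geo_point(index: &Index, wtxn: &mut heed::RwTxn, point: GeoPoint) -> Result<()> {
    let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_else(RTree::new);
    let mut docids = index.geo_faceted_documents_ids(wtxn)?;

    docids.insert(point.data);
    rtree.insert(point);

    index.put_geo_rtree(wtxn, &rtree)?;
    index.put_geo_faceted_documents_ids(wtxn, &docids)?;
    Ok(())
}
```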
@@ -21,7 +21,7 @@ use fxhash::{FxHasher32, FxHasher64};
 pub use grenad::CompressionType;
 use serde_json::{Map, Value};
 
-pub use self::criterion::{default_criteria, AscDesc, Criterion};
+pub use self::criterion::{default_criteria, AscDesc, Criterion, Member};
 pub use self::error::{
     Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
 };
@@ -51,6 +51,7 @@ pub type DocumentId = u32;
 pub type FieldId = u16;
 pub type Position = u32;
 pub type FieldDistribution = BTreeMap<String, u64>;
+pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>;
 
 /// Transform a raw obkv store into a JSON Object.
 pub fn obkv_to_json(
@@ -141,6 +142,15 @@
     Some((head, tail))
 }
+
+/// Returns the distance between two points in meters. Each point is composed of two f64:
+/// one latitude and one longitude.
+pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
+    let a = geoutils::Location::new(a[0], a[1]);
+    let b = geoutils::Location::new(b[0], b[1]);
+
+    a.haversine_distance_to(&b).meters()
+}
 
 #[cfg(test)]
 mod tests {
     use serde_json::json;
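A quick sanity check of the helper above (illustrative coordinates, not from this commit): `geoutils` computes the haversine distance, so Paris to Lille should come out around 204 km.

```rust
let paris = [48.8566, 2.3522]; // [lat, lng] in degrees
let lille = [50.6292, 3.0573];
let meters = distance_between_two_points(&paris, &lille);
assert!((200_000.0..207_000.0).contains(&meters));
```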
milli/src/search/criteria/geo.rs (new file, 150 lines)
@@ -0,0 +1,150 @@
+use std::iter;
+
+use roaring::RoaringBitmap;
+use rstar::RTree;
+
+use super::{Criterion, CriterionParameters, CriterionResult};
+use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
+use crate::{GeoPoint, Index, Result};
+
+pub struct Geo<'t> {
+    index: &'t Index,
+    rtxn: &'t heed::RoTxn<'t>,
+    ascending: bool,
+    parent: Box<dyn Criterion + 't>,
+    candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
+    allowed_candidates: RoaringBitmap,
+    bucket_candidates: RoaringBitmap,
+    rtree: Option<RTree<GeoPoint>>,
+    point: [f64; 2],
+}
+
+impl<'t> Geo<'t> {
+    pub fn asc(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn<'t>,
+        parent: Box<dyn Criterion + 't>,
+        point: [f64; 2],
+    ) -> Result<Self> {
+        Self::new(index, rtxn, parent, point, true)
+    }
+
+    pub fn desc(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn<'t>,
+        parent: Box<dyn Criterion + 't>,
+        point: [f64; 2],
+    ) -> Result<Self> {
+        Self::new(index, rtxn, parent, point, false)
+    }
+
+    fn new(
+        index: &'t Index,
+        rtxn: &'t heed::RoTxn<'t>,
+        parent: Box<dyn Criterion + 't>,
+        point: [f64; 2],
+        ascending: bool,
+    ) -> Result<Self> {
+        let candidates = Box::new(iter::empty());
+        let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
+        let bucket_candidates = RoaringBitmap::new();
+        let rtree = index.geo_rtree(rtxn)?;
+
+        Ok(Self {
+            index,
+            rtxn,
+            ascending,
+            parent,
+            candidates,
+            allowed_candidates,
+            bucket_candidates,
+            rtree,
+            point,
+        })
+    }
+}
+
+impl Criterion for Geo<'_> {
+    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
+        let rtree = self.rtree.as_ref();
+
+        loop {
+            match self.candidates.next() {
+                Some(mut candidates) => {
+                    candidates -= params.excluded_candidates;
+                    self.allowed_candidates -= &candidates;
+                    return Ok(Some(CriterionResult {
+                        query_tree: None,
+                        candidates: Some(candidates),
+                        filtered_candidates: None,
+                        bucket_candidates: Some(self.bucket_candidates.clone()),
+                    }));
+                }
+                None => match self.parent.next(params)? {
+                    Some(CriterionResult {
+                        query_tree,
+                        candidates,
+                        filtered_candidates,
+                        bucket_candidates,
+                    }) => {
+                        let mut candidates = match (&query_tree, candidates) {
+                            (_, Some(candidates)) => candidates,
+                            (Some(qt), None) => {
+                                let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
+                                resolve_query_tree(&context, qt, params.wdcache)?
+                            }
+                            (None, None) => self.index.documents_ids(self.rtxn)?,
+                        };
+
+                        if let Some(filtered_candidates) = filtered_candidates {
+                            candidates &= filtered_candidates;
+                        }
+
+                        match bucket_candidates {
+                            Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
+                            None => self.bucket_candidates |= &candidates,
+                        }
+
+                        if candidates.is_empty() {
+                            continue;
+                        }
+                        self.allowed_candidates = &candidates - params.excluded_candidates;
+                        self.candidates = match rtree {
+                            Some(rtree) => geo_point(
+                                rtree,
+                                self.allowed_candidates.clone(),
+                                self.point,
+                                self.ascending,
+                            ),
+                            None => Box::new(std::iter::empty()),
+                        };
+                    }
+                    None => return Ok(None),
+                },
+            }
+        }
+    }
+}
+
+fn geo_point(
+    rtree: &RTree<GeoPoint>,
+    mut candidates: RoaringBitmap,
+    point: [f64; 2],
+    ascending: bool,
+) -> Box<dyn Iterator<Item = RoaringBitmap>> {
+    let mut results = Vec::new();
+    for point in rtree.nearest_neighbor_iter(&point) {
+        if candidates.remove(point.data) {
+            results.push(std::iter::once(point.data).collect());
+            if candidates.is_empty() {
+                break;
+            }
+        }
+    }
+
+    if ascending {
+        Box::new(results.into_iter())
+    } else {
+        Box::new(results.into_iter().rev())
+    }
+}
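To make the bucketing behavior of `geo_point` above concrete, here is a small standalone sketch (illustrative values, not from this commit): each candidate becomes its own single-document bucket, emitted nearest-first, so the ranking rule orders documents strictly by distance.

```rust
use roaring::RoaringBitmap;
use rstar::primitives::GeomWithData;
use rstar::RTree;

type GeoPoint = GeomWithData<[f64; 2], u32>;

fn main() {
    let rtree = RTree::bulk_load(vec![
        GeoPoint::new([1.0, 1.0], 8), // farther from the query point
        GeoPoint::new([0.1, 0.1], 7), // nearer
    ]);

    let buckets: Vec<RoaringBitmap> = rtree
        .nearest_neighbor_iter(&[0.0, 0.0])
        .map(|point| std::iter::once(point.data).collect())
        .collect();

    assert!(buckets[0].contains(7)); // nearest document ranks first
    assert!(buckets[1].contains(8));
}
```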
@@ -12,7 +12,8 @@ use self::r#final::Final;
 use self::typo::Typo;
 use self::words::Words;
 use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
-use crate::criterion::AscDesc as AscDescName;
+use crate::criterion::{AscDesc as AscDescName, Member};
+use crate::search::criteria::geo::Geo;
 use crate::search::{word_derivations, WordDerivationsCache};
 use crate::{DocumentId, FieldId, Index, Result, TreeLevel};
 
@@ -20,6 +21,7 @@ mod asc_desc;
 mod attribute;
 mod exactness;
 pub mod r#final;
+mod geo;
 mod initial;
 mod proximity;
 mod typo;
@@ -290,18 +292,30 @@ impl<'t> CriteriaBuilder<'t> {
             Some(ref sort_criteria) => {
                 for asc_desc in sort_criteria {
                     criterion = match asc_desc {
-                        AscDescName::Asc(field) => Box::new(AscDesc::asc(
+                        AscDescName::Asc(Member::Field(field)) => Box::new(AscDesc::asc(
                             &self.index,
                             &self.rtxn,
                             criterion,
                             field.to_string(),
                         )?),
-                        AscDescName::Desc(field) => Box::new(AscDesc::desc(
+                        AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
                             &self.index,
                             &self.rtxn,
                             criterion,
                             field.to_string(),
                         )?),
+                        AscDescName::Asc(Member::Geo(point)) => Box::new(Geo::asc(
+                            &self.index,
+                            &self.rtxn,
+                            criterion,
+                            point.clone(),
+                        )?),
+                        AscDescName::Desc(Member::Geo(point)) => Box::new(Geo::desc(
+                            &self.index,
+                            &self.rtxn,
+                            criterion,
+                            point.clone(),
+                        )?),
                     };
                 }
                 criterion
@@ -21,7 +21,9 @@ use crate::error::UserError;
 use crate::heed_codec::facet::{
     FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
 };
-use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result};
+use crate::{
+    distance_between_two_points, CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result,
+};
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum Operator {
@@ -32,6 +34,8 @@ pub enum Operator {
     LowerThan(f64),
     LowerThanOrEqual(f64),
     Between(f64, f64),
+    GeoLowerThan([f64; 2], f64),
+    GeoGreaterThan([f64; 2], f64),
 }
 
 impl Operator {
@@ -46,6 +50,8 @@ impl Operator {
             LowerThan(n) => (GreaterThanOrEqual(n), None),
             LowerThanOrEqual(n) => (GreaterThan(n), None),
             Between(n, m) => (LowerThan(n), Some(GreaterThan(m))),
+            GeoLowerThan(point, distance) => (GeoGreaterThan(point, distance), None),
+            GeoGreaterThan(point, distance) => (GeoLowerThan(point, distance), None),
         }
     }
 }
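The negation mapping above works because `GeoGreaterThan` is later evaluated (see the `@@ -440,6 +506,34 @@` hunk below) as a set complement over the geo-faceted documents, exactly as the PR body describes. Sketched with plain bitmaps and illustrative values:

```rust
use roaring::RoaringBitmap;

fn main() {
    let geo_faceted: RoaringBitmap = (0u32..10).collect();
    let within_radius: RoaringBitmap = [1u32, 3, 4].iter().copied().collect();

    // documents farther than the radius = all geo documents minus the near ones
    let beyond_radius = &geo_faceted - &within_radius;
    assert_eq!(beyond_radius.len(), 7);
    assert!(!beyond_radius.contains(3));
}
```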
@@ -131,6 +137,7 @@ impl FilterCondition {
             Rule::leq => Ok(Self::lower_than_or_equal(fim, ff, pair)?),
             Rule::less => Ok(Self::lower_than(fim, ff, pair)?),
             Rule::between => Ok(Self::between(fim, ff, pair)?),
+            Rule::geo_radius => Ok(Self::geo_radius(fim, ff, pair)?),
             Rule::not => Ok(Self::from_pairs(fim, ff, pair.into_inner())?.negate()),
             Rule::prgm => Self::from_pairs(fim, ff, pair.into_inner()),
             Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
@@ -156,6 +163,65 @@ impl FilterCondition {
         }
     }
 
+    fn geo_radius(
+        fields_ids_map: &FieldsIdsMap,
+        filterable_fields: &HashSet<String>,
+        item: Pair<Rule>,
+    ) -> Result<FilterCondition> {
+        if !filterable_fields.contains("_geo") {
+            return Err(UserError::InvalidFilterAttribute(PestError::new_from_span(
+                ErrorVariant::CustomError {
+                    message: format!(
+                        "attribute `_geo` is not filterable, available filterable attributes are: {}",
+                        filterable_fields.iter().join(", "),
+                    ),
+                },
+                item.as_span(),
+            )))?;
+        }
+        let mut items = item.into_inner();
+        let fid = match fields_ids_map.id("_geo") {
+            Some(fid) => fid,
+            None => return Ok(Empty),
+        };
+        let parameters_item = items.next().unwrap();
+        // We don't need more than 3 parameters, but to handle errors correctly we are still going
+        // to extract the first 4 parameters
+        let param_span = parameters_item.as_span();
+        let parameters = parameters_item
+            .into_inner()
+            .take(4)
+            .map(|param| (param.clone(), param.as_span()))
+            .map(|(param, span)| pest_parse(param).0.map(|arg| (arg, span)))
+            .collect::<StdResult<Vec<(f64, _)>, _>>()
+            .map_err(UserError::InvalidFilter)?;
+        if parameters.len() != 3 {
+            return Err(UserError::InvalidFilter(PestError::new_from_span(
+                ErrorVariant::CustomError {
+                    message: format!("The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`"),
+                },
+                // we want to point to the last parameter, and if there were no parameters we
+                // point to the parentheses
+                parameters.last().map(|param| param.1.clone()).unwrap_or(param_span),
+            )))?;
+        }
+        let (lat, lng, distance) = (&parameters[0], &parameters[1], parameters[2].0);
+        if let Some(span) = (!(-181.0..181.).contains(&lat.0))
+            .then(|| &lat.1)
+            .or((!(-181.0..181.).contains(&lng.0)).then(|| &lng.1))
+        {
+            return Err(UserError::InvalidFilter(PestError::new_from_span(
+                ErrorVariant::CustomError {
+                    message: format!(
+                        "Latitude and longitude must be contained between -180 and 180 degrees."
+                    ),
+                },
+                span.clone(),
+            )))?;
+        }
+        Ok(Operator(fid, GeoLowerThan([lat.0, lng.0], distance)))
+    }
+
     fn between(
         fields_ids_map: &FieldsIdsMap,
         filterable_fields: &HashSet<String>,
@@ -440,6 +506,34 @@ impl FilterCondition {
             LowerThan(val) => (Included(f64::MIN), Excluded(*val)),
             LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)),
             Between(left, right) => (Included(*left), Included(*right)),
+            GeoLowerThan(base_point, distance) => {
+                let rtree = match index.geo_rtree(rtxn)? {
+                    Some(rtree) => rtree,
+                    None => return Ok(RoaringBitmap::new()),
+                };
+
+                let result = rtree
+                    .nearest_neighbor_iter(base_point)
+                    .take_while(|point| {
+                        distance_between_two_points(base_point, point.geom()) < *distance
+                    })
+                    .map(|point| point.data)
+                    .collect();
+
+                return Ok(result);
+            }
+            GeoGreaterThan(point, distance) => {
+                let result = Self::evaluate_operator(
+                    rtxn,
+                    index,
+                    numbers_db,
+                    strings_db,
+                    field_id,
+                    &GeoLowerThan(point.clone(), *distance),
+                )?;
+                let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?;
+                return Ok(geo_faceted_doc_ids - result);
+            }
         };
 
         // Ask for the biggest value that can exist for this specific field, if it exists
@@ -505,6 +599,19 @@ fn field_id(
 ) -> StdResult<Option<FieldId>, PestError<Rule>> {
     // lexing ensures that we at least have a key
     let key = items.next().unwrap();
+    if key.as_rule() == Rule::reserved {
+        return Err(PestError::new_from_span(
+            ErrorVariant::CustomError {
+                message: format!(
+                    "`{}` is a reserved keyword and therefore can't be used as a filter expression. \
+                    Available filterable attributes are: {}",
+                    key.as_str(),
+                    filterable_fields.iter().join(", "),
+                ),
+            },
+            key.as_span(),
+        ));
+    }
+
     if !filterable_fields.contains(key.as_str()) {
         return Err(PestError::new_from_span(
@@ -581,6 +688,13 @@ mod tests {
         let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
         let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
         assert_eq!(condition, expected);
+
+        let result = FilterCondition::from_str(&rtxn, &index, "_geo = France");
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error.to_string().contains(
+            "`_geo` is a reserved keyword and therefore can't be used as a filter expression."
+        ));
     }
 
     #[test]
@@ -663,6 +777,92 @@ mod tests {
         assert_eq!(condition, expected);
     }
 
+    #[test]
+    fn geo_radius() {
+        let path = tempfile::tempdir().unwrap();
+        let mut options = EnvOpenOptions::new();
+        options.map_size(10 * 1024 * 1024); // 10 MB
+        let index = Index::new(options, &path).unwrap();
+
+        // Set `_geo` and `price` as the filterable fields.
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index, 0);
+        builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
+        builder.set_filterable_fields(hashset! { S("_geo"), S("price") });
+        builder.execute(|_, _| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+        // basic test
+        let condition =
+            FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap();
+        let expected = Operator(0, GeoLowerThan([12., 13.0005], 2000.));
+        assert_eq!(condition, expected);
+
+        // test the negation of the GeoLowerThan
+        let condition =
+            FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap();
+        let expected = Operator(0, GeoGreaterThan([50., 18.], 2000.500));
+        assert_eq!(condition, expected);
+
+        // composition of multiple operations
+        let condition = FilterCondition::from_str(
+            &rtxn,
+            &index,
+            "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10",
+        )
+        .unwrap();
+        let expected = Or(
+            Box::new(And(
+                Box::new(Operator(0, GeoGreaterThan([1., 2.], 300.))),
+                Box::new(Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))),
+            )),
+            Box::new(Operator(1, LowerThanOrEqual(10.))),
+        );
+        assert_eq!(condition, expected);
+
+        // georadius doesn't have any parameters
+        let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius");
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error.to_string().contains("The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`"));
+
+        // georadius doesn't have any parameters
+        let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()");
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error.to_string().contains("The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`"));
+
+        // georadius doesn't have enough parameters
+        let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)");
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error.to_string().contains("The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`"));
+
+        // georadius has too many parameters
+        let result =
+            FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)");
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error.to_string().contains("The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`"));
+
+        // georadius has a bad latitude
+        let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-200, 150, 10)");
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error
+            .to_string()
+            .contains("Latitude and longitude must be contained between -180 and 180 degrees."));
+
+        // georadius has a bad longitude
+        let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 181, 10)");
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error
+            .to_string()
+            .contains("Latitude and longitude must be contained between -180 and 180 degrees."));
+    }
+
     #[test]
     fn from_array() {
         let path = tempfile::tempdir().unwrap();
@@ -1,4 +1,4 @@
-key = _{quoted | word}
+key = _{reserved | quoted | word }
 value = _{quoted | word }
 quoted = _{ (PUSH("'") | PUSH("\"")) ~ string ~ POP }
 string = {char*}
@@ -8,6 +8,9 @@ char = _{ !(PEEK | "\\") ~ ANY
     | "\\" ~ (PEEK | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
     | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})}
 
+reserved = { "_geo" | "_geoDistance" | "_geoPoint" | ("_geoPoint" ~ parameters) }
+// we deliberately choose to allow empty parameters to generate more specific error messages later
+parameters = {("(" ~ (value ~ ",")* ~ value? ~ ")") | ""}
 condition = _{between | eq | greater | less | geq | leq | neq}
 between = {key ~ value ~ "TO" ~ value}
 geq = {key ~ ">=" ~ value}
@@ -16,10 +19,11 @@ neq = {key ~ "!=" ~ value}
 eq = {key ~ "=" ~ value}
 greater = {key ~ ">" ~ value}
 less = {key ~ "<" ~ value}
+geo_radius = {"_geoRadius" ~ parameters }
 
 prgm = {SOI ~ expr ~ EOI}
 expr = _{ ( term ~ (operation ~ term)* ) }
-term = { ("(" ~ expr ~ ")") | condition | not }
+term = { ("(" ~ expr ~ ")") | condition | not | geo_radius }
 operation = _{ and | or }
 and = {"AND"}
 or = {"OR"}
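Tying the grammar back to the tests earlier in this diff: `geo_radius` now parses as a `term`, and reserved keys are lexed separately so that misuse produces a targeted error. Illustrative inputs, taken from those tests:

```rust
// Accepted: `_geoRadius` composes with NOT/AND/OR like any other term.
let accepted = "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10";

// Rejected: `_geo` matches the `reserved` rule, so using it as a plain filter
// key is reported as a reserved keyword instead of silently failing.
let rejected = "_geo = France";
```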
@@ -148,7 +148,8 @@ impl<'a> Search<'a> {
         if let Some(sort_criteria) = &self.sort_criteria {
             let sortable_fields = self.index.sortable_fields(self.rtxn)?;
             for asc_desc in sort_criteria {
-                let field = asc_desc.field();
+                // we are not supposed to find any geoPoint in the criterion
+                if let Some(field) = asc_desc.field() {
                     if !sortable_fields.contains(field) {
                         return Err(UserError::InvalidSortableAttribute {
                             field: field.to_string(),
@@ -158,6 +159,7 @@ impl<'a> Search<'a> {
                     }
                 }
             }
+        }
 
         // We check that the sort ranking rule exists and throw an
         // error if we try to use it and that it doesn't.
@@ -48,6 +48,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
         self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
         self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
         self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
+        self.index.delete_geo_rtree(self.wtxn)?;
+        self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
 
         // We clean all the faceted documents ids.
         let empty = RoaringBitmap::default();
@@ -93,7 +95,7 @@ mod tests {
         let content = &br#"[
             { "id": 0, "name": "kevin", "age": 20 },
             { "id": 1, "name": "kevina" },
-            { "id": 2, "name": "benoit", "country": "France" }
+            { "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } }
         ]"#[..];
         let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
         builder.update_format(UpdateFormat::Json);
@@ -107,13 +109,15 @@ mod tests {
 
         let rtxn = index.read_txn().unwrap();
 
-        assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 4);
+        assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 5);
 
         assert!(index.words_fst(&rtxn).unwrap().is_empty());
         assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
         assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
         assert!(index.documents_ids(&rtxn).unwrap().is_empty());
         assert!(index.field_distribution(&rtxn).unwrap().is_empty());
+        assert!(index.geo_rtree(&rtxn).unwrap().is_none());
+        assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());
 
         assert!(index.word_docids.is_empty(&rtxn).unwrap());
         assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
@@ -380,6 +380,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
 
         drop(iter);
 
+        if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
+            let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?;
+
+            let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree
+                .iter()
+                .filter(|&point| self.documents_ids.contains(point.data))
+                .cloned()
+                .map(|point| (point, point.data))
+                .unzip();
+            points_to_remove.iter().for_each(|point| {
+                rtree.remove(&point);
+            });
+            geo_faceted_doc_ids -= docids_to_remove;
+
+            self.index.put_geo_rtree(self.wtxn, &rtree)?;
+            self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
+        }
+
         // We delete the documents ids that are under the facet field id values.
         remove_docids_from_facet_field_id_number_docids(
             self.wtxn,
@@ -542,6 +560,8 @@ where
 
 #[cfg(test)]
 mod tests {
+    use std::collections::HashSet;
+
     use big_s::S;
     use heed::EnvOpenOptions;
     use maplit::hashset;
@ -657,4 +677,95 @@ mod tests {
|
|||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn delete_documents_with_geo_points() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.set_primary_key(S("id"));
|
||||||
|
builder.set_filterable_fields(hashset!(S("_geo")));
|
||||||
|
builder.set_sortable_fields(hashset!(S("_geo")));
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
|
||||||
|
let content = &r#"[
|
||||||
|
{"id":"1","city":"Lille", "_geo": { "lat": 50.629973371633746, "lng": 3.0569447399419570 } },
|
||||||
|
{"id":"2","city":"Mons-en-Barœul", "_geo": { "lat": 50.641586120121050, "lng": 3.1106593480348670 } },
|
||||||
|
{"id":"3","city":"Hellemmes", "_geo": { "lat": 50.631220965518080, "lng": 3.1106399673339933 } },
|
||||||
|
{"id":"4","city":"Villeneuve-d'Ascq", "_geo": { "lat": 50.622468098014565, "lng": 3.1476425513437140 } },
|
||||||
|
{"id":"5","city":"Hem", "_geo": { "lat": 50.655250871381355, "lng": 3.1897297266244130 } },
|
||||||
|
{"id":"6","city":"Roubaix", "_geo": { "lat": 50.692473451896710, "lng": 3.1763326737747650 } },
|
||||||
|
{"id":"7","city":"Tourcoing", "_geo": { "lat": 50.726397466736480, "lng": 3.1541653659578670 } },
|
||||||
|
{"id":"8","city":"Mouscron", "_geo": { "lat": 50.745325554908610, "lng": 3.2206407854429853 } },
|
||||||
|
{"id":"9","city":"Tournai", "_geo": { "lat": 50.605342528602630, "lng": 3.3758586941351414 } },
|
||||||
|
{"id":"10","city":"Ghent", "_geo": { "lat": 51.053777403679035, "lng": 3.6957733119926930 } },
|
||||||
|
{"id":"11","city":"Brussels", "_geo": { "lat": 50.846640974544690, "lng": 4.3370663564281840 } },
|
||||||
|
{"id":"12","city":"Charleroi", "_geo": { "lat": 50.409570138889480, "lng": 4.4347354315085520 } },
|
||||||
|
{"id":"13","city":"Mons", "_geo": { "lat": 50.450294178855420, "lng": 3.9623722870904690 } },
|
||||||
|
{"id":"14","city":"Valenciennes", "_geo": { "lat": 50.351817774473545, "lng": 3.5326283646928800 } },
|
||||||
|
{"id":"15","city":"Arras", "_geo": { "lat": 50.284487528579950, "lng": 2.7637515844478160 } },
|
||||||
|
{"id":"16","city":"Cambrai", "_geo": { "lat": 50.179340577906700, "lng": 3.2189409952502930 } },
|
||||||
|
{"id":"17","city":"Bapaume", "_geo": { "lat": 50.111276127236400, "lng": 2.8547894666083120 } },
|
||||||
|
{"id":"18","city":"Amiens", "_geo": { "lat": 49.931472529669996, "lng": 2.2710499758317080 } },
|
||||||
|
{"id":"19","city":"Compiègne", "_geo": { "lat": 49.444980887725656, "lng": 2.7913841281529015 } },
|
||||||
|
{"id":"20","city":"Paris", "_geo": { "lat": 48.902100060895480, "lng": 2.3708400867406930 } }
|
||||||
|
]"#[..];
|
||||||
|
let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
|
||||||
|
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||||
|
builder.update_format(UpdateFormat::Json);
|
||||||
|
builder.execute(content.as_bytes(), |_, _| ()).unwrap();
|
||||||
|
|
||||||
|
let external_document_ids = index.external_documents_ids(&wtxn).unwrap();
|
||||||
|
let ids_to_delete: Vec<u32> = external_ids_to_delete
|
||||||
|
.iter()
|
||||||
|
.map(|id| external_document_ids.get(id.as_bytes()).unwrap())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Delete some documents.
|
||||||
|
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
|
||||||
|
external_ids_to_delete.iter().for_each(|id| drop(builder.delete_external_id(id)));
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let rtree = index.geo_rtree(&rtxn).unwrap().unwrap();
|
||||||
|
let geo_faceted_doc_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
|
||||||
|
|
||||||
|
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
|
||||||
|
let all_geo_documents = index
|
||||||
|
.documents(&rtxn, all_geo_ids.iter().copied())
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|(id, _)| *id)
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
|
let all_geo_faceted_ids = geo_faceted_doc_ids.iter().collect::<Vec<_>>();
|
||||||
|
let all_geo_faceted_documents = index
|
||||||
|
.documents(&rtxn, all_geo_faceted_ids.iter().copied())
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|(id, _)| *id)
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
all_geo_documents, all_geo_faceted_documents,
|
||||||
|
"There is an inconsistency between the geo_faceted database and the rtree"
|
||||||
|
);
|
||||||
|
|
||||||
|
for id in all_geo_documents.iter() {
|
||||||
|
assert!(!ids_to_delete.contains(&id), "The document {} was supposed to be deleted", id);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
all_geo_ids.len(),
|
||||||
|
all_geo_documents.len(),
|
||||||
|
"We deleted documents that were not supposed to be deleted"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
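The test above checks that the RTree and the `geo_faceted_documents_ids` bitmap stay consistent across deletions. For context on the query side, rstar's nearest-neighbor iterator yields points by increasing distance from the query point, which is what makes a radius filter a simple `take_while`. A minimal standalone sketch, using bare `[f64; 2]` points and planar distance rather than the engine's `GeoPoint` type or real geodesic math:

```rust
use rstar::RTree;

/// Collect every point of `tree` within `radius` of `center`.
fn within_radius(tree: &RTree<[f64; 2]>, center: [f64; 2], radius: f64) -> Vec<[f64; 2]> {
    tree.nearest_neighbor_iter(&center)
        // Points come back sorted by increasing distance, so the scan can
        // stop at the first point that falls outside the radius.
        .take_while(|p| {
            let d2 = (p[0] - center[0]).powi(2) + (p[1] - center[1]).powi(2);
            d2.sqrt() <= radius
        })
        .copied()
        .collect()
}

fn main() {
    let tree = RTree::bulk_load(vec![[50.63, 3.11], [50.62, 3.15], [48.90, 2.37]]);
    // Both Lille-area points are within one degree of the center; Paris is not.
    assert_eq!(within_radius(&tree, [50.63, 3.06], 1.0).len(), 2);
}
```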
@ -0,0 +1,44 @@
+use std::fs::File;
+use std::io;
+
+use concat_arrays::concat_arrays;
+use serde_json::Value;
+
+use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
+use crate::{FieldId, InternalError, Result, UserError};
+
+/// Extracts the geographical coordinates contained in each document under the `_geo` field.
+///
+/// Returns the generated grenad reader containing the docid as key associated with the (latitude, longitude) pair.
+pub fn extract_geo_points<R: io::Read>(
+    mut obkv_documents: grenad::Reader<R>,
+    indexer: GrenadParameters,
+    primary_key_id: FieldId,
+    geo_field_id: FieldId,
+) -> Result<grenad::Reader<File>> {
+    let mut writer = tempfile::tempfile().and_then(|file| {
+        create_writer(indexer.chunk_compression_type, indexer.chunk_compression_level, file)
+    })?;
+
+    while let Some((docid_bytes, value)) = obkv_documents.next()? {
+        let obkv = obkv::KvReader::new(value);
+        let point: Value = match obkv.get(geo_field_id) {
+            Some(point) => serde_json::from_slice(point).map_err(InternalError::SerdeJson)?,
+            None => continue,
+        };
+
+        if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) {
+            // this creates an array of 16 bytes (two 8-byte floats)
+            let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+            writer.insert(docid_bytes, bytes)?;
+        } else {
+            // all documents must have a primary key, so we can unwrap safely here
+            let primary_key = obkv.get(primary_key_id).unwrap();
+            let primary_key =
+                serde_json::from_slice(primary_key).map_err(InternalError::SerdeJson)?;
+            Err(UserError::InvalidGeoField { document_id: primary_key, object: point })?
+        }
+    }
+
+    Ok(writer_into_reader(writer)?)
+}
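The 16-byte value written above is just the two coordinates packed back to back in native endianness, latitude first. A dependency-free sketch of the same layout and its inverse (the real code uses `concat_arrays!` and grenad, both omitted here):

```rust
use std::convert::TryInto;

/// Pack (lat, lng) the way the extractor does: two native-endian f64s.
fn encode(lat: f64, lng: f64) -> [u8; 16] {
    let mut bytes = [0u8; 16];
    bytes[..8].copy_from_slice(&lat.to_ne_bytes());
    bytes[8..].copy_from_slice(&lng.to_ne_bytes());
    bytes
}

/// Inverse of `encode`, mirroring what the chunk writer does on read-back.
fn decode(bytes: &[u8; 16]) -> (f64, f64) {
    let lat = f64::from_ne_bytes(bytes[..8].try_into().unwrap());
    let lng = f64::from_ne_bytes(bytes[8..].try_into().unwrap());
    (lat, lng)
}

fn main() {
    let bytes = encode(50.63122096551808, 3.1106399673339933);
    assert_eq!(decode(&bytes), (50.63122096551808, 3.1106399673339933));
}
```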
@ -3,6 +3,7 @@ mod extract_facet_number_docids;
 mod extract_facet_string_docids;
 mod extract_fid_docid_facet_values;
 mod extract_fid_word_count_docids;
+mod extract_geo_points;
 mod extract_word_docids;
 mod extract_word_level_position_docids;
 mod extract_word_pair_proximity_docids;
@ -19,6 +20,7 @@ use self::extract_facet_number_docids::extract_facet_number_docids;
 use self::extract_facet_string_docids::extract_facet_string_docids;
 use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values;
 use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
+use self::extract_geo_points::extract_geo_points;
 use self::extract_word_docids::extract_word_docids;
 use self::extract_word_level_position_docids::extract_word_level_position_docids;
 use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
@ -37,6 +39,8 @@ pub(crate) fn data_from_obkv_documents(
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: Option<HashSet<FieldId>>,
     faceted_fields: HashSet<FieldId>,
+    primary_key_id: FieldId,
+    geo_field_id: Option<FieldId>,
     stop_words: Option<fst::Set<&[u8]>>,
 ) -> Result<()> {
     let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
@ -48,6 +52,8 @@ pub(crate) fn data_from_obkv_documents(
                 lmdb_writer_sx.clone(),
                 &searchable_fields,
                 &faceted_fields,
+                primary_key_id,
+                geo_field_id,
                 &stop_words,
             )
         })
@ -168,6 +174,8 @@ fn extract_documents_data(
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: &Option<HashSet<FieldId>>,
     faceted_fields: &HashSet<FieldId>,
+    primary_key_id: FieldId,
+    geo_field_id: Option<FieldId>,
     stop_words: &Option<fst::Set<&[u8]>>,
 ) -> Result<(
     grenad::Reader<CursorClonableMmap>,
@ -177,6 +185,19 @@
 
     let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
 
+    if let Some(geo_field_id) = geo_field_id {
+        let documents_chunk_cloned = documents_chunk.clone();
+        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            let result =
+                extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_field_id);
+            let _ = match result {
+                Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
+                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
+            };
+        });
+    }
+
     let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
         rayon::join(
             || {
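Worth noting in the hunk above: the geo extraction runs on a detached `rayon::spawn` task, and its outcome, success or error, travels back over the same crossbeam channel as the other typed chunks, with the send result deliberately discarded. A toy reproduction of that fan-out shape (names and payload types are illustrative, not milli's):

```rust
use crossbeam_channel::unbounded;

fn main() {
    let (sender, receiver) = unbounded::<Result<Vec<u8>, String>>();

    rayon::spawn(move || {
        // Stand-in for `extract_geo_points` producing a serialized chunk.
        let chunk: Result<Vec<u8>, String> = Ok(vec![1, 2, 3]);
        // As in the extractor, a failed send is ignored: if the writing end
        // hung up, there is nobody left to hand the chunk to anyway.
        let _ = sender.send(chunk);
    });

    // The channel closes once the task drops its sender, ending the loop.
    for chunk in receiver {
        println!("received chunk: {:?}", chunk);
    }
}
```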
@ -228,11 +228,27 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             Receiver<Result<TypedChunk>>,
         ) = crossbeam_channel::unbounded();
 
+        // get the primary key field id
+        let primary_key_id = fields_ids_map.id(&primary_key).unwrap();
+
         // get searchable fields for word databases
         let searchable_fields =
             self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter);
         // get filterable fields for facet databases
         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
+        // get the fid of the `_geo` field.
+        let geo_field_id = match self.index.fields_ids_map(self.wtxn)?.id("_geo") {
+            Some(gfid) => {
+                let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
+                let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
+                if is_sortable || is_filterable {
+                    Some(gfid)
+                } else {
+                    None
+                }
+            }
+            None => None,
+        };
 
         let stop_words = self.index.stop_words(self.wtxn)?;
         // let stop_words = stop_words.as_ref();
@ -261,6 +277,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
                 lmdb_writer_sx.clone(),
                 searchable_fields,
                 faceted_fields,
+                primary_key_id,
+                geo_field_id,
                 stop_words,
             )
         });
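Restated outside the indexer, the gate above boils down to a single `Option::filter`: the `_geo` field id is only forwarded to the extractors when the field was declared sortable or filterable, otherwise geosearch indexing is skipped entirely. A hypothetical standalone sketch, with field ids as plain integers:

```rust
fn gate_geo_field(geo_fid: Option<u16>, sortable: &[u16], filterable: &[u16]) -> Option<u16> {
    // Keep the field id only if at least one geo feature can actually use it.
    geo_fid.filter(|fid| sortable.contains(fid) || filterable.contains(fid))
}

fn main() {
    assert_eq!(gate_geo_field(Some(3), &[3], &[]), Some(3));
    assert_eq!(gate_geo_field(Some(3), &[], &[]), None);
    assert_eq!(gate_geo_field(None, &[3], &[3]), None);
}
```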
@ -876,12 +894,12 @@ mod tests {
         // First we send 3 documents with an id for only one of them.
         let mut wtxn = index.write_txn().unwrap();
         let documents = &r#"[
-            { "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5 },
+            { "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
             { "id": 456, "title": "Le Petit Prince", "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 },
             { "id": 1, "title": "Alice In Wonderland", "author": "Lewis Carroll", "genre": "fantasy", "price": 25.99 },
             { "id": 1344, "title": "The Hobbit", "author": "J. R. R. Tolkien", "genre": "fantasy" },
             { "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" },
-            { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams" }
+            { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
         ]"#[..];
         let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
         builder.update_format(UpdateFormat::Json);
@ -917,7 +935,7 @@ mod tests {
             { "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
             { "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" },
             { "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
-            { "objectId": 30, "title": "Hamlet" }
+            { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
         ]"#[..];
         let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
         builder.update_format(UpdateFormat::Json);
@ -934,7 +952,7 @@ mod tests {
         assert!(external_documents_ids.get("30").is_none());
 
         let content = &br#"[
-            { "objectId": 30, "title": "Hamlet" }
+            { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
         ]"#[..];
         let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
         builder.update_format(UpdateFormat::Json);
@ -944,7 +962,7 @@ mod tests {
         assert!(external_documents_ids.get("30").is_some());
 
         let content = &br#"[
-            { "objectId": 30, "title": "Hamlet" }
+            { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
         ]"#[..];
         let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
         builder.update_format(UpdateFormat::Json);
@ -1,4 +1,5 @@
 use std::borrow::Cow;
+use std::convert::TryInto;
 use std::fs::File;
 
 use heed::types::ByteSlice;
@ -6,11 +7,12 @@ use heed::{BytesDecode, RwTxn};
 use roaring::RoaringBitmap;
 
 use super::helpers::{
-    roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
+    self, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, valid_lmdb_key,
+    CursorClonableMmap,
 };
 use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
 use crate::update::index_documents::helpers::into_clonable_grenad;
-use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Index, Result};
+use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
 
 pub(crate) enum TypedChunk {
     DocidWordPositions(grenad::Reader<CursorClonableMmap>),
@ -24,6 +26,7 @@ pub(crate) enum TypedChunk {
     WordPairProximityDocids(grenad::Reader<File>),
     FieldIdFacetStringDocids(grenad::Reader<File>),
     FieldIdFacetNumberDocids(grenad::Reader<File>),
+    GeoPoints(grenad::Reader<File>),
 }
 
 /// Write typed chunk in the corresponding LMDB database of the provided index.
@ -177,6 +180,24 @@ pub(crate) fn write_typed_chunk_into_index(
             )?;
             is_merged_database = true;
         }
+        TypedChunk::GeoPoints(mut geo_points) => {
+            let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
+            let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
+
+            while let Some((key, value)) = geo_points.next()? {
+                // convert the key back to a u32 (4 bytes)
+                let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
+
+                // convert the latitude and longitude back to two f64s (8 bytes each)
+                let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
+                let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
+                let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
+                rtree.insert(GeoPoint::new(point, docid));
+                geo_faceted_docids.insert(docid);
+            }
+            index.put_geo_rtree(wtxn, &rtree)?;
+            index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
+        }
     }
 
     Ok((RoaringBitmap::new(), is_merged_database))
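`put_geo_rtree` above persists the whole tree as a single value: rstar's types implement serde's traits behind the crate's `serde` feature, so one round trip through bincode is enough. A minimal sketch of that round trip, assuming the `serde` feature is enabled on rstar:

```rust
use rstar::RTree;

fn main() {
    let tree: RTree<[f64; 2]> = RTree::bulk_load(vec![[50.63, 3.11], [48.90, 2.37]]);

    // Serialize the whole tree to a flat byte buffer, the shape in which the
    // index stores it, then read it back.
    let bytes = bincode::serialize(&tree).unwrap();
    let restored: RTree<[f64; 2]> = bincode::deserialize(&bytes).unwrap();

    assert_eq!(restored.size(), tree.size());
}
```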
@ -1,17 +1,17 @@
-{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","":""}
+{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":""}
-{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""}
+{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":""}
-{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""}
+{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":""}
-{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""}
+{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":""}
-{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""}
+{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":""}
-{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""}
+{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":""}
-{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""}
+{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"geo_rank":34692,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","_geo": { "lat": 50.78776041427129, "lng": 2.661201766290338 },"":""}
-{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""}
+{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":""}
-{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","":""}
+{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":""}
-{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","":""}
+{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":""}
-{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","":""}
+{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":""}
-{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","":""}
+{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":""}
-{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","":""}
+{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"geo_rank":739203,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","_geo": { "lat": 43.99150729038736, "lng": 3.606143957295055 },"":""}
-{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","":""}
+{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":""}
-{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","":""}
+{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":""}
-{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","":""}
+{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":9422437,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","_geo": { "lat": 35.06462306367058, "lng": 135.8338440354251 },"":""}
-{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","":""}
+{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":9339230,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","_geo": { "lat": 34.39548365683149, "lng": 132.4535960928883 },"":""}
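The `geo_rank` column added to each document above is its distance in meters from the point the filter tests query, (50.630010347667806, 3.086251829166809). A sketch of how such a rank can be derived with the standard haversine formula (the constant and formula below are textbook values, not code from this PR):

```rust
/// Great-circle distance in meters between two (lat, lng) pairs in degrees.
fn haversine_m(a: [f64; 2], b: [f64; 2]) -> f64 {
    const EARTH_RADIUS_M: f64 = 6_371_000.0;
    let (lat1, lng1) = (a[0].to_radians(), a[1].to_radians());
    let (lat2, lng2) = (b[0].to_radians(), b[1].to_radians());
    let h = ((lat2 - lat1) / 2.0).sin().powi(2)
        + lat1.cos() * lat2.cos() * ((lng2 - lng1) / 2.0).sin().powi(2);
    2.0 * EARTH_RADIUS_M * h.sqrt().asin()
}

fn main() {
    let center = [50.630010347667806, 3.086251829166809];
    let doc_a = [50.62984446145472, 3.085712705162039];
    // Document "A" has geo_rank 43; haversine gives roughly the same figure.
    println!("{:.0} m", haversine_m(center, doc_a));
}
```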
@ -47,6 +47,11 @@ test_filter!(eq_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank=1")]
 test_filter!(eq_string_or_filter, vec![Left(vec!["tag=red", "tag=green"])]);
 test_filter!(eq_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank=1"])]);
 test_filter!(eq_number_or_filter, vec![Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]);
+test_filter!(geo_radius, vec![Right("_geoRadius(50.630010347667806, 3.086251829166809, 100000)")]);
+test_filter!(
+    not_geo_radius,
+    vec![Right("NOT _geoRadius(50.630010347667806, 3.086251829166809, 1000000)")]
+);
 test_filter!(eq_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")]);
 test_filter!(
     eq_complex_filter_2,
@ -6,7 +6,7 @@ use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
 use maplit::{hashmap, hashset};
 use milli::update::{Settings, UpdateBuilder, UpdateFormat};
-use milli::{AscDesc, Criterion, DocumentId, Index};
+use milli::{AscDesc, Criterion, DocumentId, Index, Member};
 use serde::Deserialize;
 use slice_group_by::GroupBy;
 
@ -37,6 +37,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     builder.set_filterable_fields(hashset! {
         S("tag"),
         S("asc_desc_rank"),
+        S("_geo"),
     });
     builder.set_sortable_fields(hashset! {
         S("tag"),
@ -99,11 +100,11 @@ pub fn expected_order(
                 new_groups
                     .extend(group.linear_group_by_key(|d| d.proximity_rank).map(Vec::from));
             }
-            Criterion::Sort if sort_by == [AscDesc::Asc(S("tag"))] => {
+            Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
                 group.sort_by_key(|d| d.sort_by_rank);
                 new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
             }
-            Criterion::Sort if sort_by == [AscDesc::Desc(S("tag"))] => {
+            Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
                 group.sort_by_key(|d| Reverse(d.sort_by_rank));
                 new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
             }
@ -162,6 +163,10 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
         if document.asc_desc_rank > filter.parse().unwrap() {
             id = Some(document.id.clone())
         }
+    } else if filter.starts_with("_geoRadius") {
+        id = (document.geo_rank < 100000).then(|| document.id.clone());
+    } else if filter.starts_with("NOT _geoRadius") {
+        id = (document.geo_rank > 1000000).then(|| document.id.clone());
     }
     id
 }
@ -205,6 +210,7 @@ pub struct TestDocument {
     pub exact_rank: u32,
     pub asc_desc_rank: u32,
     pub sort_by_rank: u32,
+    pub geo_rank: u32,
     pub title: String,
     pub description: String,
     pub tag: String,
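Note that `execute_filter` above only pattern-matches on the filter string and compares against the precomputed `geo_rank`; the engine itself parses `_geoRadius(lat, lng, meters)` in its pest grammar. A hypothetical helper showing what such a string carries (purely illustrative, not the parser from this PR):

```rust
/// Pull the three numbers out of a `_geoRadius(lat, lng, meters)` string.
fn parse_geo_radius(filter: &str) -> Option<(f64, f64, f64)> {
    let args = filter.strip_prefix("_geoRadius(")?.strip_suffix(')')?;
    let mut numbers = args.split(',').map(|n| n.trim().parse::<f64>().ok());
    Some((numbers.next()??, numbers.next()??, numbers.next()??))
}

fn main() {
    let parsed = parse_geo_radius("_geoRadius(50.630010347667806, 3.086251829166809, 100000)");
    assert_eq!(parsed, Some((50.630010347667806, 3.086251829166809, 100000.0)));
}
```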
@ -5,7 +5,7 @@ use heed::EnvOpenOptions
 use itertools::Itertools;
 use maplit::hashset;
 use milli::update::{Settings, UpdateBuilder, UpdateFormat};
-use milli::{AscDesc, Criterion, Index, Search, SearchResult};
+use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
 use rand::Rng;
 use Criterion::*;
 
@ -163,28 +163,28 @@ test_criterion!(
     DISALLOW_OPTIONAL_WORDS,
     ALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Asc(S("tag"))]
+    vec![AscDesc::Asc(Member::Field(S("tag")))]
 );
 test_criterion!(
     sort_by_asc_disallow_typo,
     DISALLOW_OPTIONAL_WORDS,
     DISALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Asc(S("tag"))]
+    vec![AscDesc::Asc(Member::Field(S("tag")))]
 );
 test_criterion!(
     sort_by_desc_allow_typo,
     DISALLOW_OPTIONAL_WORDS,
     ALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Desc(S("tag"))]
+    vec![AscDesc::Desc(Member::Field(S("tag")))]
 );
 test_criterion!(
     sort_by_desc_disallow_typo,
     DISALLOW_OPTIONAL_WORDS,
     DISALLOW_TYPOS,
     vec![Sort],
-    vec![AscDesc::Desc(S("tag"))]
+    vec![AscDesc::Desc(Member::Field(S("tag")))]
 );
 test_criterion!(
     default_criteria_order,
@ -1,6 +1,6 @@
 use big_s::S;
 use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
-use milli::{AscDesc, Error, Search, UserError};
+use milli::{AscDesc, Error, Member, Search, UserError};
 
 use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
 
@ -16,7 +16,7 @@ fn sort_ranking_rule_missing() {
     search.limit(EXTERNAL_DOCUMENTS_IDS.len());
     search.authorize_typos(true);
     search.optional_words(true);
-    search.sort_criteria(vec![AscDesc::Asc(S("tag"))]);
+    search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
 
     let result = search.execute();
     assert!(matches!(result, Err(Error::UserError(UserError::SortRankingRuleMissing))));
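The mechanical `S("tag")` to `Member::Field(S("tag"))` rewrites in these test hunks exist because a sort criterion can now name either a document field or a geo point. An illustrative shape of that two-level enum (not milli's exact definition):

```rust
#[derive(Debug, PartialEq)]
enum Member {
    Field(String),
    Geo([f64; 2]),
}

#[derive(Debug, PartialEq)]
enum AscDesc {
    Asc(Member),
    Desc(Member),
}

fn main() {
    // Sort ascending on a plain field, then ascending by distance to a point.
    let by_tag = AscDesc::Asc(Member::Field("tag".to_string()));
    let by_distance = AscDesc::Asc(Member::Geo([50.63, 3.06]));
    println!("{:?}, {:?}", by_tag, by_distance);
}
```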