mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:11:15 +08:00
Merge #322
322: Geosearch r=ManyTheFish a=irevoire This PR introduces [basic geo-search functionalities](https://github.com/meilisearch/specifications/pull/59); it makes the engine able to index, filter, and sort by geo-point. We decided to use [the rstar library](https://docs.rs/rstar) and to save the points in [an RTree](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html) that we de/serialize in the index database [by using serde](https://serde.rs/) with [bincode](https://docs.rs/bincode). This is not an efficient way to query this tree as it will consume a lot of CPU and memory when a search is made, but at least it is an easy first way to do so. ### What we will have to do on the indexing part: - [x] Index the `_geo` fields from the documents. - [x] Create a new module with an extractor in the `extract` module that takes the `obkv_documents` and retrieves the latitude and longitude coordinates, outputting them in a `grenad::Reader` for further processing. - [x] Call the extractor in the `extract::extract_documents_data` function and send the result to the `TypedChunk` module. - [x] Get the `grenad::Reader` in the `typed_chunk::write_typed_chunk_into_index` function and store all the points in the `rtree`. - [x] Delete the documents from the `RTree` when deleting documents from the database. All this can be done in the `delete_documents.rs` file by getting the data structure and removing the points from it, inserting it back after the modification. - [x] Clear the `RTree` entirely when we clear the documents from the database; everything happens in the `clear_documents.rs` file. - [x] Save a Roaring bitmap of all documents containing the `_geo` field. ### What we will have to do on the query part: - [x] Filter the documents at a certain distance around a point; this is done by [collecting the documents from the searched point](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html#method.nearest_neighbor_iter) while they are in range. 
- [x] We must introduce new `geoLowerThan` and `geoGreaterThan` variants to the `Operator` filter enum. - [x] Implement the `negative` method on both variants where the `geoGreaterThan` variant is implemented by executing the `geoLowerThan` and removing the results found from the whole list of geo faceted documents. - [x] Add the `_geoRadius` function in the pest parser. - [x] Introduce a `_geo` ascending ranking function that takes a point as a parameter, ~~this function must keep the iterator on the `RTree` and make it peekable~~ This was not possible for now; we had to collect the whole iterator. Only the documents that are part of the candidates must be sent too! - [x] This ascending ranking rule will only be active if the search is set up with the `_geoPoint` parameter that indicates the center point of the ascending ranking rule. ----------- - On Meilisearch part: We must introduce a new concept, returning the documents with a new `_geoDistance` field when they have passed through the `_geo` ranking rule; this has never been done before. We could maybe just do it afterward when the documents have been retrieved from the database, computing the distance between the `_geoPoint` and each of the documents to be returned. Co-authored-by: Irevoire <tamo@meilisearch.com> Co-authored-by: cvermand <33010418+bidoubiwa@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
31c8de1cca
@ -60,7 +60,13 @@ $('#query, #filters').on('input', function () {
|
||||
|
||||
const content = document.createElement('div');
|
||||
content.classList.add("content");
|
||||
content.innerHTML = element[prop];
|
||||
|
||||
// Stringify Objects and Arrays to avoid [Object object]
|
||||
if (typeof element[prop] === 'object' && element[prop] !== null) {
|
||||
content.innerHTML = JSON.stringify(element[prop]);
|
||||
} else {
|
||||
content.innerHTML = element[prop];
|
||||
}
|
||||
|
||||
field.appendChild(attribute);
|
||||
field.appendChild(content);
|
||||
|
@ -695,6 +695,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
struct QueryBody {
|
||||
query: Option<String>,
|
||||
filters: Option<String>,
|
||||
sort: Option<String>,
|
||||
facet_filters: Option<Vec<UntaggedEither<Vec<String>, String>>>,
|
||||
facet_distribution: Option<bool>,
|
||||
limit: Option<usize>,
|
||||
@ -754,6 +755,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
search.limit(limit);
|
||||
}
|
||||
|
||||
if let Some(sort) = query.sort {
|
||||
search.sort_criteria(vec![sort.parse().unwrap()]);
|
||||
}
|
||||
|
||||
let SearchResult { matching_words, candidates, documents_ids } =
|
||||
search.execute().unwrap();
|
||||
|
||||
|
@ -16,6 +16,7 @@ flate2 = "1.0.20"
|
||||
fst = "0.4.5"
|
||||
fxhash = "0.2.1"
|
||||
grenad = { version = "0.3.1", default-features = false, features = ["tempfile"] }
|
||||
geoutils = "0.4.1"
|
||||
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||
human_format = "1.0.3"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
@ -27,6 +28,7 @@ once_cell = "1.5.2"
|
||||
ordered-float = "2.1.1"
|
||||
rayon = "1.5.0"
|
||||
roaring = "0.6.6"
|
||||
rstar = { version = "0.9.1", features = ["serde"] }
|
||||
serde = { version = "1.0.123", features = ["derive"] }
|
||||
serde_json = { version = "1.0.62", features = ["preserve_order"] }
|
||||
slice-group-by = "0.2.6"
|
||||
|
@ -3,7 +3,7 @@ use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::error::{is_reserved_keyword, Error, UserError};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum Criterion {
|
||||
@ -50,32 +50,101 @@ impl FromStr for Criterion {
|
||||
"sort" => Ok(Criterion::Sort),
|
||||
"exactness" => Ok(Criterion::Exactness),
|
||||
text => match AscDesc::from_str(text) {
|
||||
Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
|
||||
Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
|
||||
Ok(AscDesc::Asc(Member::Field(field))) => Ok(Criterion::Asc(field)),
|
||||
Ok(AscDesc::Desc(Member::Field(field))) => Ok(Criterion::Desc(field)),
|
||||
Ok(AscDesc::Asc(Member::Geo(_))) | Ok(AscDesc::Desc(Member::Geo(_))) => {
|
||||
Err(UserError::InvalidRankingRuleName { name: text.to_string() })?
|
||||
}
|
||||
Err(UserError::InvalidAscDescSyntax { name }) => {
|
||||
Err(UserError::InvalidCriterionName { name }.into())
|
||||
Err(UserError::InvalidRankingRuleName { name }.into())
|
||||
}
|
||||
Err(error) => {
|
||||
Err(UserError::InvalidCriterionName { name: error.to_string() }.into())
|
||||
Err(UserError::InvalidRankingRuleName { name: error.to_string() }.into())
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
pub enum Member {
|
||||
Field(String),
|
||||
Geo([f64; 2]),
|
||||
}
|
||||
|
||||
impl FromStr for Member {
|
||||
type Err = UserError;
|
||||
|
||||
fn from_str(text: &str) -> Result<Member, Self::Err> {
|
||||
match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(")")) {
|
||||
Some(point) => {
|
||||
let (lat, long) = point
|
||||
.split_once(',')
|
||||
.ok_or_else(|| UserError::InvalidRankingRuleName { name: text.to_string() })
|
||||
.and_then(|(lat, long)| {
|
||||
lat.trim()
|
||||
.parse()
|
||||
.and_then(|lat| long.trim().parse().map(|long| (lat, long)))
|
||||
.map_err(|_| UserError::InvalidRankingRuleName {
|
||||
name: text.to_string(),
|
||||
})
|
||||
})?;
|
||||
Ok(Member::Geo([lat, long]))
|
||||
}
|
||||
None => {
|
||||
if is_reserved_keyword(text) {
|
||||
return Err(UserError::InvalidReservedRankingRuleName {
|
||||
name: text.to_string(),
|
||||
})?;
|
||||
}
|
||||
Ok(Member::Field(text.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Member {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Member::Field(name) => f.write_str(name),
|
||||
Member::Geo([lat, lng]) => write!(f, "_geoPoint({}, {})", lat, lng),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Member {
|
||||
pub fn field(&self) -> Option<&str> {
|
||||
match self {
|
||||
Member::Field(field) => Some(field),
|
||||
Member::Geo(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn geo_point(&self) -> Option<&[f64; 2]> {
|
||||
match self {
|
||||
Member::Geo(point) => Some(point),
|
||||
Member::Field(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
pub enum AscDesc {
|
||||
Asc(String),
|
||||
Desc(String),
|
||||
Asc(Member),
|
||||
Desc(Member),
|
||||
}
|
||||
|
||||
impl AscDesc {
|
||||
pub fn field(&self) -> &str {
|
||||
pub fn member(&self) -> &Member {
|
||||
match self {
|
||||
AscDesc::Asc(field) => field,
|
||||
AscDesc::Desc(field) => field,
|
||||
AscDesc::Asc(member) => member,
|
||||
AscDesc::Desc(member) => member,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn field(&self) -> Option<&str> {
|
||||
self.member().field()
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for AscDesc {
|
||||
@ -85,9 +154,9 @@ impl FromStr for AscDesc {
|
||||
/// string and let the caller create his own error
|
||||
fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
|
||||
match text.rsplit_once(':') {
|
||||
Some((field_name, "asc")) => Ok(AscDesc::Asc(field_name.to_string())),
|
||||
Some((field_name, "desc")) => Ok(AscDesc::Desc(field_name.to_string())),
|
||||
_ => Err(UserError::InvalidAscDescSyntax { name: text.to_string() }),
|
||||
Some((left, "asc")) => Ok(AscDesc::Asc(left.parse()?)),
|
||||
Some((left, "desc")) => Ok(AscDesc::Desc(left.parse()?)),
|
||||
_ => Err(UserError::InvalidRankingRuleName { name: text.to_string() }),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -119,3 +188,63 @@ impl fmt::Display for Criterion {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks which `<member>:<order>` strings `AscDesc` accepts, what they parse to, and
    /// which malformed inputs are rejected.
    #[test]
    fn parse_asc_desc() {
        use big_s::S;
        use AscDesc::*;
        use Member::*;

        let valid_req = [
            ("truc:asc", Asc(Field(S("truc")))),
            ("bidule:desc", Desc(Field(S("bidule")))),
            ("a-b:desc", Desc(Field(S("a-b")))),
            // only the last `:` separates the member from the order
            ("a:b:desc", Desc(Field(S("a:b")))),
            ("a12:asc", Asc(Field(S("a12")))),
            ("42:asc", Asc(Field(S("42")))),
            ("_geoPoint(42, 59):asc", Asc(Geo([42., 59.]))),
            ("_geoPoint(42.459, 59):desc", Desc(Geo([42.459, 59.]))),
            ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))),
            ("_geoPoint(42.0002, 59.895):desc", Desc(Geo([42.0002, 59.895]))),
            ("_geoPoint(42., 59.):desc", Desc(Geo([42., 59.]))),
            // anything that is not `_geoPoint(...)` is a plain field name
            ("truc(12, 13):desc", Desc(Field(S("truc(12, 13)")))),
        ];

        for (req, expected) in valid_req {
            let res = req.parse::<AscDesc>();
            assert!(res.is_ok(), "Failed to parse `{}`, was expecting `{:?}`", req, expected);
            assert_eq!(expected, res.unwrap());
        }

        let invalid_req = [
            "truc:machin",
            "truc:deesc",
            "truc:asc:deesc",
            "42desc",
            // reserved keywords cannot be used as field names
            "_geoPoint:asc",
            "_geoDistance:asc",
            "_geoPoint(42.12 , 59.598)",
            "_geoPoint(42.12 , 59.598):deesc",
            "_geoPoint(42.12 , 59.598):machin",
            "_geoPoint(42.12 , 59.598):asc:aasc",
            "_geoPoint(42,12 , 59,598):desc",
            "_geoPoint(35, 85, 75):asc",
            "_geoPoint(18):asc",
        ];

        for req in invalid_req {
            let res = req.parse::<AscDesc>();
            assert!(
                res.is_err(),
                "Should not be able to parse `{}`, was expecting an error but instead got: `{:?}`",
                req,
                res,
            );
        }
    }
}
|
||||
|
@ -12,6 +12,10 @@ use crate::{DocumentId, FieldId};
|
||||
|
||||
pub type Object = Map<String, Value>;
|
||||
|
||||
/// Tells whether `keyword` is exactly one of the names reserved by the geo-search feature:
/// `_geo`, `_geoDistance`, `_geoPoint` or `_geoRadius`. The comparison is case-sensitive.
pub fn is_reserved_keyword(keyword: &str) -> bool {
    matches!(keyword, "_geo" | "_geoDistance" | "_geoPoint" | "_geoRadius")
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
InternalError(InternalError),
|
||||
@ -54,12 +58,14 @@ pub enum UserError {
|
||||
Csv(csv::Error),
|
||||
DocumentLimitReached,
|
||||
InvalidAscDescSyntax { name: String },
|
||||
InvalidCriterionName { name: String },
|
||||
InvalidDocumentId { document_id: Value },
|
||||
InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
|
||||
InvalidFilter(pest::error::Error<ParserRule>),
|
||||
InvalidFilterAttribute(pest::error::Error<ParserRule>),
|
||||
InvalidSortName { name: String },
|
||||
InvalidGeoField { document_id: Value, object: Value },
|
||||
InvalidRankingRuleName { name: String },
|
||||
InvalidReservedRankingRuleName { name: String },
|
||||
InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
|
||||
SortRankingRuleMissing,
|
||||
InvalidStoreFile,
|
||||
@ -221,7 +227,15 @@ impl fmt::Display for UserError {
|
||||
Self::InvalidAscDescSyntax { name } => {
|
||||
write!(f, "invalid asc/desc syntax for {}", name)
|
||||
}
|
||||
Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name),
|
||||
Self::InvalidGeoField { document_id, object } => write!(
|
||||
f,
|
||||
"the document with the id: {} contains an invalid _geo field: {}",
|
||||
document_id, object
|
||||
),
|
||||
Self::InvalidRankingRuleName { name } => write!(f, "invalid criterion {}", name),
|
||||
Self::InvalidReservedRankingRuleName { name } => {
|
||||
write!(f, "{} is a reserved keyword and thus can't be used as a ranking rule", name)
|
||||
}
|
||||
Self::InvalidDocumentId { document_id } => {
|
||||
let json = serde_json::to_string(document_id).unwrap();
|
||||
write!(
|
||||
|
@ -8,6 +8,7 @@ use heed::flags::Flags;
|
||||
use heed::types::*;
|
||||
use heed::{Database, PolyDatabase, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
@ -18,8 +19,8 @@ use crate::heed_codec::facet::{
|
||||
use crate::{
|
||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||
FieldIdWordCountCodec, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search,
|
||||
StrLevelPositionCodec, StrStrU8Codec, BEU32,
|
||||
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
|
||||
Search, StrLevelPositionCodec, StrStrU8Codec, BEU32,
|
||||
};
|
||||
|
||||
pub mod main_key {
|
||||
@ -31,6 +32,8 @@ pub mod main_key {
|
||||
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
||||
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
||||
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||
@ -294,6 +297,64 @@ impl Index {
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/* geo rtree */
|
||||
|
||||
/// Writes the provided `rtree` which associates coordinates to documents ids.
|
||||
pub(crate) fn put_geo_rtree(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
rtree: &RTree<GeoPoint>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<RTree<GeoPoint>>>(wtxn, main_key::GEO_RTREE_KEY, rtree)
|
||||
}
|
||||
|
||||
/// Delete the `rtree` which associates coordinates to documents ids.
|
||||
pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY)
|
||||
}
|
||||
|
||||
/// Returns the `rtree` which associates coordinates to documents ids.
|
||||
pub fn geo_rtree<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<RTree<GeoPoint>>> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)?
|
||||
{
|
||||
Some(rtree) => Ok(Some(rtree)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/* geo faceted */
|
||||
|
||||
/// Writes the documents ids that are faceted with a _geo field.
|
||||
pub(crate) fn put_geo_faceted_documents_ids(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
docids: &RoaringBitmap,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, RoaringBitmapCodec>(
|
||||
wtxn,
|
||||
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
|
||||
docids,
|
||||
)
|
||||
}
|
||||
|
||||
/// Delete the documents ids that are faceted with a _geo field.
|
||||
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
|
||||
}
|
||||
|
||||
/// Retrieve all the documents ids that are faceted with a _geo field.
|
||||
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
|
||||
{
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/* field distribution */
|
||||
|
||||
/// Writes the field distribution which associates every field name with
|
||||
|
@ -21,7 +21,7 @@ use fxhash::{FxHasher32, FxHasher64};
|
||||
pub use grenad::CompressionType;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
pub use self::criterion::{default_criteria, AscDesc, Criterion};
|
||||
pub use self::criterion::{default_criteria, AscDesc, Criterion, Member};
|
||||
pub use self::error::{
|
||||
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
||||
};
|
||||
@ -51,6 +51,7 @@ pub type DocumentId = u32;
|
||||
pub type FieldId = u16;
|
||||
pub type Position = u32;
|
||||
pub type FieldDistribution = BTreeMap<String, u64>;
|
||||
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>;
|
||||
|
||||
/// Transform a raw obkv store into a JSON Object.
|
||||
pub fn obkv_to_json(
|
||||
@ -141,6 +142,15 @@ where
|
||||
Some((head, tail))
|
||||
}
|
||||
|
||||
/// Return the distance between two points in meters. Each points are composed of two f64,
|
||||
/// one latitude and one longitude.
|
||||
pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
|
||||
let a = geoutils::Location::new(a[0], a[1]);
|
||||
let b = geoutils::Location::new(b[0], b[1]);
|
||||
|
||||
a.haversine_distance_to(&b).meters()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json::json;
|
||||
|
150
milli/src/search/criteria/geo.rs
Normal file
150
milli/src/search/criteria/geo.rs
Normal file
@ -0,0 +1,150 @@
|
||||
use std::iter;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::{GeoPoint, Index, Result};
|
||||
|
||||
pub struct Geo<'t> {
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
ascending: bool,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
|
||||
allowed_candidates: RoaringBitmap,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
rtree: Option<RTree<GeoPoint>>,
|
||||
point: [f64; 2],
|
||||
}
|
||||
|
||||
impl<'t> Geo<'t> {
|
||||
pub fn asc(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
point: [f64; 2],
|
||||
) -> Result<Self> {
|
||||
Self::new(index, rtxn, parent, point, true)
|
||||
}
|
||||
|
||||
pub fn desc(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
point: [f64; 2],
|
||||
) -> Result<Self> {
|
||||
Self::new(index, rtxn, parent, point, false)
|
||||
}
|
||||
|
||||
fn new(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
point: [f64; 2],
|
||||
ascending: bool,
|
||||
) -> Result<Self> {
|
||||
let candidates = Box::new(iter::empty());
|
||||
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
|
||||
let bucket_candidates = RoaringBitmap::new();
|
||||
let rtree = index.geo_rtree(rtxn)?;
|
||||
|
||||
Ok(Self {
|
||||
index,
|
||||
rtxn,
|
||||
ascending,
|
||||
parent,
|
||||
candidates,
|
||||
allowed_candidates,
|
||||
bucket_candidates,
|
||||
rtree,
|
||||
point,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Criterion for Geo<'_> {
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||
let rtree = self.rtree.as_ref();
|
||||
|
||||
loop {
|
||||
match self.candidates.next() {
|
||||
Some(mut candidates) => {
|
||||
candidates -= params.excluded_candidates;
|
||||
self.allowed_candidates -= &candidates;
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: Some(candidates),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(self.bucket_candidates.clone()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
let mut candidates = match (&query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => {
|
||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
||||
resolve_query_tree(&context, qt, params.wdcache)?
|
||||
}
|
||||
(None, None) => self.index.documents_ids(self.rtxn)?,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
continue;
|
||||
}
|
||||
self.allowed_candidates = &candidates - params.excluded_candidates;
|
||||
self.candidates = match rtree {
|
||||
Some(rtree) => geo_point(
|
||||
rtree,
|
||||
self.allowed_candidates.clone(),
|
||||
self.point,
|
||||
self.ascending,
|
||||
),
|
||||
None => Box::new(std::iter::empty()),
|
||||
};
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn geo_point(
|
||||
rtree: &RTree<GeoPoint>,
|
||||
mut candidates: RoaringBitmap,
|
||||
point: [f64; 2],
|
||||
ascending: bool,
|
||||
) -> Box<dyn Iterator<Item = RoaringBitmap>> {
|
||||
let mut results = Vec::new();
|
||||
for point in rtree.nearest_neighbor_iter(&point) {
|
||||
if candidates.remove(point.data) {
|
||||
results.push(std::iter::once(point.data).collect());
|
||||
if candidates.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ascending {
|
||||
Box::new(results.into_iter())
|
||||
} else {
|
||||
Box::new(results.into_iter().rev())
|
||||
}
|
||||
}
|
@ -12,7 +12,8 @@ use self::r#final::Final;
|
||||
use self::typo::Typo;
|
||||
use self::words::Words;
|
||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||
use crate::criterion::AscDesc as AscDescName;
|
||||
use crate::criterion::{AscDesc as AscDescName, Member};
|
||||
use crate::search::criteria::geo::Geo;
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::{DocumentId, FieldId, Index, Result, TreeLevel};
|
||||
|
||||
@ -20,6 +21,7 @@ mod asc_desc;
|
||||
mod attribute;
|
||||
mod exactness;
|
||||
pub mod r#final;
|
||||
mod geo;
|
||||
mod initial;
|
||||
mod proximity;
|
||||
mod typo;
|
||||
@ -290,18 +292,30 @@ impl<'t> CriteriaBuilder<'t> {
|
||||
Some(ref sort_criteria) => {
|
||||
for asc_desc in sort_criteria {
|
||||
criterion = match asc_desc {
|
||||
AscDescName::Asc(field) => Box::new(AscDesc::asc(
|
||||
AscDescName::Asc(Member::Field(field)) => Box::new(AscDesc::asc(
|
||||
&self.index,
|
||||
&self.rtxn,
|
||||
criterion,
|
||||
field.to_string(),
|
||||
)?),
|
||||
AscDescName::Desc(field) => Box::new(AscDesc::desc(
|
||||
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
|
||||
&self.index,
|
||||
&self.rtxn,
|
||||
criterion,
|
||||
field.to_string(),
|
||||
)?),
|
||||
AscDescName::Asc(Member::Geo(point)) => Box::new(Geo::asc(
|
||||
&self.index,
|
||||
&self.rtxn,
|
||||
criterion,
|
||||
point.clone(),
|
||||
)?),
|
||||
AscDescName::Desc(Member::Geo(point)) => Box::new(Geo::desc(
|
||||
&self.index,
|
||||
&self.rtxn,
|
||||
criterion,
|
||||
point.clone(),
|
||||
)?),
|
||||
};
|
||||
}
|
||||
criterion
|
||||
|
@ -21,7 +21,9 @@ use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{
|
||||
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result};
|
||||
use crate::{
|
||||
distance_between_two_points, CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Operator {
|
||||
@ -32,6 +34,8 @@ pub enum Operator {
|
||||
LowerThan(f64),
|
||||
LowerThanOrEqual(f64),
|
||||
Between(f64, f64),
|
||||
GeoLowerThan([f64; 2], f64),
|
||||
GeoGreaterThan([f64; 2], f64),
|
||||
}
|
||||
|
||||
impl Operator {
|
||||
@ -46,6 +50,8 @@ impl Operator {
|
||||
LowerThan(n) => (GreaterThanOrEqual(n), None),
|
||||
LowerThanOrEqual(n) => (GreaterThan(n), None),
|
||||
Between(n, m) => (LowerThan(n), Some(GreaterThan(m))),
|
||||
GeoLowerThan(point, distance) => (GeoGreaterThan(point, distance), None),
|
||||
GeoGreaterThan(point, distance) => (GeoLowerThan(point, distance), None),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -131,6 +137,7 @@ impl FilterCondition {
|
||||
Rule::leq => Ok(Self::lower_than_or_equal(fim, ff, pair)?),
|
||||
Rule::less => Ok(Self::lower_than(fim, ff, pair)?),
|
||||
Rule::between => Ok(Self::between(fim, ff, pair)?),
|
||||
Rule::geo_radius => Ok(Self::geo_radius(fim, ff, pair)?),
|
||||
Rule::not => Ok(Self::from_pairs(fim, ff, pair.into_inner())?.negate()),
|
||||
Rule::prgm => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||
@ -156,6 +163,65 @@ impl FilterCondition {
|
||||
}
|
||||
}
|
||||
|
||||
fn geo_radius(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<String>,
|
||||
item: Pair<Rule>,
|
||||
) -> Result<FilterCondition> {
|
||||
if !filterable_fields.contains("_geo") {
|
||||
return Err(UserError::InvalidFilterAttribute(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `_geo` is not filterable, available filterable attributes are: {}",
|
||||
filterable_fields.iter().join(", "),
|
||||
),
|
||||
},
|
||||
item.as_span(),
|
||||
)))?;
|
||||
}
|
||||
let mut items = item.into_inner();
|
||||
let fid = match fields_ids_map.id("_geo") {
|
||||
Some(fid) => fid,
|
||||
None => return Ok(Empty),
|
||||
};
|
||||
let parameters_item = items.next().unwrap();
|
||||
// We don't need more than 3 parameters, but to handle errors correctly we are still going
|
||||
// to extract the first 4 parameters
|
||||
let param_span = parameters_item.as_span();
|
||||
let parameters = parameters_item
|
||||
.into_inner()
|
||||
.take(4)
|
||||
.map(|param| (param.clone(), param.as_span()))
|
||||
.map(|(param, span)| pest_parse(param).0.map(|arg| (arg, span)))
|
||||
.collect::<StdResult<Vec<(f64, _)>, _>>()
|
||||
.map_err(UserError::InvalidFilter)?;
|
||||
if parameters.len() != 3 {
|
||||
return Err(UserError::InvalidFilter(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"),
|
||||
},
|
||||
// we want to point to the last parameters and if there was no parameters we
|
||||
// point to the parenthesis
|
||||
parameters.last().map(|param| param.1.clone()).unwrap_or(param_span),
|
||||
)))?;
|
||||
}
|
||||
let (lat, lng, distance) = (¶meters[0], ¶meters[1], parameters[2].0);
|
||||
if let Some(span) = (!(-181.0..181.).contains(&lat.0))
|
||||
.then(|| &lat.1)
|
||||
.or((!(-181.0..181.).contains(&lng.0)).then(|| &lng.1))
|
||||
{
|
||||
return Err(UserError::InvalidFilter(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"Latitude and longitude must be contained between -180 to 180 degrees."
|
||||
),
|
||||
},
|
||||
span.clone(),
|
||||
)))?;
|
||||
}
|
||||
Ok(Operator(fid, GeoLowerThan([lat.0, lng.0], distance)))
|
||||
}
|
||||
|
||||
fn between(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<String>,
|
||||
@ -440,6 +506,34 @@ impl FilterCondition {
|
||||
LowerThan(val) => (Included(f64::MIN), Excluded(*val)),
|
||||
LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)),
|
||||
Between(left, right) => (Included(*left), Included(*right)),
|
||||
GeoLowerThan(base_point, distance) => {
|
||||
let rtree = match index.geo_rtree(rtxn)? {
|
||||
Some(rtree) => rtree,
|
||||
None => return Ok(RoaringBitmap::new()),
|
||||
};
|
||||
|
||||
let result = rtree
|
||||
.nearest_neighbor_iter(base_point)
|
||||
.take_while(|point| {
|
||||
distance_between_two_points(base_point, point.geom()) < *distance
|
||||
})
|
||||
.map(|point| point.data)
|
||||
.collect();
|
||||
|
||||
return Ok(result);
|
||||
}
|
||||
GeoGreaterThan(point, distance) => {
|
||||
let result = Self::evaluate_operator(
|
||||
rtxn,
|
||||
index,
|
||||
numbers_db,
|
||||
strings_db,
|
||||
field_id,
|
||||
&GeoLowerThan(point.clone(), *distance),
|
||||
)?;
|
||||
let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?;
|
||||
return Ok(geo_faceted_doc_ids - result);
|
||||
}
|
||||
};
|
||||
|
||||
// Ask for the biggest value that can exist for this specific field, if it exists
|
||||
@ -505,6 +599,19 @@ fn field_id(
|
||||
) -> StdResult<Option<FieldId>, PestError<Rule>> {
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
if key.as_rule() == Rule::reserved {
|
||||
return Err(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"`{}` is a reserved keyword and therefore can't be used as a filter expression. \
|
||||
Available filterable attributes are: {}",
|
||||
key.as_str(),
|
||||
filterable_fields.iter().join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
));
|
||||
}
|
||||
|
||||
if !filterable_fields.contains(key.as_str()) {
|
||||
return Err(PestError::new_from_span(
|
||||
@ -581,6 +688,13 @@ mod tests {
|
||||
let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
|
||||
let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let result = FilterCondition::from_str(&rtxn, &index, "_geo = France");
|
||||
assert!(result.is_err());
|
||||
let error = result.unwrap_err();
|
||||
assert!(error.to_string().contains(
|
||||
"`_geo` is a reserved keyword and therefore can't be used as a filter expression."
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -663,6 +777,92 @@ mod tests {
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
||||
#[test]
fn geo_radius() {
    // Fragments expected inside the error returned for malformed `_geoRadius` filters.
    const ARITY_ERROR: &str = "The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`";
    const RANGE_ERROR: &str =
        "Latitude and longitude must be contained between -180 to 180 degrees.";

    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // Declare `_geo` and `price` as both searchable and filterable.
    let mut wtxn = index.write_txn().unwrap();
    let mut settings = Settings::new(&mut wtxn, &index, 0);
    // The searchable list pins the field order used by the field ids asserted below.
    settings.set_searchable_fields(vec![S("_geo"), S("price")]);
    settings.set_filterable_fields(hashset! { S("_geo"), S("price") });
    settings.execute(|_, _| ()).unwrap();
    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();

    // A plain radius filter maps to `GeoLowerThan` on field 0.
    let condition =
        FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap();
    assert_eq!(condition, Operator(0, GeoLowerThan([12., 13.0005], 2000.)));

    // Negating a radius filter maps to `GeoGreaterThan`.
    let condition =
        FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap();
    assert_eq!(condition, Operator(0, GeoGreaterThan([50., 18.], 2000.500)));

    // Radius filters can be combined with each other and with regular conditions.
    let condition = FilterCondition::from_str(
        &rtxn,
        &index,
        "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10",
    )
    .unwrap();
    let outside_small_circle = Operator(0, GeoGreaterThan([1., 2.], 300.));
    let inside_large_circle = Operator(0, GeoLowerThan([1.001, 2.002], 1000.300));
    let price_at_most_ten = Operator(1, LowerThanOrEqual(10.));
    let expected = Or(
        Box::new(And(Box::new(outside_small_circle), Box::new(inside_large_circle))),
        Box::new(price_at_most_ten),
    );
    assert_eq!(condition, expected);

    // Every filter below must be rejected with an error containing the paired fragment.
    let rejected: [(&str, &str); 6] = [
        // no parameter list at all
        ("_geoRadius", ARITY_ERROR),
        // an empty parameter list
        ("_geoRadius()", ARITY_ERROR),
        // not enough parameters
        ("_geoRadius(1, 2)", ARITY_ERROR),
        // too many parameters
        ("_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)", ARITY_ERROR),
        // out-of-range latitude
        ("_geoRadius(-200, 150, 10)", RANGE_ERROR),
        // out-of-range longitude
        ("_geoRadius(-10, 181, 10)", RANGE_ERROR),
    ];
    for &(expression, expected_fragment) in rejected.iter() {
        let result = FilterCondition::from_str(&rtxn, &index, expression);
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(error.to_string().contains(expected_fragment));
    }
}
|
||||
|
||||
#[test]
|
||||
fn from_array() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
|
@ -1,5 +1,5 @@
|
||||
key = _{quoted | word}
|
||||
value = _{quoted | word}
|
||||
key = _{reserved | quoted | word }
|
||||
value = _{quoted | word }
|
||||
quoted = _{ (PUSH("'") | PUSH("\"")) ~ string ~ POP }
|
||||
string = {char*}
|
||||
word = ${(LETTER | NUMBER | "_" | "-" | ".")+}
|
||||
@ -8,6 +8,9 @@ char = _{ !(PEEK | "\\") ~ ANY
|
||||
| "\\" ~ (PEEK | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
|
||||
| "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})}
|
||||
|
||||
// Keywords that cannot be used as attribute names in a filter expression.
// PEG choice is ordered and commits to the first alternative that matches, so the
// longer keywords have to be tried before their `_geo` prefix; otherwise the `_geo`
// alternative always wins and `_geoDistance` / `_geoPoint(...)` are never recognised.
// `parameters` can match the empty string, so `"_geoPoint" ~ parameters` also covers
// a bare `_geoPoint`.
reserved = { "_geoDistance" | ("_geoPoint" ~ parameters) | "_geo" }
|
||||
// we deliberately allow empty parameters so that a more specific error message can be generated later
|
||||
parameters = {("(" ~ (value ~ ",")* ~ value? ~ ")") | ""}
|
||||
condition = _{between | eq | greater | less | geq | leq | neq}
|
||||
between = {key ~ value ~ "TO" ~ value}
|
||||
geq = {key ~ ">=" ~ value}
|
||||
@ -16,10 +19,11 @@ neq = {key ~ "!=" ~ value}
|
||||
eq = {key ~ "=" ~ value}
|
||||
greater = {key ~ ">" ~ value}
|
||||
less = {key ~ "<" ~ value}
|
||||
geo_radius = {"_geoRadius" ~ parameters }
|
||||
|
||||
prgm = {SOI ~ expr ~ EOI}
|
||||
expr = _{ ( term ~ (operation ~ term)* ) }
|
||||
term = { ("(" ~ expr ~ ")") | condition | not }
|
||||
term = { ("(" ~ expr ~ ")") | condition | not | geo_radius }
|
||||
operation = _{ and | or }
|
||||
and = {"AND"}
|
||||
or = {"OR"}
|
||||
|
@ -148,13 +148,15 @@ impl<'a> Search<'a> {
|
||||
if let Some(sort_criteria) = &self.sort_criteria {
|
||||
let sortable_fields = self.index.sortable_fields(self.rtxn)?;
|
||||
for asc_desc in sort_criteria {
|
||||
let field = asc_desc.field();
|
||||
if !sortable_fields.contains(field) {
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: field.to_string(),
|
||||
valid_fields: sortable_fields,
|
||||
// we are not supposed to find any geoPoint in the criterion
|
||||
if let Some(field) = asc_desc.field() {
|
||||
if !sortable_fields.contains(field) {
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: field.to_string(),
|
||||
valid_fields: sortable_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -48,6 +48,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
|
||||
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
||||
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
||||
self.index.delete_geo_rtree(self.wtxn)?;
|
||||
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
|
||||
|
||||
// We clean all the faceted documents ids.
|
||||
let empty = RoaringBitmap::default();
|
||||
@ -93,7 +95,7 @@ mod tests {
|
||||
let content = &br#"[
|
||||
{ "id": 0, "name": "kevin", "age": 20 },
|
||||
{ "id": 1, "name": "kevina" },
|
||||
{ "id": 2, "name": "benoit", "country": "France" }
|
||||
{ "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
@ -107,13 +109,15 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 4);
|
||||
assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 5);
|
||||
|
||||
assert!(index.words_fst(&rtxn).unwrap().is_empty());
|
||||
assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
|
||||
assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
|
||||
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
|
||||
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
|
||||
assert!(index.geo_rtree(&rtxn).unwrap().is_none());
|
||||
assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());
|
||||
|
||||
assert!(index.word_docids.is_empty(&rtxn).unwrap());
|
||||
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
|
||||
|
@ -380,6 +380,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
|
||||
drop(iter);
|
||||
|
||||
if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? {
|
||||
let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?;
|
||||
|
||||
let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree
|
||||
.iter()
|
||||
.filter(|&point| self.documents_ids.contains(point.data))
|
||||
.cloned()
|
||||
.map(|point| (point, point.data))
|
||||
.unzip();
|
||||
points_to_remove.iter().for_each(|point| {
|
||||
rtree.remove(&point);
|
||||
});
|
||||
geo_faceted_doc_ids -= docids_to_remove;
|
||||
|
||||
self.index.put_geo_rtree(self.wtxn, &rtree)?;
|
||||
self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
|
||||
}
|
||||
|
||||
// We delete the documents ids that are under the facet field id values.
|
||||
remove_docids_from_facet_field_id_number_docids(
|
||||
self.wtxn,
|
||||
@ -542,6 +560,8 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
@ -657,4 +677,95 @@ mod tests {
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
}
|
||||
|
||||
#[test]
fn delete_documents_with_geo_points() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // Make `_geo` filterable and sortable so that the geo points get indexed.
    let mut wtxn = index.write_txn().unwrap();
    let mut settings = Settings::new(&mut wtxn, &index, 0);
    settings.set_primary_key(S("id"));
    settings.set_filterable_fields(hashset!(S("_geo")));
    settings.set_sortable_fields(hashset!(S("_geo")));
    settings.execute(|_, _| ()).unwrap();

    let documents: &str = r#"[
        {"id":"1","city":"Lille", "_geo": { "lat": 50.629973371633746, "lng": 3.0569447399419570 } },
        {"id":"2","city":"Mons-en-Barœul", "_geo": { "lat": 50.641586120121050, "lng": 3.1106593480348670 } },
        {"id":"3","city":"Hellemmes", "_geo": { "lat": 50.631220965518080, "lng": 3.1106399673339933 } },
        {"id":"4","city":"Villeneuve-d'Ascq", "_geo": { "lat": 50.622468098014565, "lng": 3.1476425513437140 } },
        {"id":"5","city":"Hem", "_geo": { "lat": 50.655250871381355, "lng": 3.1897297266244130 } },
        {"id":"6","city":"Roubaix", "_geo": { "lat": 50.692473451896710, "lng": 3.1763326737747650 } },
        {"id":"7","city":"Tourcoing", "_geo": { "lat": 50.726397466736480, "lng": 3.1541653659578670 } },
        {"id":"8","city":"Mouscron", "_geo": { "lat": 50.745325554908610, "lng": 3.2206407854429853 } },
        {"id":"9","city":"Tournai", "_geo": { "lat": 50.605342528602630, "lng": 3.3758586941351414 } },
        {"id":"10","city":"Ghent", "_geo": { "lat": 51.053777403679035, "lng": 3.6957733119926930 } },
        {"id":"11","city":"Brussels", "_geo": { "lat": 50.846640974544690, "lng": 4.3370663564281840 } },
        {"id":"12","city":"Charleroi", "_geo": { "lat": 50.409570138889480, "lng": 4.4347354315085520 } },
        {"id":"13","city":"Mons", "_geo": { "lat": 50.450294178855420, "lng": 3.9623722870904690 } },
        {"id":"14","city":"Valenciennes", "_geo": { "lat": 50.351817774473545, "lng": 3.5326283646928800 } },
        {"id":"15","city":"Arras", "_geo": { "lat": 50.284487528579950, "lng": 2.7637515844478160 } },
        {"id":"16","city":"Cambrai", "_geo": { "lat": 50.179340577906700, "lng": 3.2189409952502930 } },
        {"id":"17","city":"Bapaume", "_geo": { "lat": 50.111276127236400, "lng": 2.8547894666083120 } },
        {"id":"18","city":"Amiens", "_geo": { "lat": 49.931472529669996, "lng": 2.2710499758317080 } },
        {"id":"19","city":"Compiègne", "_geo": { "lat": 49.444980887725656, "lng": 2.7913841281529015 } },
        {"id":"20","city":"Paris", "_geo": { "lat": 48.902100060895480, "lng": 2.3708400867406930 } }
    ]"#;
    let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];

    // Index every document of the fixture.
    let mut indexing = IndexDocuments::new(&mut wtxn, &index, 0);
    indexing.update_format(UpdateFormat::Json);
    indexing.execute(documents.as_bytes(), |_, _| ()).unwrap();

    // Resolve the internal ids of the documents about to be deleted while they still exist.
    let external_document_ids = index.external_documents_ids(&wtxn).unwrap();
    let mut ids_to_delete: Vec<u32> = Vec::with_capacity(external_ids_to_delete.len());
    for external_id in external_ids_to_delete.iter() {
        ids_to_delete.push(external_document_ids.get(external_id.as_bytes()).unwrap());
    }

    // Delete the selected documents.
    let mut deletion = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
    for external_id in external_ids_to_delete.iter() {
        drop(deletion.delete_external_id(external_id));
    }
    deletion.execute().unwrap();

    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();
    let rtree = index.geo_rtree(&rtxn).unwrap().unwrap();
    let geo_faceted_doc_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();

    // Fetches the given documents from the index and returns the set of ids that came back.
    let stored_ids = |ids: &[u32]| -> HashSet<u32> {
        let documents = index.documents(&rtxn, ids.iter().copied()).unwrap();
        documents.iter().map(|(id, _)| *id).collect()
    };

    let all_geo_ids: Vec<_> = rtree.iter().map(|point| point.data).collect();
    let all_geo_documents = stored_ids(&all_geo_ids);

    let all_geo_faceted_ids: Vec<_> = geo_faceted_doc_ids.iter().collect();
    let all_geo_faceted_documents = stored_ids(&all_geo_faceted_ids);

    assert_eq!(
        all_geo_documents, all_geo_faceted_documents,
        "There is an inconsistency between the geo_faceted database and the rtree"
    );

    // None of the deleted documents may still be referenced by the geo structures.
    for id in &all_geo_documents {
        assert!(!ids_to_delete.contains(id), "The document {} was supposed to be deleted", id);
    }

    assert_eq!(
        all_geo_ids.len(),
        all_geo_documents.len(),
        "We deleted documents that were not supposed to be deleted"
    );
}
|
||||
}
|
||||
|
@ -0,0 +1,44 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
|
||||
use concat_arrays::concat_arrays;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||
use crate::{FieldId, InternalError, Result, UserError};
|
||||
|
||||
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
|
||||
pub fn extract_geo_points<R: io::Read>(
|
||||
mut obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
primary_key_id: FieldId,
|
||||
geo_field_id: FieldId,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let mut writer = tempfile::tempfile().and_then(|file| {
|
||||
create_writer(indexer.chunk_compression_type, indexer.chunk_compression_level, file)
|
||||
})?;
|
||||
|
||||
while let Some((docid_bytes, value)) = obkv_documents.next()? {
|
||||
let obkv = obkv::KvReader::new(value);
|
||||
let point: Value = match obkv.get(geo_field_id) {
|
||||
Some(point) => serde_json::from_slice(point).map_err(InternalError::SerdeJson)?,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) {
|
||||
// this will create an array of 16 bytes (two 8 bytes floats)
|
||||
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||
writer.insert(docid_bytes, bytes)?;
|
||||
} else {
|
||||
// All document must have a primary key so we can unwrap safely here
|
||||
let primary_key = obkv.get(primary_key_id).unwrap();
|
||||
let primary_key =
|
||||
serde_json::from_slice(primary_key).map_err(InternalError::SerdeJson)?;
|
||||
Err(UserError::InvalidGeoField { document_id: primary_key, object: point })?
|
||||
}
|
||||
}
|
||||
|
||||
Ok(writer_into_reader(writer)?)
|
||||
}
|
@ -3,6 +3,7 @@ mod extract_facet_number_docids;
|
||||
mod extract_facet_string_docids;
|
||||
mod extract_fid_docid_facet_values;
|
||||
mod extract_fid_word_count_docids;
|
||||
mod extract_geo_points;
|
||||
mod extract_word_docids;
|
||||
mod extract_word_level_position_docids;
|
||||
mod extract_word_pair_proximity_docids;
|
||||
@ -19,6 +20,7 @@ use self::extract_facet_number_docids::extract_facet_number_docids;
|
||||
use self::extract_facet_string_docids::extract_facet_string_docids;
|
||||
use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values;
|
||||
use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
|
||||
use self::extract_geo_points::extract_geo_points;
|
||||
use self::extract_word_docids::extract_word_docids;
|
||||
use self::extract_word_level_position_docids::extract_word_level_position_docids;
|
||||
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
||||
@ -37,6 +39,8 @@ pub(crate) fn data_from_obkv_documents(
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
searchable_fields: Option<HashSet<FieldId>>,
|
||||
faceted_fields: HashSet<FieldId>,
|
||||
primary_key_id: FieldId,
|
||||
geo_field_id: Option<FieldId>,
|
||||
stop_words: Option<fst::Set<&[u8]>>,
|
||||
) -> Result<()> {
|
||||
let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
|
||||
@ -48,6 +52,8 @@ pub(crate) fn data_from_obkv_documents(
|
||||
lmdb_writer_sx.clone(),
|
||||
&searchable_fields,
|
||||
&faceted_fields,
|
||||
primary_key_id,
|
||||
geo_field_id,
|
||||
&stop_words,
|
||||
)
|
||||
})
|
||||
@ -168,6 +174,8 @@ fn extract_documents_data(
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
primary_key_id: FieldId,
|
||||
geo_field_id: Option<FieldId>,
|
||||
stop_words: &Option<fst::Set<&[u8]>>,
|
||||
) -> Result<(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
@ -177,6 +185,19 @@ fn extract_documents_data(
|
||||
|
||||
let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
|
||||
|
||||
if let Some(geo_field_id) = geo_field_id {
|
||||
let documents_chunk_cloned = documents_chunk.clone();
|
||||
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
|
||||
rayon::spawn(move || {
|
||||
let result =
|
||||
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_field_id);
|
||||
let _ = match result {
|
||||
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
|
||||
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
|
||||
rayon::join(
|
||||
|| {
|
||||
|
@ -228,11 +228,27 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
Receiver<Result<TypedChunk>>,
|
||||
) = crossbeam_channel::unbounded();
|
||||
|
||||
// get the primary key field id
|
||||
let primary_key_id = fields_ids_map.id(&primary_key).unwrap();
|
||||
|
||||
// get searchable fields for word databases
|
||||
let searchable_fields =
|
||||
self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter);
|
||||
// get filterable fields for facet databases
|
||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||
// get the fid of the `_geo` field.
|
||||
let geo_field_id = match self.index.fields_ids_map(self.wtxn)?.id("_geo") {
|
||||
Some(gfid) => {
|
||||
let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
|
||||
let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
|
||||
if is_sortable || is_filterable {
|
||||
Some(gfid)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||
// let stop_words = stop_words.as_ref();
|
||||
@ -261,6 +277,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
lmdb_writer_sx.clone(),
|
||||
searchable_fields,
|
||||
faceted_fields,
|
||||
primary_key_id,
|
||||
geo_field_id,
|
||||
stop_words,
|
||||
)
|
||||
});
|
||||
@ -876,12 +894,12 @@ mod tests {
|
||||
// First we send 3 documents with an id for only one of them.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let documents = &r#"[
|
||||
{ "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5 },
|
||||
{ "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
|
||||
{ "id": 456, "title": "Le Petit Prince", "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 },
|
||||
{ "id": 1, "title": "Alice In Wonderland", "author": "Lewis Carroll", "genre": "fantasy", "price": 25.99 },
|
||||
{ "id": 1344, "title": "The Hobbit", "author": "J. R. R. Tolkien", "genre": "fantasy" },
|
||||
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" },
|
||||
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams" }
|
||||
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
@ -917,7 +935,7 @@ mod tests {
|
||||
{ "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
|
||||
{ "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" },
|
||||
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
|
||||
{ "objectId": 30, "title": "Hamlet" }
|
||||
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
@ -934,7 +952,7 @@ mod tests {
|
||||
assert!(external_documents_ids.get("30").is_none());
|
||||
|
||||
let content = &br#"[
|
||||
{ "objectId": 30, "title": "Hamlet" }
|
||||
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
@ -944,7 +962,7 @@ mod tests {
|
||||
assert!(external_documents_ids.get("30").is_some());
|
||||
|
||||
let content = &br#"[
|
||||
{ "objectId": 30, "title": "Hamlet" }
|
||||
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
|
@ -1,4 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
|
||||
use heed::types::ByteSlice;
|
||||
@ -6,11 +7,12 @@ use heed::{BytesDecode, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::helpers::{
|
||||
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
|
||||
self, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, valid_lmdb_key,
|
||||
CursorClonableMmap,
|
||||
};
|
||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
||||
use crate::update::index_documents::helpers::into_clonable_grenad;
|
||||
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Index, Result};
|
||||
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
DocidWordPositions(grenad::Reader<CursorClonableMmap>),
|
||||
@ -24,6 +26,7 @@ pub(crate) enum TypedChunk {
|
||||
WordPairProximityDocids(grenad::Reader<File>),
|
||||
FieldIdFacetStringDocids(grenad::Reader<File>),
|
||||
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
||||
GeoPoints(grenad::Reader<File>),
|
||||
}
|
||||
|
||||
/// Write typed chunk in the corresponding LMDB database of the provided index.
|
||||
@ -177,6 +180,24 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::GeoPoints(mut geo_points) => {
|
||||
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
|
||||
let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
|
||||
|
||||
while let Some((key, value)) = geo_points.next()? {
|
||||
// convert the key back to a u32 (4 bytes)
|
||||
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
|
||||
|
||||
// convert the latitude and longitude back to a f64 (8 bytes)
|
||||
let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
|
||||
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
||||
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
|
||||
rtree.insert(GeoPoint::new(point, docid));
|
||||
geo_faceted_docids.insert(docid);
|
||||
}
|
||||
index.put_geo_rtree(wtxn, &rtree)?;
|
||||
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok((RoaringBitmap::new(), is_merged_database))
|
||||
|
@ -1,17 +1,17 @@
|
||||
{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","":""}
|
||||
{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""}
|
||||
{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""}
|
||||
{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""}
|
||||
{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""}
|
||||
{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""}
|
||||
{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""}
|
||||
{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""}
|
||||
{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","":""}
|
||||
{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","":""}
|
||||
{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","":""}
|
||||
{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","":""}
|
||||
{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","":""}
|
||||
{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","":""}
|
||||
{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","":""}
|
||||
{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","":""}
|
||||
{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","":""}
|
||||
{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":""}
|
||||
{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":""}
|
||||
{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":""}
|
||||
{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":""}
|
||||
{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":""}
|
||||
{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":""}
|
||||
{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"geo_rank":34692,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","_geo": { "lat": 50.78776041427129, "lng": 2.661201766290338 },"":""}
|
||||
{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":""}
|
||||
{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":""}
|
||||
{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":""}
|
||||
{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":""}
|
||||
{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":""}
|
||||
{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"geo_rank":739203,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","_geo": { "lat": 43.99150729038736, "lng": 3.606143957295055 },"":""}
|
||||
{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":""}
|
||||
{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":""}
|
||||
{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":9422437,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","_geo": { "lat": 35.06462306367058, "lng": 135.8338440354251 },"":""}
|
||||
{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":9339230,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","_geo": { "lat": 34.39548365683149, "lng": 132.4535960928883 },"":""}
|
||||
|
@ -47,6 +47,11 @@ test_filter!(eq_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank=1")]
|
||||
test_filter!(eq_string_or_filter, vec![Left(vec!["tag=red", "tag=green"])]);
|
||||
test_filter!(eq_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank=1"])]);
|
||||
test_filter!(eq_number_or_filter, vec![Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]);
|
||||
test_filter!(geo_radius, vec![Right("_geoRadius(50.630010347667806, 3.086251829166809, 100000)")]);
|
||||
test_filter!(
|
||||
not_geo_radius,
|
||||
vec![Right("NOT _geoRadius(50.630010347667806, 3.086251829166809, 1000000)")]
|
||||
);
|
||||
test_filter!(eq_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")]);
|
||||
test_filter!(
|
||||
eq_complex_filter_2,
|
||||
|
@ -6,7 +6,7 @@ use either::{Either, Left, Right};
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::{hashmap, hashset};
|
||||
use milli::update::{Settings, UpdateBuilder, UpdateFormat};
|
||||
use milli::{AscDesc, Criterion, DocumentId, Index};
|
||||
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
|
||||
use serde::Deserialize;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
@ -37,6 +37,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
builder.set_filterable_fields(hashset! {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
S("_geo"),
|
||||
});
|
||||
builder.set_sortable_fields(hashset! {
|
||||
S("tag"),
|
||||
@ -99,11 +100,11 @@ pub fn expected_order(
|
||||
new_groups
|
||||
.extend(group.linear_group_by_key(|d| d.proximity_rank).map(Vec::from));
|
||||
}
|
||||
Criterion::Sort if sort_by == [AscDesc::Asc(S("tag"))] => {
|
||||
Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
|
||||
group.sort_by_key(|d| d.sort_by_rank);
|
||||
new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
|
||||
}
|
||||
Criterion::Sort if sort_by == [AscDesc::Desc(S("tag"))] => {
|
||||
Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
|
||||
group.sort_by_key(|d| Reverse(d.sort_by_rank));
|
||||
new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
|
||||
}
|
||||
@ -162,6 +163,10 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
|
||||
if document.asc_desc_rank > filter.parse().unwrap() {
|
||||
id = Some(document.id.clone())
|
||||
}
|
||||
} else if filter.starts_with("_geoRadius") {
|
||||
id = (document.geo_rank < 100000).then(|| document.id.clone());
|
||||
} else if filter.starts_with("NOT _geoRadius") {
|
||||
id = (document.geo_rank > 1000000).then(|| document.id.clone());
|
||||
}
|
||||
id
|
||||
}
|
||||
@ -205,6 +210,7 @@ pub struct TestDocument {
|
||||
pub exact_rank: u32,
|
||||
pub asc_desc_rank: u32,
|
||||
pub sort_by_rank: u32,
|
||||
pub geo_rank: u32,
|
||||
pub title: String,
|
||||
pub description: String,
|
||||
pub tag: String,
|
||||
|
@ -5,7 +5,7 @@ use heed::EnvOpenOptions;
|
||||
use itertools::Itertools;
|
||||
use maplit::hashset;
|
||||
use milli::update::{Settings, UpdateBuilder, UpdateFormat};
|
||||
use milli::{AscDesc, Criterion, Index, Search, SearchResult};
|
||||
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
|
||||
use rand::Rng;
|
||||
use Criterion::*;
|
||||
|
||||
@ -163,28 +163,28 @@ test_criterion!(
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
ALLOW_TYPOS,
|
||||
vec![Sort],
|
||||
vec![AscDesc::Asc(S("tag"))]
|
||||
vec![AscDesc::Asc(Member::Field(S("tag")))]
|
||||
);
|
||||
test_criterion!(
|
||||
sort_by_asc_disallow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
DISALLOW_TYPOS,
|
||||
vec![Sort],
|
||||
vec![AscDesc::Asc(S("tag"))]
|
||||
vec![AscDesc::Asc(Member::Field(S("tag")))]
|
||||
);
|
||||
test_criterion!(
|
||||
sort_by_desc_allow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
ALLOW_TYPOS,
|
||||
vec![Sort],
|
||||
vec![AscDesc::Desc(S("tag"))]
|
||||
vec![AscDesc::Desc(Member::Field(S("tag")))]
|
||||
);
|
||||
test_criterion!(
|
||||
sort_by_desc_disallow_typo,
|
||||
DISALLOW_OPTIONAL_WORDS,
|
||||
DISALLOW_TYPOS,
|
||||
vec![Sort],
|
||||
vec![AscDesc::Desc(S("tag"))]
|
||||
vec![AscDesc::Desc(Member::Field(S("tag")))]
|
||||
);
|
||||
test_criterion!(
|
||||
default_criteria_order,
|
||||
|
@ -1,6 +1,6 @@
|
||||
use big_s::S;
|
||||
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
|
||||
use milli::{AscDesc, Error, Search, UserError};
|
||||
use milli::{AscDesc, Error, Member, Search, UserError};
|
||||
|
||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||
|
||||
@ -16,7 +16,7 @@ fn sort_ranking_rule_missing() {
|
||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||
search.authorize_typos(true);
|
||||
search.optional_words(true);
|
||||
search.sort_criteria(vec![AscDesc::Asc(S("tag"))]);
|
||||
search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
|
||||
|
||||
let result = search.execute();
|
||||
assert!(matches!(result, Err(Error::UserError(UserError::SortRankingRuleMissing))));
|
||||
|
Loading…
Reference in New Issue
Block a user