mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 20:15:07 +08:00
31c8de1cca
322: Geosearch r=ManyTheFish a=irevoire This PR introduces [basic geo-search functionalities](https://github.com/meilisearch/specifications/pull/59); it makes the engine able to index, filter, and sort by geo-point. We decided to use [the rstar library](https://docs.rs/rstar) and to save the points in [an RTree](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html) that we de/serialize in the index database [by using serde](https://serde.rs/) with [bincode](https://docs.rs/bincode). This is not an efficient way to query this tree, as it will consume a lot of CPU and memory when a search is made, but at least it is an easy first implementation. ### What we will have to do on the indexing part: - [x] Index the `_geo` fields from the documents. - [x] Create a new module with an extractor in the `extract` module that takes the `obkv_documents` and retrieves the latitude and longitude coordinates, outputting them in a `grenad::Reader` for further processing. - [x] Call the extractor in the `extract::extract_documents_data` function and send the result to the `TypedChunk` module. - [x] Get the `grenad::Reader` in the `typed_chunk::write_typed_chunk_into_index` function and store all the points in the `rtree`. - [x] Delete the documents from the `RTree` when deleting documents from the database. All this can be done in the `delete_documents.rs` file by getting the data structure and removing the points from it, inserting it back after the modification. - [x] Clear the `RTree` entirely when we clear the documents from the database; everything happens in the `clear_documents.rs` file. - [x] Save a Roaring bitmap of all documents containing the `_geo` field. ### What we will have to do on the query part: - [x] Filter the documents within a certain distance around a point; this is done by [collecting the documents from the searched point](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html#method.nearest_neighbor_iter) while they are in range. 
- [x] We must introduce new `geoLowerThan` and `geoGreaterThan` variants to the `Operator` filter enum. - [x] Implement the `negative` method on both variants, where the `geoGreaterThan` variant is implemented by executing the `geoLowerThan` and removing the results found from the whole list of geo-faceted documents. - [x] Add the `_geoRadius` function in the pest parser. - [x] Introduce a `_geo` ascending ranking function that takes a point as a parameter. ~~this function must keep the iterator on the `RTree` and make it peekable~~ This was not possible for now; we had to collect the whole iterator. Only the documents that are part of the candidates must be sent too! - [x] This ascending ranking rule will only be active if the search is set up with the `_geoPoint` parameter that indicates the center point of the ascending ranking rule. ----------- - On the Meilisearch side: We must introduce a new concept: returning the documents with a new `_geoDistance` field when they have gone through the `_geo` ranking rule; this has never been done before. We could maybe just do it afterward, once the documents have been retrieved from the database, by computing the distance between the `_geoPoint` and each of the documents to be returned. Co-authored-by: Irevoire <tamo@meilisearch.com> Co-authored-by: cvermand <33010418+bidoubiwa@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com>
58 lines
1.6 KiB
TOML
58 lines
1.6 KiB
TOML
# Package metadata for the `milli` crate.
[package]
|
|
name = "milli"
|
|
version = "0.13.1"
|
|
authors = ["Kerollmops <clement@meilisearch.com>"]
|
|
# Rust edition the crate is compiled with.
edition = "2018"
|
|
|
|
# Regular (non-dev) dependencies of the crate.
[dependencies]
|
|
bstr = "0.2.15"
|
|
byteorder = "1.4.2"
|
|
chrono = { version = "0.4.19", features = ["serde"] }
|
|
concat-arrays = "0.1.2"
|
|
crossbeam-channel = "0.5.1"
|
|
csv = "1.1.5"
|
|
either = "1.6.1"
|
|
flate2 = "1.0.20"
|
|
fst = "0.4.5"
|
|
fxhash = "0.2.1"
|
|
# Default features are turned off; only the `tempfile` feature is enabled.
grenad = { version = "0.3.1", default-features = false, features = ["tempfile"] }
|
|
# NOTE(review): presumably provides geographic distance computations for
# geo-search; confirm against the code that uses it.
geoutils = "0.4.1"
|
|
# Fetched from a git repository and pinned to the `v0.12.1` tag, with default
# features turned off and only `lmdb` and `sync-read-txn` enabled.
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
|
human_format = "1.0.3"
|
|
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
|
linked-hash-map = "0.5.4"
|
|
# Fetched from a git repository and pinned to the `v0.2.5` tag.
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
|
|
memmap = "0.7.0"
|
|
obkv = "0.2.0"
|
|
once_cell = "1.5.2"
|
|
ordered-float = "2.1.1"
|
|
rayon = "1.5.0"
|
|
roaring = "0.6.6"
|
|
# R-tree implementation; the `serde` feature is enabled so the tree can be
# serialized and deserialized.
rstar = { version = "0.9.1", features = ["serde"] }
|
|
serde = { version = "1.0.123", features = ["derive"] }
|
|
serde_json = { version = "1.0.62", features = ["preserve_order"] }
|
|
slice-group-by = "0.2.6"
|
|
smallstr = { version = "0.2.0", features = ["serde"] }
|
|
smallvec = "1.6.1"
|
|
tempfile = "3.2.0"
|
|
uuid = { version = "0.8.2", features = ["v4"] }
|
|
|
|
# facet filter parser
|
|
# Pinned to an exact git revision rather than a released version.
# NOTE(review): `pest_derive` below is a registry release; confirm it stays
# compatible with this pinned `pest` revision.
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
|
pest_derive = "2.1.0"
|
|
|
|
# documents words self-join
|
|
itertools = "0.10.0"
|
|
|
|
# logging
|
|
log = "0.4.14"
|
|
logging_timer = "1.0.0"
|
|
|
|
# Dependencies used only when building tests, examples, and benchmarks;
# they are not pulled in by crates that depend on this package.
[dev-dependencies]
|
|
big_s = "1.0.2"
|
|
maplit = "1.0.2"
|
|
rand = "0.8.3"
|
|
|
|
# Cargo feature flags for this crate.
[features]
|
|
# No features are enabled by default.
default = []
|