mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-25 19:45:05 +08:00
get rids of the index crate + the document_types crate
This commit is contained in:
parent
9a74ea0943
commit
667c282e19
271
Cargo.lock
generated
271
Cargo.lock
generated
@ -355,12 +355,6 @@ dependencies = [
|
|||||||
"critical-section",
|
"critical-section",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "atomic_refcell"
|
|
||||||
version = "0.1.8"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "73b5e5f48b927f04e952dedc932f31995a65a0bf65ec971c74436e51bf6e970d"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atty"
|
name = "atty"
|
||||||
version = "0.2.14"
|
version = "0.2.14"
|
||||||
@ -1023,17 +1017,6 @@ dependencies = [
|
|||||||
"syn 1.0.101",
|
"syn 1.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "derivative"
|
|
||||||
version = "2.2.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2 1.0.46",
|
|
||||||
"quote 1.0.21",
|
|
||||||
"syn 1.0.101",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "derive_builder"
|
name = "derive_builder"
|
||||||
version = "0.11.2"
|
version = "0.11.2"
|
||||||
@ -1084,12 +1067,6 @@ version = "1.3.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "08ff6a4480d42625e59bc4e8b5dc3723279fd24d83afe8aa20df217276261cd6"
|
checksum = "08ff6a4480d42625e59bc4e8b5dc3723279fd24d83afe8aa20df217276261cd6"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "difflib"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "digest"
|
name = "digest"
|
||||||
version = "0.10.5"
|
version = "0.10.5"
|
||||||
@ -1122,24 +1099,6 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "document-formats"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"csv",
|
|
||||||
"either",
|
|
||||||
"meilisearch-types",
|
|
||||||
"milli 0.33.0",
|
|
||||||
"serde",
|
|
||||||
"serde_json",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "downcast"
|
|
||||||
version = "0.11.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dump"
|
name = "dump"
|
||||||
version = "0.29.0"
|
version = "0.29.0"
|
||||||
@ -1148,7 +1107,6 @@ dependencies = [
|
|||||||
"big_s",
|
"big_s",
|
||||||
"flate2",
|
"flate2",
|
||||||
"http",
|
"http",
|
||||||
"index",
|
|
||||||
"index-scheduler",
|
"index-scheduler",
|
||||||
"insta",
|
"insta",
|
||||||
"log",
|
"log",
|
||||||
@ -1351,8 +1309,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filter-parser"
|
name = "filter-parser"
|
||||||
version = "0.33.0"
|
version = "0.33.4"
|
||||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5"
|
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"nom",
|
"nom",
|
||||||
"nom_locate",
|
"nom_locate",
|
||||||
@ -1379,8 +1337,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flatten-serde-json"
|
name = "flatten-serde-json"
|
||||||
version = "0.33.0"
|
version = "0.33.4"
|
||||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5"
|
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde_json",
|
"serde_json",
|
||||||
]
|
]
|
||||||
@ -1393,15 +1351,6 @@ dependencies = [
|
|||||||
"serde_json",
|
"serde_json",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "float-cmp"
|
|
||||||
version = "0.9.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4"
|
|
||||||
dependencies = [
|
|
||||||
"num-traits",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fnv"
|
name = "fnv"
|
||||||
version = "1.0.7"
|
version = "1.0.7"
|
||||||
@ -1417,18 +1366,6 @@ dependencies = [
|
|||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fragile"
|
|
||||||
version = "1.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "85dcb89d2b10c5f6133de2efd8c11959ce9dbb46a2f7a4cab208c4eeda6ce1ab"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fs_extra"
|
|
||||||
version = "1.2.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fst"
|
name = "fst"
|
||||||
version = "0.4.7"
|
version = "0.4.7"
|
||||||
@ -1828,35 +1765,6 @@ dependencies = [
|
|||||||
"unicode-normalization",
|
"unicode-normalization",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "index"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"anyhow",
|
|
||||||
"bincode",
|
|
||||||
"csv",
|
|
||||||
"derivative",
|
|
||||||
"either",
|
|
||||||
"file-store",
|
|
||||||
"fst",
|
|
||||||
"indexmap",
|
|
||||||
"lazy_static",
|
|
||||||
"log",
|
|
||||||
"meilisearch-types",
|
|
||||||
"milli 0.33.0",
|
|
||||||
"nelson",
|
|
||||||
"obkv",
|
|
||||||
"permissive-json-pointer",
|
|
||||||
"proptest",
|
|
||||||
"proptest-derive",
|
|
||||||
"regex",
|
|
||||||
"serde",
|
|
||||||
"serde_json",
|
|
||||||
"thiserror",
|
|
||||||
"time",
|
|
||||||
"uuid 1.1.2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "index-scheduler"
|
name = "index-scheduler"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@ -1867,13 +1775,10 @@ dependencies = [
|
|||||||
"crossbeam",
|
"crossbeam",
|
||||||
"csv",
|
"csv",
|
||||||
"derive_builder",
|
"derive_builder",
|
||||||
"document-formats",
|
|
||||||
"file-store",
|
"file-store",
|
||||||
"index",
|
|
||||||
"insta",
|
"insta",
|
||||||
"log",
|
"log",
|
||||||
"meilisearch-types",
|
"meilisearch-types",
|
||||||
"milli 0.33.0",
|
|
||||||
"nelson",
|
"nelson",
|
||||||
"roaring 0.9.0",
|
"roaring 0.9.0",
|
||||||
"serde",
|
"serde",
|
||||||
@ -1983,8 +1888,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "json-depth-checker"
|
name = "json-depth-checker"
|
||||||
version = "0.33.0"
|
version = "0.33.4"
|
||||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5"
|
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde_json",
|
"serde_json",
|
||||||
]
|
]
|
||||||
@ -2374,7 +2279,6 @@ dependencies = [
|
|||||||
"cargo_toml",
|
"cargo_toml",
|
||||||
"clap 4.0.9",
|
"clap 4.0.9",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"document-formats",
|
|
||||||
"either",
|
"either",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"file-store",
|
"file-store",
|
||||||
@ -2384,7 +2288,6 @@ dependencies = [
|
|||||||
"futures-util",
|
"futures-util",
|
||||||
"hex",
|
"hex",
|
||||||
"http",
|
"http",
|
||||||
"index",
|
|
||||||
"index-scheduler",
|
"index-scheduler",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"itertools",
|
"itertools",
|
||||||
@ -2394,7 +2297,6 @@ dependencies = [
|
|||||||
"manifest-dir-macros",
|
"manifest-dir-macros",
|
||||||
"maplit",
|
"maplit",
|
||||||
"meilisearch-auth",
|
"meilisearch-auth",
|
||||||
"meilisearch-lib",
|
|
||||||
"meilisearch-types",
|
"meilisearch-types",
|
||||||
"mimalloc",
|
"mimalloc",
|
||||||
"mime",
|
"mime",
|
||||||
@ -2402,6 +2304,7 @@ dependencies = [
|
|||||||
"obkv",
|
"obkv",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
|
"permissive-json-pointer",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"platform-dirs",
|
"platform-dirs",
|
||||||
"prometheus",
|
"prometheus",
|
||||||
@ -2437,78 +2340,14 @@ dependencies = [
|
|||||||
"zip",
|
"zip",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "meilisearch-lib"
|
|
||||||
version = "0.29.1"
|
|
||||||
dependencies = [
|
|
||||||
"actix-rt",
|
|
||||||
"actix-web",
|
|
||||||
"anyhow",
|
|
||||||
"async-stream",
|
|
||||||
"async-trait",
|
|
||||||
"atomic_refcell",
|
|
||||||
"byte-unit",
|
|
||||||
"bytes",
|
|
||||||
"clap 4.0.9",
|
|
||||||
"crossbeam-channel",
|
|
||||||
"csv",
|
|
||||||
"derivative",
|
|
||||||
"either",
|
|
||||||
"file-store",
|
|
||||||
"flate2",
|
|
||||||
"fs_extra",
|
|
||||||
"fst",
|
|
||||||
"futures",
|
|
||||||
"futures-util",
|
|
||||||
"http",
|
|
||||||
"index",
|
|
||||||
"index-scheduler",
|
|
||||||
"indexmap",
|
|
||||||
"itertools",
|
|
||||||
"lazy_static",
|
|
||||||
"log",
|
|
||||||
"meilisearch-auth",
|
|
||||||
"meilisearch-types",
|
|
||||||
"milli 0.34.0",
|
|
||||||
"mime",
|
|
||||||
"mockall",
|
|
||||||
"nelson",
|
|
||||||
"num_cpus",
|
|
||||||
"obkv",
|
|
||||||
"once_cell",
|
|
||||||
"page_size",
|
|
||||||
"parking_lot",
|
|
||||||
"paste",
|
|
||||||
"permissive-json-pointer",
|
|
||||||
"proptest",
|
|
||||||
"proptest-derive",
|
|
||||||
"rand",
|
|
||||||
"rayon",
|
|
||||||
"regex",
|
|
||||||
"reqwest",
|
|
||||||
"roaring 0.10.1",
|
|
||||||
"rustls",
|
|
||||||
"serde",
|
|
||||||
"serde_json",
|
|
||||||
"siphasher",
|
|
||||||
"slice-group-by",
|
|
||||||
"sysinfo",
|
|
||||||
"tar",
|
|
||||||
"tempfile",
|
|
||||||
"thiserror",
|
|
||||||
"time",
|
|
||||||
"tokio",
|
|
||||||
"uuid 1.1.2",
|
|
||||||
"walkdir",
|
|
||||||
"whoami",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch-types"
|
name = "meilisearch-types"
|
||||||
version = "0.29.1"
|
version = "0.29.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
"milli 0.33.0",
|
"csv",
|
||||||
|
"either",
|
||||||
|
"milli 0.33.4",
|
||||||
"proptest",
|
"proptest",
|
||||||
"proptest-derive",
|
"proptest-derive",
|
||||||
"serde",
|
"serde",
|
||||||
@ -2542,8 +2381,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "milli"
|
name = "milli"
|
||||||
version = "0.33.0"
|
version = "0.33.4"
|
||||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5"
|
source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bimap",
|
"bimap",
|
||||||
"bincode",
|
"bincode",
|
||||||
@ -2554,15 +2393,15 @@ dependencies = [
|
|||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"csv",
|
"csv",
|
||||||
"either",
|
"either",
|
||||||
"filter-parser 0.33.0",
|
"filter-parser 0.33.4",
|
||||||
"flatten-serde-json 0.33.0",
|
"flatten-serde-json 0.33.4",
|
||||||
"fst",
|
"fst",
|
||||||
"fxhash",
|
"fxhash",
|
||||||
"geoutils 0.4.1",
|
"geoutils 0.4.1",
|
||||||
"grenad",
|
"grenad",
|
||||||
"heed",
|
"heed",
|
||||||
"itertools",
|
"itertools",
|
||||||
"json-depth-checker 0.33.0",
|
"json-depth-checker 0.33.4",
|
||||||
"levenshtein_automata",
|
"levenshtein_automata",
|
||||||
"log",
|
"log",
|
||||||
"logging_timer",
|
"logging_timer",
|
||||||
@ -2682,33 +2521,6 @@ dependencies = [
|
|||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "mockall"
|
|
||||||
version = "0.11.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e2be9a9090bc1cac2930688fa9478092a64c6a92ddc6ae0692d46b37d9cab709"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"downcast",
|
|
||||||
"fragile",
|
|
||||||
"lazy_static",
|
|
||||||
"mockall_derive",
|
|
||||||
"predicates",
|
|
||||||
"predicates-tree",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "mockall_derive"
|
|
||||||
version = "0.11.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "86d702a0530a0141cf4ed147cf5ec7be6f2c187d4e37fcbefc39cf34116bfe8f"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"proc-macro2 1.0.46",
|
|
||||||
"quote 1.0.21",
|
|
||||||
"syn 1.0.101",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nb"
|
name = "nb"
|
||||||
version = "0.1.3"
|
version = "0.1.3"
|
||||||
@ -2750,12 +2562,6 @@ dependencies = [
|
|||||||
"nom",
|
"nom",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "normalize-line-endings"
|
|
||||||
version = "0.3.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ntapi"
|
name = "ntapi"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@ -3081,36 +2887,6 @@ version = "0.2.16"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
|
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "predicates"
|
|
||||||
version = "2.1.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a5aab5be6e4732b473071984b3164dbbfb7a3674d30ea5ff44410b6bcd960c3c"
|
|
||||||
dependencies = [
|
|
||||||
"difflib",
|
|
||||||
"float-cmp",
|
|
||||||
"itertools",
|
|
||||||
"normalize-line-endings",
|
|
||||||
"predicates-core",
|
|
||||||
"regex",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "predicates-core"
|
|
||||||
version = "1.0.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "da1c2388b1513e1b605fcec39a95e0a9e8ef088f71443ef37099fa9ae6673fcb"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "predicates-tree"
|
|
||||||
version = "1.0.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "4d86de6de25020a36c6d3643a86d9a6a9f552107c0559c60ea03551b5e16c032"
|
|
||||||
dependencies = [
|
|
||||||
"predicates-core",
|
|
||||||
"termtree",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro-error"
|
name = "proc-macro-error"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
@ -3941,12 +3717,6 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "termtree"
|
|
||||||
version = "0.2.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "textwrap"
|
name = "textwrap"
|
||||||
version = "0.15.1"
|
version = "0.15.1"
|
||||||
@ -4413,17 +4183,6 @@ dependencies = [
|
|||||||
"hashbrown 0.7.2",
|
"hashbrown 0.7.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "whoami"
|
|
||||||
version = "1.2.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d6631b6a2fd59b1841b622e8f1a7ad241ef0a46f2d580464ce8140ac94cbd571"
|
|
||||||
dependencies = [
|
|
||||||
"bumpalo",
|
|
||||||
"wasm-bindgen",
|
|
||||||
"web-sys",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
@ -5,8 +5,6 @@ members = [
|
|||||||
"meilisearch-types",
|
"meilisearch-types",
|
||||||
"meilisearch-auth",
|
"meilisearch-auth",
|
||||||
"index-scheduler",
|
"index-scheduler",
|
||||||
"document-formats",
|
|
||||||
"index",
|
|
||||||
"dump",
|
"dump",
|
||||||
"file-store",
|
"file-store",
|
||||||
"permissive-json-pointer",
|
"permissive-json-pointer",
|
||||||
|
@ -1,14 +0,0 @@
|
|||||||
[package]
|
|
||||||
name = "document-formats"
|
|
||||||
version = "0.1.0"
|
|
||||||
edition = "2021"
|
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
csv = "1.1.6"
|
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
|
||||||
either = { version = "1.6.1", features = ["serde"] }
|
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" }
|
|
||||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
|
@ -1,155 +0,0 @@
|
|||||||
use std::borrow::Borrow;
|
|
||||||
use std::fmt::{self, Debug, Display};
|
|
||||||
use std::io::{self, BufReader, Read, Seek, Write};
|
|
||||||
|
|
||||||
use either::Either;
|
|
||||||
use meilisearch_types::error::{Code, ErrorCode};
|
|
||||||
use meilisearch_types::internal_error;
|
|
||||||
use milli::documents::{DocumentsBatchBuilder, Error};
|
|
||||||
use milli::Object;
|
|
||||||
use serde::Deserialize;
|
|
||||||
|
|
||||||
type Result<T> = std::result::Result<T, DocumentFormatError>;
|
|
||||||
|
|
||||||
/// The format of a document payload.
///
/// The [`fmt::Display`] implementation renders the lowercase format name
/// (`ndjson`, `json` or `csv`), which is the form embedded in the
/// user-facing error messages built from this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PayloadType {
    /// Newline-delimited JSON, rendered as `ndjson`.
    Ndjson,
    /// A JSON payload, rendered as `json`.
    Json,
    /// A CSV payload, rendered as `csv`.
    Csv,
}

impl fmt::Display for PayloadType {
    /// Writes the lowercase name of the payload format.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Ndjson => "ndjson",
            Self::Json => "json",
            Self::Csv => "csv",
        };
        f.write_str(name)
    }
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum DocumentFormatError {
|
|
||||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
|
||||||
MalformedPayload(Error, PayloadType),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Display for DocumentFormatError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
|
|
||||||
Self::MalformedPayload(me, b) => match me.borrow() {
|
|
||||||
Error::Json(se) => {
|
|
||||||
// https://github.com/meilisearch/meilisearch/issues/2107
|
|
||||||
// The user input maybe insanely long. We need to truncate it.
|
|
||||||
let mut serde_msg = se.to_string();
|
|
||||||
let ellipsis = "...";
|
|
||||||
if serde_msg.len() > 100 + ellipsis.len() {
|
|
||||||
serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis);
|
|
||||||
}
|
|
||||||
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.",
|
|
||||||
b, serde_msg
|
|
||||||
)
|
|
||||||
}
|
|
||||||
_ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for DocumentFormatError {}
|
|
||||||
|
|
||||||
impl From<(PayloadType, Error)> for DocumentFormatError {
|
|
||||||
fn from((ty, error): (PayloadType, Error)) -> Self {
|
|
||||||
match error {
|
|
||||||
Error::Io(e) => Self::Internal(Box::new(e)),
|
|
||||||
e => Self::MalformedPayload(e, ty),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ErrorCode for DocumentFormatError {
|
|
||||||
fn error_code(&self) -> Code {
|
|
||||||
match self {
|
|
||||||
DocumentFormatError::Internal(_) => Code::Internal,
|
|
||||||
DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
internal_error!(DocumentFormatError: io::Error);
|
|
||||||
|
|
||||||
/// Reads CSV from input and write an obkv batch to writer.
|
|
||||||
pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
|
||||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
|
||||||
|
|
||||||
let csv = csv::Reader::from_reader(input);
|
|
||||||
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
|
|
||||||
|
|
||||||
let count = builder.documents_count();
|
|
||||||
let _ = builder
|
|
||||||
.into_inner()
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
|
|
||||||
Ok(count as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Reads JSON Lines from input and write an obkv batch to writer.
|
|
||||||
pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
|
||||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
|
||||||
let reader = BufReader::new(input);
|
|
||||||
|
|
||||||
for result in serde_json::Deserializer::from_reader(reader).into_iter() {
|
|
||||||
let object = result
|
|
||||||
.map_err(Error::Json)
|
|
||||||
.map_err(|e| (PayloadType::Ndjson, e))?;
|
|
||||||
builder
|
|
||||||
.append_json_object(&object)
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let count = builder.documents_count();
|
|
||||||
let _ = builder
|
|
||||||
.into_inner()
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
|
|
||||||
Ok(count as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Reads JSON from input and write an obkv batch to writer.
|
|
||||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
|
||||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
|
||||||
let reader = BufReader::new(input);
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
|
||||||
#[serde(transparent)]
|
|
||||||
struct ArrayOrSingleObject {
|
|
||||||
#[serde(with = "either::serde_untagged")]
|
|
||||||
inner: Either<Vec<Object>, Object>,
|
|
||||||
}
|
|
||||||
|
|
||||||
let content: ArrayOrSingleObject = serde_json::from_reader(reader)
|
|
||||||
.map_err(Error::Json)
|
|
||||||
.map_err(|e| (PayloadType::Json, e))?;
|
|
||||||
|
|
||||||
for object in content.inner.map_right(|o| vec![o]).into_inner() {
|
|
||||||
builder
|
|
||||||
.append_json_object(&object)
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let count = builder.documents_count();
|
|
||||||
let _ = builder
|
|
||||||
.into_inner()
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
|
|
||||||
Ok(count as usize)
|
|
||||||
}
|
|
@ -6,7 +6,6 @@ edition = "2021"
|
|||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
index = { path = "../index" }
|
|
||||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
serde = { version = "1.0.136", features = ["derive"] }
|
||||||
|
@ -11,10 +11,7 @@ bincode = "1.3.3"
|
|||||||
csv = "1.1.6"
|
csv = "1.1.6"
|
||||||
file-store = { path = "../file-store" }
|
file-store = { path = "../file-store" }
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" }
|
|
||||||
index = { path = "../index" }
|
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
document-formats = { path = "../document-formats" }
|
|
||||||
roaring = "0.9.0"
|
roaring = "0.9.0"
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
serde = { version = "1.0.136", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
use milli::update::IndexDocumentsMethod::{self, ReplaceDocuments, UpdateDocuments};
|
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||||
|
self, ReplaceDocuments, UpdateDocuments,
|
||||||
|
};
|
||||||
use std::ops::ControlFlow::{self, Break, Continue};
|
use std::ops::ControlFlow::{self, Break, Continue};
|
||||||
|
|
||||||
use crate::{task::Kind, TaskId};
|
use crate::{task::Kind, TaskId};
|
||||||
|
@ -3,14 +3,19 @@ use crate::{
|
|||||||
task::{Details, Kind, KindWithContent, Status, Task},
|
task::{Details, Kind, KindWithContent, Status, Task},
|
||||||
Error, IndexScheduler, Result, TaskId,
|
Error, IndexScheduler, Result, TaskId,
|
||||||
};
|
};
|
||||||
use index::apply_settings_to_builder;
|
|
||||||
use index::error::IndexError;
|
|
||||||
use index::{Settings, Unchecked};
|
|
||||||
use log::{debug, info};
|
use log::{debug, info};
|
||||||
use milli::heed::{RoTxn, RwTxn};
|
use meilisearch_types::milli::update::IndexDocumentsConfig;
|
||||||
use milli::update::IndexDocumentsConfig;
|
use meilisearch_types::milli::update::{
|
||||||
use milli::update::{DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod};
|
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod,
|
||||||
use milli::{documents::DocumentsBatchReader, BEU32};
|
};
|
||||||
|
use meilisearch_types::milli::{
|
||||||
|
self, documents::DocumentsBatchReader, update::Settings as MilliSettings, BEU32,
|
||||||
|
};
|
||||||
|
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||||
|
use meilisearch_types::{
|
||||||
|
heed::{RoTxn, RwTxn},
|
||||||
|
Index,
|
||||||
|
};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
@ -527,7 +532,7 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
if let Some(primary_key) = primary_key.clone() {
|
if let Some(primary_key) = primary_key.clone() {
|
||||||
let mut index_wtxn = index.write_txn()?;
|
let mut index_wtxn = index.write_txn()?;
|
||||||
let mut builder = milli::update::Settings::new(
|
let mut builder = MilliSettings::new(
|
||||||
&mut index_wtxn,
|
&mut index_wtxn,
|
||||||
&index,
|
&index,
|
||||||
self.index_mapper.indexer_config(),
|
self.index_mapper.indexer_config(),
|
||||||
@ -576,7 +581,7 @@ impl IndexScheduler {
|
|||||||
fn apply_index_operation<'txn, 'i>(
|
fn apply_index_operation<'txn, 'i>(
|
||||||
&self,
|
&self,
|
||||||
index_wtxn: &'txn mut RwTxn<'i, '_>,
|
index_wtxn: &'txn mut RwTxn<'i, '_>,
|
||||||
index: &'i milli::Index,
|
index: &'i Index,
|
||||||
operation: IndexOperation,
|
operation: IndexOperation,
|
||||||
) -> Result<Vec<Task>> {
|
) -> Result<Vec<Task>> {
|
||||||
match operation {
|
match operation {
|
||||||
@ -639,7 +644,7 @@ impl IndexScheduler {
|
|||||||
for content_uuid in content_files.into_iter() {
|
for content_uuid in content_files.into_iter() {
|
||||||
let content_file = self.file_store.get_update(content_uuid)?;
|
let content_file = self.file_store.get_update(content_uuid)?;
|
||||||
let reader = DocumentsBatchReader::from_reader(content_file)
|
let reader = DocumentsBatchReader::from_reader(content_file)
|
||||||
.map_err(IndexError::from)?;
|
.map_err(milli::Error::from)?;
|
||||||
let (new_builder, user_result) = builder.add_documents(reader)?;
|
let (new_builder, user_result) = builder.add_documents(reader)?;
|
||||||
builder = new_builder;
|
builder = new_builder;
|
||||||
|
|
||||||
@ -648,7 +653,7 @@ impl IndexScheduler {
|
|||||||
indexed_documents: count,
|
indexed_documents: count,
|
||||||
number_of_documents: count,
|
number_of_documents: count,
|
||||||
}),
|
}),
|
||||||
Err(e) => Err(IndexError::from(e)),
|
Err(e) => Err(milli::Error::from(e)),
|
||||||
};
|
};
|
||||||
|
|
||||||
results.push(user_result);
|
results.push(user_result);
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use meilisearch_types::error::{Code, ErrorCode};
|
use meilisearch_types::error::{Code, ErrorCode};
|
||||||
use milli::heed;
|
use meilisearch_types::heed;
|
||||||
|
use meilisearch_types::milli;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::TaskId;
|
use crate::TaskId;
|
||||||
@ -26,8 +27,6 @@ pub enum Error {
|
|||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
Milli(#[from] milli::Error),
|
Milli(#[from] milli::Error),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
IndexError(#[from] index::error::IndexError),
|
|
||||||
#[error(transparent)]
|
|
||||||
FileStore(#[from] file_store::Error),
|
FileStore(#[from] file_store::Error),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
IoError(#[from] std::io::Error),
|
IoError(#[from] std::io::Error),
|
||||||
@ -48,7 +47,6 @@ impl ErrorCode for Error {
|
|||||||
// TODO: TAMO: are all these errors really internal?
|
// TODO: TAMO: are all these errors really internal?
|
||||||
Error::Heed(_) => Code::Internal,
|
Error::Heed(_) => Code::Internal,
|
||||||
Error::Milli(_) => Code::Internal,
|
Error::Milli(_) => Code::Internal,
|
||||||
Error::IndexError(_) => Code::Internal,
|
|
||||||
Error::FileStore(_) => Code::Internal,
|
Error::FileStore(_) => Code::Internal,
|
||||||
Error::IoError(_) => Code::Internal,
|
Error::IoError(_) => Code::Internal,
|
||||||
Error::Anyhow(_) => Code::Internal,
|
Error::Anyhow(_) => Code::Internal,
|
||||||
|
@ -5,13 +5,12 @@ use std::sync::{Arc, RwLock};
|
|||||||
use std::{fs, thread};
|
use std::{fs, thread};
|
||||||
|
|
||||||
use log::error;
|
use log::error;
|
||||||
use milli::Index;
|
use meilisearch_types::heed::types::{SerdeBincode, Str};
|
||||||
|
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
|
||||||
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
|
use meilisearch_types::milli::Index;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use milli::heed::types::{SerdeBincode, Str};
|
|
||||||
use milli::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
|
|
||||||
use milli::update::IndexerConfig;
|
|
||||||
|
|
||||||
use self::IndexStatus::{Available, BeingDeleted};
|
use self::IndexStatus::{Available, BeingDeleted};
|
||||||
use crate::{Error, Result};
|
use crate::{Error, Result};
|
||||||
|
|
||||||
@ -70,7 +69,7 @@ impl IndexMapper {
|
|||||||
fs::create_dir_all(&index_path)?;
|
fs::create_dir_all(&index_path)?;
|
||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(self.index_size);
|
options.map_size(self.index_size);
|
||||||
Ok(milli::Index::new(options, &index_path)?)
|
Ok(Index::new(options, &index_path)?)
|
||||||
}
|
}
|
||||||
error => error,
|
error => error,
|
||||||
}
|
}
|
||||||
@ -153,7 +152,7 @@ impl IndexMapper {
|
|||||||
fs::create_dir_all(&index_path)?;
|
fs::create_dir_all(&index_path)?;
|
||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(self.index_size);
|
options.map_size(self.index_size);
|
||||||
let index = milli::Index::new(options, &index_path)?;
|
let index = Index::new(options, &index_path)?;
|
||||||
entry.insert(Available(index.clone()));
|
entry.insert(Available(index.clone()));
|
||||||
index
|
index
|
||||||
}
|
}
|
||||||
|
@ -7,8 +7,6 @@ mod snapshot;
|
|||||||
pub mod task;
|
pub mod task;
|
||||||
mod utils;
|
mod utils;
|
||||||
|
|
||||||
pub use milli;
|
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
pub type TaskId = u32;
|
pub type TaskId = u32;
|
||||||
|
|
||||||
@ -26,10 +24,10 @@ use synchronoise::SignalEvent;
|
|||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use milli::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||||
use milli::heed::{self, Database, Env};
|
use meilisearch_types::heed::{self, Database, Env};
|
||||||
use milli::update::IndexerConfig;
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
use milli::{Index, RoaringBitmapCodec, BEU32};
|
use meilisearch_types::milli::{Index, RoaringBitmapCodec, BEU32};
|
||||||
|
|
||||||
use crate::index_mapper::IndexMapper;
|
use crate::index_mapper::IndexMapper;
|
||||||
use crate::task::Task;
|
use crate::task::Task;
|
||||||
@ -452,7 +450,7 @@ impl IndexScheduler {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use insta::*;
|
use insta::*;
|
||||||
use milli::update::IndexDocumentsMethod::ReplaceDocuments;
|
use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
@ -512,7 +510,8 @@ mod tests {
|
|||||||
.create_update_file_with_uuid(file_uuid)
|
.create_update_file_with_uuid(file_uuid)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let documents_count =
|
let documents_count =
|
||||||
document_formats::read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
|
meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut())
|
||||||
|
.unwrap() as u64;
|
||||||
(file, documents_count)
|
(file, documents_count)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -779,7 +778,8 @@ mod tests {
|
|||||||
|
|
||||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
|
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
|
||||||
let documents_count =
|
let documents_count =
|
||||||
document_formats::read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
|
meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut())
|
||||||
|
.unwrap() as u64;
|
||||||
index_scheduler
|
index_scheduler
|
||||||
.register(KindWithContent::DocumentImport {
|
.register(KindWithContent::DocumentImport {
|
||||||
index_uid: S("doggos"),
|
index_uid: S("doggos"),
|
||||||
|
@ -1,10 +1,8 @@
|
|||||||
use milli::{
|
use meilisearch_types::heed::{
|
||||||
heed::{
|
|
||||||
types::{OwnedType, SerdeBincode, SerdeJson, Str},
|
types::{OwnedType, SerdeBincode, SerdeJson, Str},
|
||||||
Database, RoTxn,
|
Database, RoTxn,
|
||||||
},
|
|
||||||
RoaringBitmapCodec, BEU32,
|
|
||||||
};
|
};
|
||||||
|
use meilisearch_types::milli::{RoaringBitmapCodec, BEU32};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use index::{Settings, Unchecked};
|
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
use milli::update::IndexDocumentsMethod;
|
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||||
|
use meilisearch_types::settings::{Settings, Unchecked};
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize, Serializer};
|
use serde::{Deserialize, Serialize, Serializer};
|
||||||
use std::{
|
use std::{
|
||||||
@ -543,7 +543,7 @@ fn serialize_duration<S: Serializer>(
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use milli::heed::{types::SerdeJson, BytesDecode, BytesEncode};
|
use meilisearch_types::heed::{types::SerdeJson, BytesDecode, BytesEncode};
|
||||||
|
|
||||||
use crate::assert_smol_debug_snapshot;
|
use crate::assert_smol_debug_snapshot;
|
||||||
|
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
//! Utility functions on the DBs. Mainly getter and setters.
|
//! Utility functions on the DBs. Mainly getter and setters.
|
||||||
|
|
||||||
use milli::{
|
use meilisearch_types::heed::{types::DecodeIgnore, RoTxn, RwTxn};
|
||||||
heed::{types::DecodeIgnore, RoTxn, RwTxn},
|
use meilisearch_types::milli::BEU32;
|
||||||
BEU32,
|
|
||||||
};
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
@ -1,33 +0,0 @@
|
|||||||
[package]
|
|
||||||
name = "index"
|
|
||||||
version = "0.1.0"
|
|
||||||
edition = "2021"
|
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
anyhow = "1.0.64"
|
|
||||||
bincode = "1.3.3"
|
|
||||||
csv = "1.1.6"
|
|
||||||
derivative = "2.2.0"
|
|
||||||
either = { version = "1.6.1", features = ["serde"] }
|
|
||||||
fst = "0.4.7"
|
|
||||||
indexmap = { version = "1.8.0", features = ["serde-1"] }
|
|
||||||
lazy_static = "1.4.0"
|
|
||||||
log = "0.4.14"
|
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" }
|
|
||||||
obkv = "0.2.0"
|
|
||||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
|
||||||
regex = "1.5.5"
|
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
|
||||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
|
||||||
thiserror = "1.0.30"
|
|
||||||
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
|
||||||
file-store = { path = "../file-store" }
|
|
||||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
|
||||||
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
|
||||||
proptest = "1.0.0"
|
|
||||||
proptest-derive = "0.3.0"
|
|
@ -1,160 +0,0 @@
|
|||||||
use std::fs::{create_dir_all, File};
|
|
||||||
use std::io::{BufReader, Seek, SeekFrom, Write};
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
use milli::documents::DocumentsBatchReader;
|
|
||||||
use milli::heed::{EnvOpenOptions, RoTxn};
|
|
||||||
use milli::update::{IndexDocumentsConfig, IndexerConfig};
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
|
|
||||||
use crate::document_formats::read_ndjson;
|
|
||||||
use crate::index::updates::apply_settings_to_builder;
|
|
||||||
|
|
||||||
use super::error::Result;
|
|
||||||
use super::{index::Index, Settings, Unchecked};
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
struct DumpMeta {
|
|
||||||
settings: Settings<Unchecked>,
|
|
||||||
primary_key: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
const META_FILE_NAME: &str = "meta.json";
|
|
||||||
const DATA_FILE_NAME: &str = "documents.jsonl";
|
|
||||||
|
|
||||||
impl Index {
|
|
||||||
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
|
||||||
// acquire write txn make sure any ongoing write is finished before we start.
|
|
||||||
let txn = self.write_txn()?;
|
|
||||||
let path = path.as_ref().join(format!("indexes/{}", self.uuid));
|
|
||||||
|
|
||||||
create_dir_all(&path)?;
|
|
||||||
|
|
||||||
self.dump_documents(&txn, &path)?;
|
|
||||||
self.dump_meta(&txn, &path)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
|
||||||
let document_file_path = path.as_ref().join(DATA_FILE_NAME);
|
|
||||||
let mut document_file = File::create(&document_file_path)?;
|
|
||||||
|
|
||||||
let documents = self.all_documents(txn)?;
|
|
||||||
let fields_ids_map = self.fields_ids_map(txn)?;
|
|
||||||
|
|
||||||
// dump documents
|
|
||||||
let mut json_map = IndexMap::new();
|
|
||||||
for document in documents {
|
|
||||||
let (_, reader) = document?;
|
|
||||||
|
|
||||||
for (fid, bytes) in reader.iter() {
|
|
||||||
if let Some(name) = fields_ids_map.name(fid) {
|
|
||||||
json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
serde_json::to_writer(&mut document_file, &json_map)?;
|
|
||||||
document_file.write_all(b"\n")?;
|
|
||||||
|
|
||||||
json_map.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
|
||||||
let meta_file_path = path.as_ref().join(META_FILE_NAME);
|
|
||||||
let mut meta_file = File::create(&meta_file_path)?;
|
|
||||||
|
|
||||||
let settings = self.settings_txn(txn)?.into_unchecked();
|
|
||||||
let primary_key = self.primary_key(txn)?.map(String::from);
|
|
||||||
let meta = DumpMeta {
|
|
||||||
settings,
|
|
||||||
primary_key,
|
|
||||||
};
|
|
||||||
|
|
||||||
serde_json::to_writer(&mut meta_file, &meta)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn load_dump(
|
|
||||||
src: impl AsRef<Path>,
|
|
||||||
dst: impl AsRef<Path>,
|
|
||||||
size: usize,
|
|
||||||
indexer_config: &IndexerConfig,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let dir_name = src
|
|
||||||
.as_ref()
|
|
||||||
.file_name()
|
|
||||||
.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
|
|
||||||
|
|
||||||
let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
|
|
||||||
create_dir_all(&dst_dir_path)?;
|
|
||||||
|
|
||||||
let meta_path = src.as_ref().join(META_FILE_NAME);
|
|
||||||
let meta_file = File::open(meta_path)?;
|
|
||||||
let DumpMeta {
|
|
||||||
settings,
|
|
||||||
primary_key,
|
|
||||||
} = serde_json::from_reader(meta_file)?;
|
|
||||||
let settings = settings.check();
|
|
||||||
|
|
||||||
let mut options = EnvOpenOptions::new();
|
|
||||||
options.map_size(size);
|
|
||||||
let index = milli::Index::new(options, &dst_dir_path)?;
|
|
||||||
|
|
||||||
let mut txn = index.write_txn()?;
|
|
||||||
|
|
||||||
// Apply settings first
|
|
||||||
let mut builder = milli::update::Settings::new(&mut txn, &index, indexer_config);
|
|
||||||
|
|
||||||
if let Some(primary_key) = primary_key {
|
|
||||||
builder.set_primary_key(primary_key);
|
|
||||||
}
|
|
||||||
|
|
||||||
apply_settings_to_builder(&settings, &mut builder);
|
|
||||||
|
|
||||||
builder.execute(|_| ())?;
|
|
||||||
|
|
||||||
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
|
||||||
let reader = BufReader::new(File::open(&document_file_path)?);
|
|
||||||
|
|
||||||
let mut tmp_doc_file = tempfile::tempfile()?;
|
|
||||||
|
|
||||||
let empty = match read_ndjson(reader, &mut tmp_doc_file) {
|
|
||||||
// if there was no document in the file it's because the index was empty
|
|
||||||
Ok(0) => true,
|
|
||||||
Ok(_) => false,
|
|
||||||
Err(e) => return Err(e.into()),
|
|
||||||
};
|
|
||||||
|
|
||||||
if !empty {
|
|
||||||
tmp_doc_file.seek(SeekFrom::Start(0))?;
|
|
||||||
|
|
||||||
let documents_reader = DocumentsBatchReader::from_reader(tmp_doc_file)?;
|
|
||||||
|
|
||||||
//If the document file is empty, we don't perform the document addition, to prevent
|
|
||||||
//a primary key error to be thrown.
|
|
||||||
let config = IndexDocumentsConfig::default();
|
|
||||||
let builder = milli::update::IndexDocuments::new(
|
|
||||||
&mut txn,
|
|
||||||
&index,
|
|
||||||
indexer_config,
|
|
||||||
config,
|
|
||||||
|_| (),
|
|
||||||
)?;
|
|
||||||
let (builder, user_error) = builder.add_documents(documents_reader)?;
|
|
||||||
user_error?;
|
|
||||||
builder.execute()?;
|
|
||||||
}
|
|
||||||
|
|
||||||
txn.commit()?;
|
|
||||||
index.prepare_for_closing().wait();
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,122 +0,0 @@
|
|||||||
use std::error::Error;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use meilisearch_types::error::{Code, ErrorCode};
|
|
||||||
use meilisearch_types::internal_error;
|
|
||||||
use milli::UserError;
|
|
||||||
use serde_json::Value;
|
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, IndexError>;
|
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
|
||||||
pub enum IndexError {
|
|
||||||
#[error("An internal error has occurred. `{0}`.")]
|
|
||||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
|
||||||
#[error("Document `{0}` not found.")]
|
|
||||||
DocumentNotFound(String),
|
|
||||||
#[error("{0}")]
|
|
||||||
Facet(#[from] FacetError),
|
|
||||||
#[error("{0}")]
|
|
||||||
Milli(#[from] milli::Error),
|
|
||||||
}
|
|
||||||
|
|
||||||
internal_error!(
|
|
||||||
IndexError: std::io::Error,
|
|
||||||
milli::heed::Error,
|
|
||||||
fst::Error,
|
|
||||||
serde_json::Error,
|
|
||||||
file_store::Error,
|
|
||||||
milli::documents::Error
|
|
||||||
);
|
|
||||||
|
|
||||||
impl ErrorCode for IndexError {
|
|
||||||
fn error_code(&self) -> Code {
|
|
||||||
match self {
|
|
||||||
IndexError::Internal(_) => Code::Internal,
|
|
||||||
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
|
|
||||||
IndexError::Facet(e) => e.error_code(),
|
|
||||||
IndexError::Milli(e) => MilliError(e).error_code(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ErrorCode for &IndexError {
|
|
||||||
fn error_code(&self) -> Code {
|
|
||||||
match self {
|
|
||||||
IndexError::Internal(_) => Code::Internal,
|
|
||||||
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
|
|
||||||
IndexError::Facet(e) => e.error_code(),
|
|
||||||
IndexError::Milli(e) => MilliError(e).error_code(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<milli::UserError> for IndexError {
|
|
||||||
fn from(error: milli::UserError) -> IndexError {
|
|
||||||
IndexError::Milli(error.into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
|
||||||
pub enum FacetError {
|
|
||||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
|
||||||
InvalidExpression(&'static [&'static str], Value),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ErrorCode for FacetError {
|
|
||||||
fn error_code(&self) -> Code {
|
|
||||||
match self {
|
|
||||||
FacetError::InvalidExpression(_, _) => Code::Filter,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct MilliError<'a>(pub &'a milli::Error);
|
|
||||||
|
|
||||||
impl Error for MilliError<'_> {}
|
|
||||||
|
|
||||||
impl fmt::Display for MilliError<'_> {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
self.0.fmt(f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ErrorCode for MilliError<'_> {
|
|
||||||
fn error_code(&self) -> Code {
|
|
||||||
match self.0 {
|
|
||||||
milli::Error::InternalError(_) => Code::Internal,
|
|
||||||
milli::Error::IoError(_) => Code::Internal,
|
|
||||||
milli::Error::UserError(ref error) => {
|
|
||||||
match error {
|
|
||||||
// TODO: wait for spec for new error codes.
|
|
||||||
UserError::SerdeJson(_)
|
|
||||||
| UserError::InvalidLmdbOpenOptions
|
|
||||||
| UserError::DocumentLimitReached
|
|
||||||
| UserError::AccessingSoftDeletedDocument { .. }
|
|
||||||
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
|
|
||||||
UserError::InvalidStoreFile => Code::InvalidStore,
|
|
||||||
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
|
|
||||||
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
|
|
||||||
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
|
|
||||||
UserError::InvalidFilter(_) => Code::Filter,
|
|
||||||
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
|
|
||||||
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
|
|
||||||
Code::InvalidDocumentId
|
|
||||||
}
|
|
||||||
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
|
|
||||||
UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent,
|
|
||||||
UserError::SortRankingRuleMissing => Code::Sort,
|
|
||||||
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
|
|
||||||
UserError::InvalidSortableAttribute { .. } => Code::Sort,
|
|
||||||
UserError::CriterionError(_) => Code::InvalidRankingRule,
|
|
||||||
UserError::InvalidGeoField { .. } => Code::InvalidGeoField,
|
|
||||||
UserError::SortError(_) => Code::Sort,
|
|
||||||
UserError::InvalidMinTypoWordLenSetting(_, _) => {
|
|
||||||
Code::InvalidMinWordLengthForTypo
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,869 +0,0 @@
|
|||||||
use std::cmp::min;
|
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
|
||||||
use std::marker::PhantomData;
|
|
||||||
use std::str::FromStr;
|
|
||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
use either::Either;
|
|
||||||
use fst::IntoStreamer;
|
|
||||||
use milli::heed::RoTxn;
|
|
||||||
use milli::tokenizer::TokenizerBuilder;
|
|
||||||
use milli::update::Setting;
|
|
||||||
use milli::{
|
|
||||||
obkv_to_json, AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds,
|
|
||||||
MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
|
||||||
};
|
|
||||||
use regex::Regex;
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use serde_json::{json, Value};
|
|
||||||
|
|
||||||
use crate::error::FacetError;
|
|
||||||
use crate::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings};
|
|
||||||
use crate::{Checked, Settings};
|
|
||||||
|
|
||||||
use super::error::{IndexError, Result};
|
|
||||||
|
|
||||||
pub type Document = serde_json::Map<String, Value>;
|
|
||||||
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
|
||||||
|
|
||||||
pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
|
|
||||||
pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
|
|
||||||
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
|
||||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
|
||||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
|
||||||
|
|
||||||
/// The maximimum number of results that the engine
|
|
||||||
/// will be able to return in one search call.
|
|
||||||
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
|
|
||||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
|
||||||
pub struct SearchQuery {
|
|
||||||
pub q: Option<String>,
|
|
||||||
pub offset: Option<usize>,
|
|
||||||
#[serde(default = "DEFAULT_SEARCH_LIMIT")]
|
|
||||||
pub limit: usize,
|
|
||||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
|
||||||
pub attributes_to_crop: Option<Vec<String>>,
|
|
||||||
#[serde(default = "DEFAULT_CROP_LENGTH")]
|
|
||||||
pub crop_length: usize,
|
|
||||||
pub attributes_to_highlight: Option<HashSet<String>>,
|
|
||||||
// Default to false
|
|
||||||
#[serde(default = "Default::default")]
|
|
||||||
pub show_matches_position: bool,
|
|
||||||
pub filter: Option<Value>,
|
|
||||||
pub sort: Option<Vec<String>>,
|
|
||||||
pub facets: Option<Vec<String>>,
|
|
||||||
#[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
|
|
||||||
pub highlight_pre_tag: String,
|
|
||||||
#[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
|
|
||||||
pub highlight_post_tag: String,
|
|
||||||
#[serde(default = "DEFAULT_CROP_MARKER")]
|
|
||||||
pub crop_marker: String,
|
|
||||||
#[serde(default)]
|
|
||||||
pub matching_strategy: MatchingStrategy,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub enum MatchingStrategy {
|
|
||||||
/// Remove query words from last to first
|
|
||||||
Last,
|
|
||||||
/// All query words are mandatory
|
|
||||||
All,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for MatchingStrategy {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::Last
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<MatchingStrategy> for TermsMatchingStrategy {
|
|
||||||
fn from(other: MatchingStrategy) -> Self {
|
|
||||||
match other {
|
|
||||||
MatchingStrategy::Last => Self::Last,
|
|
||||||
MatchingStrategy::All => Self::All,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
|
||||||
pub struct SearchHit {
|
|
||||||
#[serde(flatten)]
|
|
||||||
pub document: Document,
|
|
||||||
#[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
|
|
||||||
pub formatted: Document,
|
|
||||||
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
|
|
||||||
pub matches_position: Option<MatchesPosition>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct SearchResult {
|
|
||||||
pub hits: Vec<SearchHit>,
|
|
||||||
pub estimated_total_hits: u64,
|
|
||||||
pub query: String,
|
|
||||||
pub limit: usize,
|
|
||||||
pub offset: usize,
|
|
||||||
pub processing_time_ms: u128,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn perform_search(index: &Index, query: SearchQuery) -> Result<SearchResult> {
|
|
||||||
let before_search = Instant::now();
|
|
||||||
let rtxn = index.read_txn()?;
|
|
||||||
|
|
||||||
let mut search = index.search(&rtxn);
|
|
||||||
|
|
||||||
if let Some(ref query) = query.q {
|
|
||||||
search.query(query);
|
|
||||||
}
|
|
||||||
|
|
||||||
search.terms_matching_strategy(query.matching_strategy.into());
|
|
||||||
|
|
||||||
let max_total_hits = index
|
|
||||||
.pagination_max_total_hits(&rtxn)?
|
|
||||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
|
||||||
|
|
||||||
// Make sure that a user can't get more documents than the hard limit,
|
|
||||||
// we align that on the offset too.
|
|
||||||
let offset = min(query.offset.unwrap_or(0), max_total_hits);
|
|
||||||
let limit = min(query.limit, max_total_hits.saturating_sub(offset));
|
|
||||||
|
|
||||||
search.offset(offset);
|
|
||||||
search.limit(limit);
|
|
||||||
|
|
||||||
if let Some(ref filter) = query.filter {
|
|
||||||
if let Some(facets) = parse_filter(filter)? {
|
|
||||||
search.filter(facets);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref sort) = query.sort {
|
|
||||||
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
|
|
||||||
Ok(sorts) => sorts,
|
|
||||||
Err(asc_desc_error) => {
|
|
||||||
return Err(IndexError::Milli(SortError::from(asc_desc_error).into()))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
search.sort_criteria(sort);
|
|
||||||
}
|
|
||||||
|
|
||||||
let milli::SearchResult {
|
|
||||||
documents_ids,
|
|
||||||
matching_words,
|
|
||||||
candidates,
|
|
||||||
..
|
|
||||||
} = search.execute()?;
|
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
|
||||||
|
|
||||||
let displayed_ids = index
|
|
||||||
.displayed_fields_ids(&rtxn)?
|
|
||||||
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
|
|
||||||
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
|
||||||
|
|
||||||
let fids = |attrs: &BTreeSet<String>| {
|
|
||||||
let mut ids = BTreeSet::new();
|
|
||||||
for attr in attrs {
|
|
||||||
if attr == "*" {
|
|
||||||
ids = displayed_ids.clone();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(id) = fields_ids_map.id(attr) {
|
|
||||||
ids.insert(id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ids
|
|
||||||
};
|
|
||||||
|
|
||||||
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
|
|
||||||
// but these attributes must be also be present
|
|
||||||
// - in the fields_ids_map
|
|
||||||
// - in the the displayed attributes
|
|
||||||
let to_retrieve_ids: BTreeSet<_> = query
|
|
||||||
.attributes_to_retrieve
|
|
||||||
.as_ref()
|
|
||||||
.map(fids)
|
|
||||||
.unwrap_or_else(|| displayed_ids.clone())
|
|
||||||
.intersection(&displayed_ids)
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();
|
|
||||||
|
|
||||||
let attr_to_crop = query.attributes_to_crop.unwrap_or_default();
|
|
||||||
|
|
||||||
// Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
|
|
||||||
// These attributes are:
|
|
||||||
// - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
|
|
||||||
// - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
|
|
||||||
// But these attributes must be also present in displayed attributes
|
|
||||||
let formatted_options = compute_formatted_options(
|
|
||||||
&attr_to_highlight,
|
|
||||||
&attr_to_crop,
|
|
||||||
query.crop_length,
|
|
||||||
&to_retrieve_ids,
|
|
||||||
&fields_ids_map,
|
|
||||||
&displayed_ids,
|
|
||||||
);
|
|
||||||
|
|
||||||
let tokenizer = TokenizerBuilder::default().build();
|
|
||||||
|
|
||||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
|
|
||||||
formatter_builder.crop_marker(query.crop_marker);
|
|
||||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
|
||||||
formatter_builder.highlight_suffix(query.highlight_post_tag);
|
|
||||||
|
|
||||||
let mut documents = Vec::new();
|
|
||||||
|
|
||||||
let documents_iter = index.documents(&rtxn, documents_ids)?;
|
|
||||||
|
|
||||||
for (_id, obkv) in documents_iter {
|
|
||||||
// First generate a document with all the displayed fields
|
|
||||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
|
||||||
|
|
||||||
// select the attributes to retrieve
|
|
||||||
let attributes_to_retrieve = to_retrieve_ids
|
|
||||||
.iter()
|
|
||||||
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
|
|
||||||
let mut document =
|
|
||||||
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
|
||||||
|
|
||||||
let (matches_position, formatted) = format_fields(
|
|
||||||
&displayed_document,
|
|
||||||
&fields_ids_map,
|
|
||||||
&formatter_builder,
|
|
||||||
&formatted_options,
|
|
||||||
query.show_matches_position,
|
|
||||||
&displayed_ids,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
if let Some(sort) = query.sort.as_ref() {
|
|
||||||
insert_geo_distance(sort, &mut document);
|
|
||||||
}
|
|
||||||
|
|
||||||
let hit = SearchHit {
|
|
||||||
document,
|
|
||||||
formatted,
|
|
||||||
matches_position,
|
|
||||||
};
|
|
||||||
documents.push(hit);
|
|
||||||
}
|
|
||||||
|
|
||||||
let estimated_total_hits = candidates.len();
|
|
||||||
|
|
||||||
let facet_distribution = match query.facets {
|
|
||||||
Some(ref fields) => {
|
|
||||||
let mut facet_distribution = index.facets_distribution(&rtxn);
|
|
||||||
|
|
||||||
let max_values_by_facet = index
|
|
||||||
.max_values_per_facet(&rtxn)?
|
|
||||||
.unwrap_or(DEFAULT_VALUES_PER_FACET);
|
|
||||||
facet_distribution.max_values_per_facet(max_values_by_facet);
|
|
||||||
|
|
||||||
if fields.iter().all(|f| f != "*") {
|
|
||||||
facet_distribution.facets(fields);
|
|
||||||
}
|
|
||||||
let distribution = facet_distribution.candidates(candidates).execute()?;
|
|
||||||
|
|
||||||
Some(distribution)
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let result = SearchResult {
|
|
||||||
hits: documents,
|
|
||||||
estimated_total_hits,
|
|
||||||
query: query.q.clone().unwrap_or_default(),
|
|
||||||
limit: query.limit,
|
|
||||||
offset: query.offset.unwrap_or_default(),
|
|
||||||
processing_time_ms: before_search.elapsed().as_millis(),
|
|
||||||
facet_distribution,
|
|
||||||
};
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn all_documents<'a>(
|
|
||||||
index: &Index,
|
|
||||||
rtxn: &'a RoTxn,
|
|
||||||
) -> Result<impl Iterator<Item = Result<Document>> + 'a> {
|
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
|
||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
|
||||||
|
|
||||||
Ok(index.all_documents(rtxn)?.map(move |ret| {
|
|
||||||
ret.map_err(IndexError::from)
|
|
||||||
.and_then(|(_key, document)| -> Result<_> {
|
|
||||||
Ok(obkv_to_json(&all_fields, &fields_ids_map, document)?)
|
|
||||||
})
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn retrieve_documents<S: AsRef<str>>(
|
|
||||||
index: &Index,
|
|
||||||
offset: usize,
|
|
||||||
limit: usize,
|
|
||||||
attributes_to_retrieve: Option<Vec<S>>,
|
|
||||||
) -> Result<(u64, Vec<Document>)> {
|
|
||||||
let rtxn = index.read_txn()?;
|
|
||||||
|
|
||||||
let mut documents = Vec::new();
|
|
||||||
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
|
|
||||||
let document = match &attributes_to_retrieve {
|
|
||||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
|
||||||
&document?,
|
|
||||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
|
||||||
),
|
|
||||||
None => document?,
|
|
||||||
};
|
|
||||||
documents.push(document);
|
|
||||||
}
|
|
||||||
|
|
||||||
let number_of_documents = index.number_of_documents(&rtxn)?;
|
|
||||||
Ok((number_of_documents, documents))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn retrieve_document<S: AsRef<str>>(
|
|
||||||
index: &Index,
|
|
||||||
doc_id: &str,
|
|
||||||
attributes_to_retrieve: Option<Vec<S>>,
|
|
||||||
) -> Result<Document> {
|
|
||||||
let txn = index.read_txn()?;
|
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&txn)?;
|
|
||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
|
||||||
|
|
||||||
let internal_id = index
|
|
||||||
.external_documents_ids(&txn)?
|
|
||||||
.get(doc_id.as_bytes())
|
|
||||||
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.to_string()))?;
|
|
||||||
|
|
||||||
let document = index
|
|
||||||
.documents(&txn, std::iter::once(internal_id))?
|
|
||||||
.into_iter()
|
|
||||||
.next()
|
|
||||||
.map(|(_, d)| d)
|
|
||||||
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.to_string()))?;
|
|
||||||
|
|
||||||
let document = obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
|
||||||
let document = match &attributes_to_retrieve {
|
|
||||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
|
||||||
&document,
|
|
||||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
|
||||||
),
|
|
||||||
None => document,
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(document)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>> {
|
|
||||||
let displayed_attributes = index
|
|
||||||
.displayed_fields(rtxn)?
|
|
||||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
|
||||||
|
|
||||||
let searchable_attributes = index
|
|
||||||
.user_defined_searchable_fields(rtxn)?
|
|
||||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
|
||||||
|
|
||||||
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
|
|
||||||
|
|
||||||
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
|
|
||||||
|
|
||||||
let criteria = index
|
|
||||||
.criteria(rtxn)?
|
|
||||||
.into_iter()
|
|
||||||
.map(|c| c.to_string())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let stop_words = index
|
|
||||||
.stop_words(rtxn)?
|
|
||||||
.map(|stop_words| -> Result<BTreeSet<_>> {
|
|
||||||
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
|
||||||
})
|
|
||||||
.transpose()?
|
|
||||||
.unwrap_or_default();
|
|
||||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
|
||||||
|
|
||||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
|
||||||
// this information we are going to put space between words.
|
|
||||||
let synonyms = index
|
|
||||||
.synonyms(rtxn)?
|
|
||||||
.iter()
|
|
||||||
.map(|(key, values)| {
|
|
||||||
(
|
|
||||||
key.join(" "),
|
|
||||||
values.iter().map(|value| value.join(" ")).collect(),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
|
||||||
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
|
||||||
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
|
|
||||||
};
|
|
||||||
|
|
||||||
let disabled_words = match index.exact_words(rtxn)? {
|
|
||||||
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
|
|
||||||
None => BTreeSet::new(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let disabled_attributes = index
|
|
||||||
.exact_attributes(rtxn)?
|
|
||||||
.into_iter()
|
|
||||||
.map(String::from)
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let typo_tolerance = TypoSettings {
|
|
||||||
enabled: Setting::Set(index.authorize_typos(rtxn)?),
|
|
||||||
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
|
||||||
disable_on_words: Setting::Set(disabled_words),
|
|
||||||
disable_on_attributes: Setting::Set(disabled_attributes),
|
|
||||||
};
|
|
||||||
|
|
||||||
let faceting = FacetingSettings {
|
|
||||||
max_values_per_facet: Setting::Set(
|
|
||||||
index
|
|
||||||
.max_values_per_facet(rtxn)?
|
|
||||||
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
|
||||||
),
|
|
||||||
};
|
|
||||||
|
|
||||||
let pagination = PaginationSettings {
|
|
||||||
max_total_hits: Setting::Set(
|
|
||||||
index
|
|
||||||
.pagination_max_total_hits(rtxn)?
|
|
||||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
|
||||||
),
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(Settings {
|
|
||||||
displayed_attributes: match displayed_attributes {
|
|
||||||
Some(attrs) => Setting::Set(attrs),
|
|
||||||
None => Setting::Reset,
|
|
||||||
},
|
|
||||||
searchable_attributes: match searchable_attributes {
|
|
||||||
Some(attrs) => Setting::Set(attrs),
|
|
||||||
None => Setting::Reset,
|
|
||||||
},
|
|
||||||
filterable_attributes: Setting::Set(filterable_attributes),
|
|
||||||
sortable_attributes: Setting::Set(sortable_attributes),
|
|
||||||
ranking_rules: Setting::Set(criteria),
|
|
||||||
stop_words: Setting::Set(stop_words),
|
|
||||||
distinct_attribute: match distinct_field {
|
|
||||||
Some(field) => Setting::Set(field),
|
|
||||||
None => Setting::Reset,
|
|
||||||
},
|
|
||||||
synonyms: Setting::Set(synonyms),
|
|
||||||
typo_tolerance: Setting::Set(typo_tolerance),
|
|
||||||
faceting: Setting::Set(faceting),
|
|
||||||
pagination: Setting::Set(pagination),
|
|
||||||
_kind: PhantomData,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
|
||||||
lazy_static::lazy_static! {
|
|
||||||
static ref GEO_REGEX: Regex =
|
|
||||||
Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap();
|
|
||||||
};
|
|
||||||
if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) {
|
|
||||||
// TODO: TAMO: milli encountered an internal error, what do we want to do?
|
|
||||||
let base = [
|
|
||||||
capture_group[1].parse().unwrap(),
|
|
||||||
capture_group[2].parse().unwrap(),
|
|
||||||
];
|
|
||||||
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
|
|
||||||
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
|
|
||||||
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
|
|
||||||
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn compute_formatted_options(
|
|
||||||
attr_to_highlight: &HashSet<String>,
|
|
||||||
attr_to_crop: &[String],
|
|
||||||
query_crop_length: usize,
|
|
||||||
to_retrieve_ids: &BTreeSet<FieldId>,
|
|
||||||
fields_ids_map: &FieldsIdsMap,
|
|
||||||
displayed_ids: &BTreeSet<FieldId>,
|
|
||||||
) -> BTreeMap<FieldId, FormatOptions> {
|
|
||||||
let mut formatted_options = BTreeMap::new();
|
|
||||||
|
|
||||||
add_highlight_to_formatted_options(
|
|
||||||
&mut formatted_options,
|
|
||||||
attr_to_highlight,
|
|
||||||
fields_ids_map,
|
|
||||||
displayed_ids,
|
|
||||||
);
|
|
||||||
|
|
||||||
add_crop_to_formatted_options(
|
|
||||||
&mut formatted_options,
|
|
||||||
attr_to_crop,
|
|
||||||
query_crop_length,
|
|
||||||
fields_ids_map,
|
|
||||||
displayed_ids,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Should not return `_formatted` if no valid attributes to highlight/crop
|
|
||||||
if !formatted_options.is_empty() {
|
|
||||||
add_non_formatted_ids_to_formatted_options(&mut formatted_options, to_retrieve_ids);
|
|
||||||
}
|
|
||||||
|
|
||||||
formatted_options
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_highlight_to_formatted_options(
|
|
||||||
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
|
|
||||||
attr_to_highlight: &HashSet<String>,
|
|
||||||
fields_ids_map: &FieldsIdsMap,
|
|
||||||
displayed_ids: &BTreeSet<FieldId>,
|
|
||||||
) {
|
|
||||||
for attr in attr_to_highlight {
|
|
||||||
let new_format = FormatOptions {
|
|
||||||
highlight: true,
|
|
||||||
crop: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
if attr == "*" {
|
|
||||||
for id in displayed_ids {
|
|
||||||
formatted_options.insert(*id, new_format);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(id) = fields_ids_map.id(attr) {
|
|
||||||
if displayed_ids.contains(&id) {
|
|
||||||
formatted_options.insert(id, new_format);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_crop_to_formatted_options(
|
|
||||||
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
|
|
||||||
attr_to_crop: &[String],
|
|
||||||
crop_length: usize,
|
|
||||||
fields_ids_map: &FieldsIdsMap,
|
|
||||||
displayed_ids: &BTreeSet<FieldId>,
|
|
||||||
) {
|
|
||||||
for attr in attr_to_crop {
|
|
||||||
let mut split = attr.rsplitn(2, ':');
|
|
||||||
let (attr_name, attr_len) = match split.next().zip(split.next()) {
|
|
||||||
Some((len, name)) => {
|
|
||||||
let crop_len = len.parse::<usize>().unwrap_or(crop_length);
|
|
||||||
(name, crop_len)
|
|
||||||
}
|
|
||||||
None => (attr.as_str(), crop_length),
|
|
||||||
};
|
|
||||||
|
|
||||||
if attr_name == "*" {
|
|
||||||
for id in displayed_ids {
|
|
||||||
formatted_options
|
|
||||||
.entry(*id)
|
|
||||||
.and_modify(|f| f.crop = Some(attr_len))
|
|
||||||
.or_insert(FormatOptions {
|
|
||||||
highlight: false,
|
|
||||||
crop: Some(attr_len),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(id) = fields_ids_map.id(attr_name) {
|
|
||||||
if displayed_ids.contains(&id) {
|
|
||||||
formatted_options
|
|
||||||
.entry(id)
|
|
||||||
.and_modify(|f| f.crop = Some(attr_len))
|
|
||||||
.or_insert(FormatOptions {
|
|
||||||
highlight: false,
|
|
||||||
crop: Some(attr_len),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_non_formatted_ids_to_formatted_options(
|
|
||||||
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
|
|
||||||
to_retrieve_ids: &BTreeSet<FieldId>,
|
|
||||||
) {
|
|
||||||
for id in to_retrieve_ids {
|
|
||||||
formatted_options.entry(*id).or_insert(FormatOptions {
|
|
||||||
highlight: false,
|
|
||||||
crop: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn make_document(
|
|
||||||
displayed_attributes: &BTreeSet<FieldId>,
|
|
||||||
field_ids_map: &FieldsIdsMap,
|
|
||||||
obkv: obkv::KvReaderU16,
|
|
||||||
) -> Result<Document> {
|
|
||||||
let mut document = serde_json::Map::new();
|
|
||||||
|
|
||||||
// recreate the original json
|
|
||||||
for (key, value) in obkv.iter() {
|
|
||||||
let value = serde_json::from_slice(value)?;
|
|
||||||
let key = field_ids_map
|
|
||||||
.name(key)
|
|
||||||
.expect("Missing field name")
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
document.insert(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// select the attributes to retrieve
|
|
||||||
let displayed_attributes = displayed_attributes
|
|
||||||
.iter()
|
|
||||||
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
|
||||||
|
|
||||||
let document = permissive_json_pointer::select_values(&document, displayed_attributes);
|
|
||||||
Ok(document)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn format_fields<'a, A: AsRef<[u8]>>(
|
|
||||||
document: &Document,
|
|
||||||
field_ids_map: &FieldsIdsMap,
|
|
||||||
builder: &MatcherBuilder<'a, A>,
|
|
||||||
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
|
||||||
compute_matches: bool,
|
|
||||||
displayable_ids: &BTreeSet<FieldId>,
|
|
||||||
) -> Result<(Option<MatchesPosition>, Document)> {
|
|
||||||
let mut matches_position = compute_matches.then(BTreeMap::new);
|
|
||||||
let mut document = document.clone();
|
|
||||||
|
|
||||||
// select the attributes to retrieve
|
|
||||||
let displayable_names = displayable_ids
|
|
||||||
.iter()
|
|
||||||
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
|
||||||
permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
|
|
||||||
// To get the formatting option of each key we need to see all the rules that applies
|
|
||||||
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
|
|
||||||
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
|
|
||||||
// highlighted.
|
|
||||||
let format = formatted_options
|
|
||||||
.iter()
|
|
||||||
.filter(|(field, _option)| {
|
|
||||||
let name = field_ids_map.name(**field).unwrap();
|
|
||||||
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
|
|
||||||
})
|
|
||||||
.map(|(_, option)| *option)
|
|
||||||
.reduce(|acc, option| acc.merge(option));
|
|
||||||
let mut infos = Vec::new();
|
|
||||||
|
|
||||||
*value = format_value(
|
|
||||||
std::mem::take(value),
|
|
||||||
builder,
|
|
||||||
format,
|
|
||||||
&mut infos,
|
|
||||||
compute_matches,
|
|
||||||
);
|
|
||||||
|
|
||||||
if let Some(matches) = matches_position.as_mut() {
|
|
||||||
if !infos.is_empty() {
|
|
||||||
matches.insert(key.to_owned(), infos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let selectors = formatted_options
|
|
||||||
.keys()
|
|
||||||
// This unwrap must be safe since we got the ids from the fields_ids_map just
|
|
||||||
// before.
|
|
||||||
.map(|&fid| field_ids_map.name(fid).unwrap());
|
|
||||||
let document = permissive_json_pointer::select_values(&document, selectors);
|
|
||||||
|
|
||||||
Ok((matches_position, document))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn format_value<'a, A: AsRef<[u8]>>(
|
|
||||||
value: Value,
|
|
||||||
builder: &MatcherBuilder<'a, A>,
|
|
||||||
format_options: Option<FormatOptions>,
|
|
||||||
infos: &mut Vec<MatchBounds>,
|
|
||||||
compute_matches: bool,
|
|
||||||
) -> Value {
|
|
||||||
match value {
|
|
||||||
Value::String(old_string) => {
|
|
||||||
let mut matcher = builder.build(&old_string);
|
|
||||||
if compute_matches {
|
|
||||||
let matches = matcher.matches();
|
|
||||||
infos.extend_from_slice(&matches[..]);
|
|
||||||
}
|
|
||||||
|
|
||||||
match format_options {
|
|
||||||
Some(format_options) => {
|
|
||||||
let value = matcher.format(format_options);
|
|
||||||
Value::String(value.into_owned())
|
|
||||||
}
|
|
||||||
None => Value::String(old_string),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Value::Array(values) => Value::Array(
|
|
||||||
values
|
|
||||||
.into_iter()
|
|
||||||
.map(|v| {
|
|
||||||
format_value(
|
|
||||||
v,
|
|
||||||
builder,
|
|
||||||
format_options.map(|format_options| FormatOptions {
|
|
||||||
highlight: format_options.highlight,
|
|
||||||
crop: None,
|
|
||||||
}),
|
|
||||||
infos,
|
|
||||||
compute_matches,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect(),
|
|
||||||
),
|
|
||||||
Value::Object(object) => Value::Object(
|
|
||||||
object
|
|
||||||
.into_iter()
|
|
||||||
.map(|(k, v)| {
|
|
||||||
(
|
|
||||||
k,
|
|
||||||
format_value(
|
|
||||||
v,
|
|
||||||
builder,
|
|
||||||
format_options.map(|format_options| FormatOptions {
|
|
||||||
highlight: format_options.highlight,
|
|
||||||
crop: None,
|
|
||||||
}),
|
|
||||||
infos,
|
|
||||||
compute_matches,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect(),
|
|
||||||
),
|
|
||||||
Value::Number(number) => {
|
|
||||||
let s = number.to_string();
|
|
||||||
|
|
||||||
let mut matcher = builder.build(&s);
|
|
||||||
if compute_matches {
|
|
||||||
let matches = matcher.matches();
|
|
||||||
infos.extend_from_slice(&matches[..]);
|
|
||||||
}
|
|
||||||
|
|
||||||
match format_options {
|
|
||||||
Some(format_options) => {
|
|
||||||
let value = matcher.format(format_options);
|
|
||||||
Value::String(value.into_owned())
|
|
||||||
}
|
|
||||||
None => Value::Number(number),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
value => value,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_filter(facets: &Value) -> Result<Option<Filter>> {
|
|
||||||
match facets {
|
|
||||||
Value::String(expr) => {
|
|
||||||
let condition = Filter::from_str(expr)?;
|
|
||||||
Ok(condition)
|
|
||||||
}
|
|
||||||
Value::Array(arr) => parse_filter_array(arr),
|
|
||||||
v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>> {
|
|
||||||
let mut ands = Vec::new();
|
|
||||||
for value in arr {
|
|
||||||
match value {
|
|
||||||
Value::String(s) => ands.push(Either::Right(s.as_str())),
|
|
||||||
Value::Array(arr) => {
|
|
||||||
let mut ors = Vec::new();
|
|
||||||
for value in arr {
|
|
||||||
match value {
|
|
||||||
Value::String(s) => ors.push(s.as_str()),
|
|
||||||
v => {
|
|
||||||
return Err(FacetError::InvalidExpression(&["String"], v.clone()).into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ands.push(Either::Left(ors));
|
|
||||||
}
|
|
||||||
v => {
|
|
||||||
return Err(
|
|
||||||
FacetError::InvalidExpression(&["String", "[String]"], v.clone()).into(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Filter::from_array(ands)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_insert_geo_distance() {
        let value: Document = serde_json::from_str(
            r#"{
      "_geo": {
        "lat": 50.629973371633746,
        "lng": 3.0569447399419567
      },
      "city": "Lille",
      "id": "1"
    }"#,
        )
        .unwrap();

        // Runs `insert_geo_distance` on a fresh copy of the document and returns the
        // `_geoDistance` that was inserted, if any.
        let geo_distance_for = |sorters: &[&str]| {
            let sorters: Vec<String> = sorters.iter().map(|s| s.to_string()).collect();
            let mut document = value.clone();
            insert_geo_distance(&sorters, &mut document);
            document.get("_geoDistance").cloned()
        };

        // whitespace inside the parentheses does not matter
        assert_eq!(
            geo_distance_for(&["_geoPoint(50.629973371633746,3.0569447399419567):desc"]),
            Some(json!(0))
        );
        assert_eq!(
            geo_distance_for(&["_geoPoint(50.629973371633746, 3.0569447399419567):asc"]),
            Some(json!(0))
        );
        assert_eq!(
            geo_distance_for(&[
                "_geoPoint(   50.629973371633746   ,  3.0569447399419567   ):desc"
            ]),
            Some(json!(0))
        );

        // the geoPoint is found among other sort rules
        assert_eq!(
            geo_distance_for(&[
                "prix:asc",
                "villeneuve:desc",
                "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
                "ubu:asc",
            ]),
            Some(json!(0))
        );

        // only the first geoPoint is used to compute the distance
        assert_eq!(
            geo_distance_for(&[
                "chien:desc",
                "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
                "pangolin:desc",
                "_geoPoint(100.0, -80.0):asc",
                "chat:asc",
            ]),
            Some(json!(0))
        );

        // there was no _geoPoint so nothing is inserted in the document
        assert_eq!(geo_distance_for(&["chien:asc"]), None);
    }
}
|
|
@ -1,429 +0,0 @@
|
|||||||
use std::collections::{BTreeMap, BTreeSet};
|
|
||||||
use std::marker::PhantomData;
|
|
||||||
use std::num::NonZeroUsize;
|
|
||||||
|
|
||||||
use milli::update::Setting;
|
|
||||||
use serde::{Deserialize, Serialize, Serializer};
|
|
||||||
|
|
||||||
fn serialize_with_wildcard<S>(
|
|
||||||
field: &Setting<Vec<String>>,
|
|
||||||
s: S,
|
|
||||||
) -> std::result::Result<S::Ok, S::Error>
|
|
||||||
where
|
|
||||||
S: Serializer,
|
|
||||||
{
|
|
||||||
let wildcard = vec!["*".to_string()];
|
|
||||||
match field {
|
|
||||||
Setting::Set(value) => Some(value),
|
|
||||||
Setting::Reset => Some(&wildcard),
|
|
||||||
Setting::NotSet => None,
|
|
||||||
}
|
|
||||||
.serialize(s)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
|
|
||||||
pub struct Checked;
|
|
||||||
|
|
||||||
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
|
||||||
pub struct Unchecked;
|
|
||||||
|
|
||||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
|
||||||
#[serde(deny_unknown_fields)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct MinWordSizeTyposSetting {
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub one_typo: Setting<u8>,
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub two_typos: Setting<u8>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
|
||||||
#[serde(deny_unknown_fields)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct TypoSettings {
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub enabled: Setting<bool>,
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub disable_on_words: Setting<BTreeSet<String>>,
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub disable_on_attributes: Setting<BTreeSet<String>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
|
||||||
#[serde(deny_unknown_fields)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct FacetingSettings {
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub max_values_per_facet: Setting<usize>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
|
||||||
#[serde(deny_unknown_fields)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct PaginationSettings {
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
pub max_total_hits: Setting<usize>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
|
|
||||||
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
|
|
||||||
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
|
||||||
#[serde(deny_unknown_fields)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
|
|
||||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
|
||||||
pub struct Settings<T> {
|
|
||||||
#[serde(
|
|
||||||
default,
|
|
||||||
serialize_with = "serialize_with_wildcard",
|
|
||||||
skip_serializing_if = "Setting::is_not_set"
|
|
||||||
)]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub displayed_attributes: Setting<Vec<String>>,
|
|
||||||
|
|
||||||
#[serde(
|
|
||||||
default,
|
|
||||||
serialize_with = "serialize_with_wildcard",
|
|
||||||
skip_serializing_if = "Setting::is_not_set"
|
|
||||||
)]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub searchable_attributes: Setting<Vec<String>>,
|
|
||||||
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub filterable_attributes: Setting<BTreeSet<String>>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub sortable_attributes: Setting<BTreeSet<String>>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub ranking_rules: Setting<Vec<String>>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub stop_words: Setting<BTreeSet<String>>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub distinct_attribute: Setting<String>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub typo_tolerance: Setting<TypoSettings>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub faceting: Setting<FacetingSettings>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
|
||||||
pub pagination: Setting<PaginationSettings>,
|
|
||||||
|
|
||||||
#[serde(skip)]
|
|
||||||
pub _kind: PhantomData<T>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Settings<Checked> {
|
|
||||||
pub fn cleared() -> Settings<Checked> {
|
|
||||||
Settings {
|
|
||||||
displayed_attributes: Setting::Reset,
|
|
||||||
searchable_attributes: Setting::Reset,
|
|
||||||
filterable_attributes: Setting::Reset,
|
|
||||||
sortable_attributes: Setting::Reset,
|
|
||||||
ranking_rules: Setting::Reset,
|
|
||||||
stop_words: Setting::Reset,
|
|
||||||
synonyms: Setting::Reset,
|
|
||||||
distinct_attribute: Setting::Reset,
|
|
||||||
typo_tolerance: Setting::Reset,
|
|
||||||
faceting: Setting::Reset,
|
|
||||||
pagination: Setting::Reset,
|
|
||||||
_kind: PhantomData,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn into_unchecked(self) -> Settings<Unchecked> {
|
|
||||||
let Self {
|
|
||||||
displayed_attributes,
|
|
||||||
searchable_attributes,
|
|
||||||
filterable_attributes,
|
|
||||||
sortable_attributes,
|
|
||||||
ranking_rules,
|
|
||||||
stop_words,
|
|
||||||
synonyms,
|
|
||||||
distinct_attribute,
|
|
||||||
typo_tolerance,
|
|
||||||
faceting,
|
|
||||||
pagination,
|
|
||||||
..
|
|
||||||
} = self;
|
|
||||||
|
|
||||||
Settings {
|
|
||||||
displayed_attributes,
|
|
||||||
searchable_attributes,
|
|
||||||
filterable_attributes,
|
|
||||||
sortable_attributes,
|
|
||||||
ranking_rules,
|
|
||||||
stop_words,
|
|
||||||
synonyms,
|
|
||||||
distinct_attribute,
|
|
||||||
typo_tolerance,
|
|
||||||
faceting,
|
|
||||||
pagination,
|
|
||||||
_kind: PhantomData,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Settings<Unchecked> {
|
|
||||||
pub fn check(self) -> Settings<Checked> {
|
|
||||||
let displayed_attributes = match self.displayed_attributes {
|
|
||||||
Setting::Set(fields) => {
|
|
||||||
if fields.iter().any(|f| f == "*") {
|
|
||||||
Setting::Reset
|
|
||||||
} else {
|
|
||||||
Setting::Set(fields)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
otherwise => otherwise,
|
|
||||||
};
|
|
||||||
|
|
||||||
let searchable_attributes = match self.searchable_attributes {
|
|
||||||
Setting::Set(fields) => {
|
|
||||||
if fields.iter().any(|f| f == "*") {
|
|
||||||
Setting::Reset
|
|
||||||
} else {
|
|
||||||
Setting::Set(fields)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
otherwise => otherwise,
|
|
||||||
};
|
|
||||||
|
|
||||||
Settings {
|
|
||||||
displayed_attributes,
|
|
||||||
searchable_attributes,
|
|
||||||
filterable_attributes: self.filterable_attributes,
|
|
||||||
sortable_attributes: self.sortable_attributes,
|
|
||||||
ranking_rules: self.ranking_rules,
|
|
||||||
stop_words: self.stop_words,
|
|
||||||
synonyms: self.synonyms,
|
|
||||||
distinct_attribute: self.distinct_attribute,
|
|
||||||
typo_tolerance: self.typo_tolerance,
|
|
||||||
faceting: self.faceting,
|
|
||||||
pagination: self.pagination,
|
|
||||||
_kind: PhantomData,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
#[serde(deny_unknown_fields)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct Facets {
|
|
||||||
pub level_group_size: Option<NonZeroUsize>,
|
|
||||||
pub min_level_size: Option<NonZeroUsize>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn apply_settings_to_builder(
|
|
||||||
settings: &Settings<Checked>,
|
|
||||||
builder: &mut milli::update::Settings,
|
|
||||||
) {
|
|
||||||
match settings.searchable_attributes {
|
|
||||||
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
|
|
||||||
Setting::Reset => builder.reset_searchable_fields(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.displayed_attributes {
|
|
||||||
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
|
|
||||||
Setting::Reset => builder.reset_displayed_fields(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.filterable_attributes {
|
|
||||||
Setting::Set(ref facets) => {
|
|
||||||
builder.set_filterable_fields(facets.clone().into_iter().collect())
|
|
||||||
}
|
|
||||||
Setting::Reset => builder.reset_filterable_fields(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.sortable_attributes {
|
|
||||||
Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
|
|
||||||
Setting::Reset => builder.reset_sortable_fields(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.ranking_rules {
|
|
||||||
Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
|
|
||||||
Setting::Reset => builder.reset_criteria(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.stop_words {
|
|
||||||
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
|
|
||||||
Setting::Reset => builder.reset_stop_words(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.synonyms {
|
|
||||||
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
|
|
||||||
Setting::Reset => builder.reset_synonyms(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.distinct_attribute {
|
|
||||||
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
|
|
||||||
Setting::Reset => builder.reset_distinct_field(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.typo_tolerance {
|
|
||||||
Setting::Set(ref value) => {
|
|
||||||
match value.enabled {
|
|
||||||
Setting::Set(val) => builder.set_autorize_typos(val),
|
|
||||||
Setting::Reset => builder.reset_authorize_typos(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match value.min_word_size_for_typos {
|
|
||||||
Setting::Set(ref setting) => {
|
|
||||||
match setting.one_typo {
|
|
||||||
Setting::Set(val) => builder.set_min_word_len_one_typo(val),
|
|
||||||
Setting::Reset => builder.reset_min_word_len_one_typo(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
match setting.two_typos {
|
|
||||||
Setting::Set(val) => builder.set_min_word_len_two_typos(val),
|
|
||||||
Setting::Reset => builder.reset_min_word_len_two_typos(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Setting::Reset => {
|
|
||||||
builder.reset_min_word_len_one_typo();
|
|
||||||
builder.reset_min_word_len_two_typos();
|
|
||||||
}
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match value.disable_on_words {
|
|
||||||
Setting::Set(ref words) => {
|
|
||||||
builder.set_exact_words(words.clone());
|
|
||||||
}
|
|
||||||
Setting::Reset => builder.reset_exact_words(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match value.disable_on_attributes {
|
|
||||||
Setting::Set(ref words) => {
|
|
||||||
builder.set_exact_attributes(words.iter().cloned().collect())
|
|
||||||
}
|
|
||||||
Setting::Reset => builder.reset_exact_attributes(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Setting::Reset => {
|
|
||||||
// all typo settings need to be reset here.
|
|
||||||
builder.reset_authorize_typos();
|
|
||||||
builder.reset_min_word_len_one_typo();
|
|
||||||
builder.reset_min_word_len_two_typos();
|
|
||||||
builder.reset_exact_words();
|
|
||||||
builder.reset_exact_attributes();
|
|
||||||
}
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.faceting {
|
|
||||||
Setting::Set(ref value) => match value.max_values_per_facet {
|
|
||||||
Setting::Set(val) => builder.set_max_values_per_facet(val),
|
|
||||||
Setting::Reset => builder.reset_max_values_per_facet(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
},
|
|
||||||
Setting::Reset => builder.reset_max_values_per_facet(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
match settings.pagination {
|
|
||||||
Setting::Set(ref value) => match value.max_total_hits {
|
|
||||||
Setting::Set(val) => builder.set_pagination_max_total_hits(val),
|
|
||||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
},
|
|
||||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
|
||||||
Setting::NotSet => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
pub(crate) mod test {
    use proptest::prelude::*;

    use super::*;

    /// Proptest strategy producing any of the three `Setting` variants:
    /// `NotSet`, `Reset`, or `Set` wrapping an arbitrary `T`.
    pub(super) fn setting_strategy<T: Arbitrary + Clone>() -> impl Strategy<Value = Setting<T>> {
        prop_oneof![
            Just(Setting::NotSet),
            Just(Setting::Reset),
            any::<T>().prop_map(Setting::Set)
        ]
    }

    /// Builds unchecked settings in which only the displayed and searchable
    /// attributes are set; every other field is left as `NotSet`.
    fn settings_with_attributes(
        displayed: Vec<String>,
        searchable: Vec<String>,
    ) -> Settings<Unchecked> {
        Settings {
            displayed_attributes: Setting::Set(displayed),
            searchable_attributes: Setting::Set(searchable),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        }
    }

    #[test]
    fn test_setting_check() {
        // Without any wildcard, `check` must leave both attribute lists as they were.
        let settings =
            settings_with_attributes(vec![String::from("hello")], vec![String::from("hello")]);

        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(
            settings.searchable_attributes,
            checked.searchable_attributes
        );

        // A list containing the `*` wildcard must come out of `check` as `Reset`.
        let settings = settings_with_attributes(
            vec![String::from("*")],
            vec![String::from("hello"), String::from("*")],
        );

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
}
|
|
@ -47,17 +47,15 @@ jsonwebtoken = "8.1.1"
|
|||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
|
|
||||||
index = { path = "../index" }
|
|
||||||
index-scheduler = { path = "../index-scheduler" }
|
index-scheduler = { path = "../index-scheduler" }
|
||||||
file-store = { path = "../file-store" }
|
file-store = { path = "../file-store" }
|
||||||
document-formats = { path = "../document-formats" }
|
|
||||||
mimalloc = { version = "0.1.29", default-features = false }
|
mimalloc = { version = "0.1.29", default-features = false }
|
||||||
mime = "0.3.16"
|
mime = "0.3.16"
|
||||||
num_cpus = "1.13.1"
|
num_cpus = "1.13.1"
|
||||||
obkv = "0.2.0"
|
obkv = "0.2.0"
|
||||||
once_cell = "1.15.0"
|
once_cell = "1.15.0"
|
||||||
parking_lot = "0.12.1"
|
parking_lot = "0.12.1"
|
||||||
|
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||||
pin-project-lite = "0.2.9"
|
pin-project-lite = "0.2.9"
|
||||||
platform-dirs = "0.3.0"
|
platform-dirs = "0.3.0"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
@ -98,7 +96,7 @@ yaup = "0.2.1"
|
|||||||
temp-env = "0.3.1"
|
temp-env = "0.3.1"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["analytics", "meilisearch-lib/default", "mini-dashboard"]
|
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
|
||||||
metrics = ["prometheus"]
|
metrics = ["prometheus"]
|
||||||
analytics = ["segment"]
|
analytics = ["segment"]
|
||||||
mini-dashboard = [
|
mini-dashboard = [
|
||||||
@ -112,10 +110,10 @@ mini-dashboard = [
|
|||||||
"tempfile",
|
"tempfile",
|
||||||
"zip",
|
"zip",
|
||||||
]
|
]
|
||||||
chinese = ["meilisearch-lib/chinese"]
|
chinese = ["meilisearch-types/chinese"]
|
||||||
hebrew = ["meilisearch-lib/hebrew"]
|
hebrew = ["meilisearch-types/hebrew"]
|
||||||
japanese = ["meilisearch-lib/japanese"]
|
japanese = ["meilisearch-types/japanese"]
|
||||||
thai = ["meilisearch-lib/thai"]
|
thai = ["meilisearch-types/thai"]
|
||||||
|
|
||||||
[package.metadata.mini-dashboard]
|
[package.metadata.mini-dashboard]
|
||||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.3/build.zip"
|
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.3/build.zip"
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use actix_web as aweb;
|
use actix_web as aweb;
|
||||||
use aweb::error::{JsonPayloadError, QueryPayloadError};
|
use aweb::error::{JsonPayloadError, QueryPayloadError};
|
||||||
use document_formats::DocumentFormatError;
|
use meilisearch_types::document_formats::DocumentFormatError;
|
||||||
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
||||||
|
use serde_json::Value;
|
||||||
use tokio::task::JoinError;
|
use tokio::task::JoinError;
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
@ -14,9 +15,19 @@ pub enum MeilisearchHttpError {
|
|||||||
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
|
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
|
||||||
)]
|
)]
|
||||||
InvalidContentType(String, Vec<String>),
|
InvalidContentType(String, Vec<String>),
|
||||||
|
#[error("Document `{0}` not found.")]
|
||||||
|
DocumentNotFound(String),
|
||||||
|
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||||
|
InvalidExpression(&'static [&'static str], Value),
|
||||||
|
#[error(transparent)]
|
||||||
|
SerdeJson(#[from] serde_json::Error),
|
||||||
|
#[error(transparent)]
|
||||||
|
HeedError(#[from] meilisearch_types::heed::Error),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
IndexScheduler(#[from] index_scheduler::Error),
|
IndexScheduler(#[from] index_scheduler::Error),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
|
Milli(#[from] meilisearch_types::milli::Error),
|
||||||
|
#[error(transparent)]
|
||||||
Payload(#[from] PayloadError),
|
Payload(#[from] PayloadError),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
FileStore(#[from] file_store::Error),
|
FileStore(#[from] file_store::Error),
|
||||||
@ -31,7 +42,12 @@ impl ErrorCode for MeilisearchHttpError {
|
|||||||
match self {
|
match self {
|
||||||
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
|
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
|
||||||
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
|
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
|
||||||
|
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
|
||||||
|
MeilisearchHttpError::InvalidExpression(_, _) => Code::Filter,
|
||||||
|
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
|
||||||
|
MeilisearchHttpError::HeedError(_) => Code::Internal,
|
||||||
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
|
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
|
||||||
|
MeilisearchHttpError::Milli(e) => e.error_code(),
|
||||||
MeilisearchHttpError::Payload(e) => e.error_code(),
|
MeilisearchHttpError::Payload(e) => e.error_code(),
|
||||||
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
||||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||||
|
@ -6,6 +6,7 @@ pub mod analytics;
|
|||||||
pub mod extractors;
|
pub mod extractors;
|
||||||
pub mod option;
|
pub mod option;
|
||||||
pub mod routes;
|
pub mod routes;
|
||||||
|
pub mod search;
|
||||||
|
|
||||||
#[cfg(feature = "metrics")]
|
#[cfg(feature = "metrics")]
|
||||||
pub mod metrics;
|
pub mod metrics;
|
||||||
@ -38,6 +39,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
|
|||||||
opt.db_path.join("indexes"),
|
opt.db_path.join("indexes"),
|
||||||
opt.max_index_size.get_bytes() as usize,
|
opt.max_index_size.get_bytes() as usize,
|
||||||
(&opt.indexer_options).try_into()?,
|
(&opt.indexer_options).try_into()?,
|
||||||
|
true,
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
todo!("We'll see later"),
|
todo!("We'll see later"),
|
||||||
)?;
|
)?;
|
||||||
@ -45,8 +47,6 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
|
|||||||
/*
|
/*
|
||||||
TODO: We should start a thread to handle the snapshots.
|
TODO: We should start a thread to handle the snapshots.
|
||||||
meilisearch
|
meilisearch
|
||||||
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
|
|
||||||
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
|
|
||||||
// snapshot
|
// snapshot
|
||||||
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
|
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
|
||||||
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
|
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
|
||||||
|
@ -11,11 +11,7 @@ use std::{fmt, fs};
|
|||||||
|
|
||||||
use byte_unit::{Byte, ByteError};
|
use byte_unit::{Byte, ByteError};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use meilisearch_lib::{
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
export_to_env_if_not_present,
|
|
||||||
options::{IndexerOpts, SchedulerConfig},
|
|
||||||
};
|
|
||||||
use index_scheduler::milli::update::IndexerConfig;
|
|
||||||
use rustls::{
|
use rustls::{
|
||||||
server::{
|
server::{
|
||||||
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,
|
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,
|
||||||
|
@ -1,26 +1,24 @@
|
|||||||
use std::io::Cursor;
|
use std::io::Cursor;
|
||||||
|
|
||||||
use actix_web::error::PayloadError;
|
|
||||||
use actix_web::http::header::CONTENT_TYPE;
|
use actix_web::http::header::CONTENT_TYPE;
|
||||||
use actix_web::web::{Bytes, Data};
|
use actix_web::web::Data;
|
||||||
use actix_web::HttpMessage;
|
use actix_web::HttpMessage;
|
||||||
use actix_web::{web, HttpRequest, HttpResponse};
|
use actix_web::{web, HttpRequest, HttpResponse};
|
||||||
use bstr::ByteSlice;
|
use bstr::ByteSlice;
|
||||||
use document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
use futures::StreamExt;
|
||||||
use futures::{Stream, StreamExt};
|
use index_scheduler::{IndexScheduler, KindWithContent, TaskView};
|
||||||
use index::{retrieve_document, retrieve_documents};
|
|
||||||
use index_scheduler::milli::update::IndexDocumentsMethod;
|
|
||||||
use index_scheduler::IndexScheduler;
|
|
||||||
use index_scheduler::{KindWithContent, TaskView};
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
|
use meilisearch_types::heed::RoTxn;
|
||||||
|
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||||
use meilisearch_types::star_or::StarOr;
|
use meilisearch_types::star_or::StarOr;
|
||||||
|
use meilisearch_types::{milli, Document, Index};
|
||||||
use mime::Mime;
|
use mime::Mime;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde_cs::vec::CS;
|
use serde_cs::vec::CS;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tokio::sync::mpsc;
|
|
||||||
|
|
||||||
use crate::analytics::Analytics;
|
use crate::analytics::Analytics;
|
||||||
use crate::error::MeilisearchHttpError;
|
use crate::error::MeilisearchHttpError;
|
||||||
@ -37,17 +35,6 @@ static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
|||||||
]
|
]
|
||||||
});
|
});
|
||||||
|
|
||||||
/// This is required because Payload is not Sync nor Send
|
|
||||||
fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, PayloadError>> {
|
|
||||||
let (snd, recv) = mpsc::channel(1);
|
|
||||||
tokio::task::spawn_local(async move {
|
|
||||||
while let Some(data) = payload.next().await {
|
|
||||||
let _ = snd.send(data).await;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
tokio_stream::wrappers::ReceiverStream::new(recv)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extracts the mime type from the content type and returns
|
/// Extracts the mime type from the content type and returns
|
||||||
/// a meilisearch error if anything bad happens.
|
/// a meilisearch error if anything bad happens.
|
||||||
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
|
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
|
||||||
@ -344,3 +331,76 @@ pub async fn clear_all_documents(
|
|||||||
debug!("returns: {:?}", task);
|
debug!("returns: {:?}", task);
|
||||||
Ok(HttpResponse::Accepted().json(task))
|
Ok(HttpResponse::Accepted().json(task))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn all_documents<'a>(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &'a RoTxn,
|
||||||
|
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||||
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
|
|
||||||
|
Ok(index.all_documents(rtxn)?.map(move |ret| {
|
||||||
|
ret.map_err(ResponseError::from)
|
||||||
|
.and_then(|(_key, document)| -> Result<_, ResponseError> {
|
||||||
|
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn retrieve_documents<S: AsRef<str>>(
|
||||||
|
index: &Index,
|
||||||
|
offset: usize,
|
||||||
|
limit: usize,
|
||||||
|
attributes_to_retrieve: Option<Vec<S>>,
|
||||||
|
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||||
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
|
let mut documents = Vec::new();
|
||||||
|
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
|
||||||
|
let document = match &attributes_to_retrieve {
|
||||||
|
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||||
|
&document?,
|
||||||
|
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||||
|
),
|
||||||
|
None => document?,
|
||||||
|
};
|
||||||
|
documents.push(document);
|
||||||
|
}
|
||||||
|
|
||||||
|
let number_of_documents = index.number_of_documents(&rtxn)?;
|
||||||
|
Ok((number_of_documents, documents))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn retrieve_document<S: AsRef<str>>(
|
||||||
|
index: &Index,
|
||||||
|
doc_id: &str,
|
||||||
|
attributes_to_retrieve: Option<Vec<S>>,
|
||||||
|
) -> Result<Document, ResponseError> {
|
||||||
|
let txn = index.read_txn()?;
|
||||||
|
|
||||||
|
let fields_ids_map = index.fields_ids_map(&txn)?;
|
||||||
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
|
|
||||||
|
let internal_id = index
|
||||||
|
.external_documents_ids(&txn)?
|
||||||
|
.get(doc_id.as_bytes())
|
||||||
|
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||||
|
|
||||||
|
let document = index
|
||||||
|
.documents(&txn, std::iter::once(internal_id))?
|
||||||
|
.into_iter()
|
||||||
|
.next()
|
||||||
|
.map(|(_, d)| d)
|
||||||
|
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||||
|
|
||||||
|
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||||
|
let document = match &attributes_to_retrieve {
|
||||||
|
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||||
|
&document,
|
||||||
|
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||||
|
),
|
||||||
|
None => document,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(document)
|
||||||
|
}
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
use actix_web::{web, HttpRequest, HttpResponse};
|
use actix_web::{web, HttpRequest, HttpResponse};
|
||||||
use index_scheduler::milli::{FieldDistribution, Index};
|
|
||||||
use index_scheduler::{IndexScheduler, KindWithContent, Query, Status};
|
use index_scheduler::{IndexScheduler, KindWithContent, Query, Status};
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
|
use meilisearch_types::milli::{self, FieldDistribution, Index};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
@ -51,7 +51,7 @@ pub struct IndexView {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl IndexView {
|
impl IndexView {
|
||||||
fn new(uid: String, index: &Index) -> Result<IndexView, index::error::IndexError> {
|
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
Ok(IndexView {
|
Ok(IndexView {
|
||||||
uid,
|
uid,
|
||||||
|
@ -1,10 +1,5 @@
|
|||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
use actix_web::{web, HttpRequest, HttpResponse};
|
use actix_web::{web, HttpRequest, HttpResponse};
|
||||||
use index::{
|
|
||||||
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
|
||||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
|
||||||
DEFAULT_SEARCH_OFFSET,
|
|
||||||
};
|
|
||||||
use index_scheduler::IndexScheduler;
|
use index_scheduler::IndexScheduler;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use meilisearch_auth::IndexSearchRules;
|
use meilisearch_auth::IndexSearchRules;
|
||||||
@ -16,6 +11,11 @@ use serde_json::Value;
|
|||||||
use crate::analytics::{Analytics, SearchAggregator};
|
use crate::analytics::{Analytics, SearchAggregator};
|
||||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||||
use crate::extractors::sequential_extractor::SeqHandler;
|
use crate::extractors::sequential_extractor::SeqHandler;
|
||||||
|
use crate::search::{
|
||||||
|
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||||
|
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||||
|
DEFAULT_SEARCH_OFFSET
|
||||||
|
};
|
||||||
|
|
||||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||||
cfg.service(
|
cfg.service(
|
||||||
|
@ -1,14 +1,26 @@
|
|||||||
|
use std::collections::BTreeSet;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
|
use fst::IntoStreamer;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
use actix_web::{web, HttpRequest, HttpResponse};
|
use actix_web::{web, HttpRequest, HttpResponse};
|
||||||
use index::{Settings, Unchecked};
|
|
||||||
use index_scheduler::{IndexScheduler, KindWithContent};
|
use index_scheduler::{IndexScheduler, KindWithContent};
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
|
use meilisearch_types::heed::RoTxn;
|
||||||
|
use meilisearch_types::milli::update::Setting;
|
||||||
|
use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
|
||||||
|
use meilisearch_types::settings::{
|
||||||
|
Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
|
||||||
|
Unchecked,
|
||||||
|
};
|
||||||
|
use meilisearch_types::Index;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use crate::analytics::Analytics;
|
use crate::analytics::Analytics;
|
||||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||||
|
use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! make_setting_route {
|
macro_rules! make_setting_route {
|
||||||
@ -18,14 +30,15 @@ macro_rules! make_setting_route {
|
|||||||
use actix_web::{web, HttpRequest, HttpResponse, Resource};
|
use actix_web::{web, HttpRequest, HttpResponse, Resource};
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
use index::Settings;
|
|
||||||
use index_scheduler::milli::update::Setting;
|
|
||||||
use index_scheduler::{IndexScheduler, KindWithContent};
|
use index_scheduler::{IndexScheduler, KindWithContent};
|
||||||
|
use meilisearch_types::milli::update::Setting;
|
||||||
|
use meilisearch_types::settings::Settings;
|
||||||
|
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
use $crate::analytics::Analytics;
|
use $crate::analytics::Analytics;
|
||||||
use $crate::extractors::authentication::{policies::*, GuardedData};
|
use $crate::extractors::authentication::{policies::*, GuardedData};
|
||||||
use $crate::extractors::sequential_extractor::SeqHandler;
|
use $crate::extractors::sequential_extractor::SeqHandler;
|
||||||
|
use $crate::routes::indexes::settings::settings;
|
||||||
|
|
||||||
pub async fn delete(
|
pub async fn delete(
|
||||||
index_scheduler: GuardedData<
|
index_scheduler: GuardedData<
|
||||||
@ -98,7 +111,7 @@ macro_rules! make_setting_route {
|
|||||||
) -> std::result::Result<HttpResponse, ResponseError> {
|
) -> std::result::Result<HttpResponse, ResponseError> {
|
||||||
let index = index_scheduler.index(&index_uid)?;
|
let index = index_scheduler.index(&index_uid)?;
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
let settings = index::settings(&index, &rtxn)?;
|
let settings = settings(&index, &rtxn)?;
|
||||||
|
|
||||||
debug!("returns: {:?}", settings);
|
debug!("returns: {:?}", settings);
|
||||||
let mut json = serde_json::json!(&settings);
|
let mut json = serde_json::json!(&settings);
|
||||||
@ -185,11 +198,11 @@ make_setting_route!(
|
|||||||
make_setting_route!(
|
make_setting_route!(
|
||||||
"/typo-tolerance",
|
"/typo-tolerance",
|
||||||
patch,
|
patch,
|
||||||
index::updates::TypoSettings,
|
meilisearch_types::settings::TypoSettings,
|
||||||
typo_tolerance,
|
typo_tolerance,
|
||||||
"typoTolerance",
|
"typoTolerance",
|
||||||
analytics,
|
analytics,
|
||||||
|setting: &Option<index::updates::TypoSettings>, req: &HttpRequest| {
|
|setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
@ -295,11 +308,11 @@ make_setting_route!(
|
|||||||
make_setting_route!(
|
make_setting_route!(
|
||||||
"/faceting",
|
"/faceting",
|
||||||
patch,
|
patch,
|
||||||
index::updates::FacetingSettings,
|
meilisearch_types::settings::FacetingSettings,
|
||||||
faceting,
|
faceting,
|
||||||
"faceting",
|
"faceting",
|
||||||
analytics,
|
analytics,
|
||||||
|setting: &Option<index::updates::FacetingSettings>, req: &HttpRequest| {
|
|setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
@ -317,11 +330,11 @@ make_setting_route!(
|
|||||||
make_setting_route!(
|
make_setting_route!(
|
||||||
"/pagination",
|
"/pagination",
|
||||||
patch,
|
patch,
|
||||||
index::updates::PaginationSettings,
|
meilisearch_types::settings::PaginationSettings,
|
||||||
pagination,
|
pagination,
|
||||||
"pagination",
|
"pagination",
|
||||||
analytics,
|
analytics,
|
||||||
|setting: &Option<index::updates::PaginationSettings>, req: &HttpRequest| {
|
|setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
@ -456,7 +469,7 @@ pub async fn get_all(
|
|||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
let index = index_scheduler.index(&index_uid)?;
|
let index = index_scheduler.index(&index_uid)?;
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
let new_settings = index::settings(&index, &rtxn)?;
|
let new_settings = settings(&index, &rtxn)?;
|
||||||
debug!("returns: {:?}", new_settings);
|
debug!("returns: {:?}", new_settings);
|
||||||
Ok(HttpResponse::Ok().json(new_settings))
|
Ok(HttpResponse::Ok().json(new_settings))
|
||||||
}
|
}
|
||||||
@ -479,3 +492,108 @@ pub async fn delete_all(
|
|||||||
debug!("returns: {:?}", task);
|
debug!("returns: {:?}", task);
|
||||||
Ok(HttpResponse::Accepted().json(task))
|
Ok(HttpResponse::Accepted().json(task))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
|
||||||
|
let displayed_attributes = index
|
||||||
|
.displayed_fields(rtxn)?
|
||||||
|
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||||
|
|
||||||
|
let searchable_attributes = index
|
||||||
|
.user_defined_searchable_fields(rtxn)?
|
||||||
|
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||||
|
|
||||||
|
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
|
||||||
|
|
||||||
|
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
|
||||||
|
|
||||||
|
let criteria = index
|
||||||
|
.criteria(rtxn)?
|
||||||
|
.into_iter()
|
||||||
|
.map(|c| c.to_string())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let stop_words = index
|
||||||
|
.stop_words(rtxn)?
|
||||||
|
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
|
||||||
|
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
||||||
|
})
|
||||||
|
.transpose()?
|
||||||
|
.unwrap_or_default();
|
||||||
|
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||||
|
|
||||||
|
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||||
|
// this information we are going to put space between words.
|
||||||
|
let synonyms = index
|
||||||
|
.synonyms(rtxn)?
|
||||||
|
.iter()
|
||||||
|
.map(|(key, values)| {
|
||||||
|
(
|
||||||
|
key.join(" "),
|
||||||
|
values.iter().map(|value| value.join(" ")).collect(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||||
|
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||||
|
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
|
||||||
|
};
|
||||||
|
|
||||||
|
let disabled_words = match index.exact_words(rtxn)? {
|
||||||
|
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
|
||||||
|
None => BTreeSet::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let disabled_attributes = index
|
||||||
|
.exact_attributes(rtxn)?
|
||||||
|
.into_iter()
|
||||||
|
.map(String::from)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let typo_tolerance = TypoSettings {
|
||||||
|
enabled: Setting::Set(index.authorize_typos(rtxn)?),
|
||||||
|
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
||||||
|
disable_on_words: Setting::Set(disabled_words),
|
||||||
|
disable_on_attributes: Setting::Set(disabled_attributes),
|
||||||
|
};
|
||||||
|
|
||||||
|
let faceting = FacetingSettings {
|
||||||
|
max_values_per_facet: Setting::Set(
|
||||||
|
index
|
||||||
|
.max_values_per_facet(rtxn)?
|
||||||
|
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
let pagination = PaginationSettings {
|
||||||
|
max_total_hits: Setting::Set(
|
||||||
|
index
|
||||||
|
.pagination_max_total_hits(rtxn)?
|
||||||
|
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Settings {
|
||||||
|
displayed_attributes: match displayed_attributes {
|
||||||
|
Some(attrs) => Setting::Set(attrs),
|
||||||
|
None => Setting::Reset,
|
||||||
|
},
|
||||||
|
searchable_attributes: match searchable_attributes {
|
||||||
|
Some(attrs) => Setting::Set(attrs),
|
||||||
|
None => Setting::Reset,
|
||||||
|
},
|
||||||
|
filterable_attributes: Setting::Set(filterable_attributes),
|
||||||
|
sortable_attributes: Setting::Set(sortable_attributes),
|
||||||
|
ranking_rules: Setting::Set(criteria),
|
||||||
|
stop_words: Setting::Set(stop_words),
|
||||||
|
distinct_attribute: match distinct_field {
|
||||||
|
Some(field) => Setting::Set(field),
|
||||||
|
None => Setting::Reset,
|
||||||
|
},
|
||||||
|
synonyms: Setting::Set(synonyms),
|
||||||
|
typo_tolerance: Setting::Set(typo_tolerance),
|
||||||
|
faceting: Setting::Set(faceting),
|
||||||
|
pagination: Setting::Set(pagination),
|
||||||
|
_kind: PhantomData,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
@ -2,10 +2,10 @@ use std::collections::BTreeMap;
|
|||||||
|
|
||||||
use actix_web::web::Data;
|
use actix_web::web::Data;
|
||||||
use actix_web::{web, HttpRequest, HttpResponse};
|
use actix_web::{web, HttpRequest, HttpResponse};
|
||||||
use index::{Settings, Unchecked};
|
|
||||||
use index_scheduler::{IndexScheduler, Query, Status};
|
use index_scheduler::{IndexScheduler, Query, Status};
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
|
use meilisearch_types::settings::{Settings, Unchecked};
|
||||||
use meilisearch_types::star_or::StarOr;
|
use meilisearch_types::star_or::StarOr;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
@ -6,12 +6,24 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
actix-web = { version = "4.2.1", default-features = false }
|
actix-web = { version = "4.2.1", default-features = false }
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" }
|
csv = "1.1.6"
|
||||||
|
either = { version = "1.6.1", features = ["serde"] }
|
||||||
|
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
|
||||||
proptest = { version = "1.0.0", optional = true }
|
proptest = { version = "1.0.0", optional = true }
|
||||||
proptest-derive = { version = "0.3.0", optional = true }
|
proptest-derive = { version = "0.3.0", optional = true }
|
||||||
serde = { version = "1.0.145", features = ["derive"] }
|
serde = { version = "1.0.145", features = ["derive"] }
|
||||||
serde_json = "1.0.85"
|
serde_json = "1.0.85"
|
||||||
tokio = "1.0"
|
tokio = "1.0"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
proptest = "1.0.0"
|
||||||
|
proptest-derive = "0.3.0"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
default = ["milli/default"]
|
||||||
|
|
||||||
test-traits = ["proptest", "proptest-derive"]
|
test-traits = ["proptest", "proptest-derive"]
|
||||||
|
chinese = ["milli/chinese"]
|
||||||
|
hebrew = ["milli/hebrew"]
|
||||||
|
japanese = ["milli/japanese"]
|
||||||
|
thai = ["milli/thai"]
|
||||||
|
@ -1,3 +1,11 @@
|
|||||||
|
pub mod document_formats;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod index_uid;
|
pub mod index_uid;
|
||||||
|
pub mod settings;
|
||||||
pub mod star_or;
|
pub mod star_or;
|
||||||
|
|
||||||
|
pub use milli;
|
||||||
|
pub use milli::heed;
|
||||||
|
pub use milli::Index;
|
||||||
|
|
||||||
|
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||||
|
Loading…
Reference in New Issue
Block a user