From 51ba1bd7d35029f1495950f0adc38bd6720ef84b Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Thu, 1 Apr 2021 17:49:11 +0300 Subject: [PATCH] fix(http, index): init analyzer with optional stop words Next release update tokenizer --- Cargo.lock | 28 ++++++---------------------- meilisearch-http/Cargo.toml | 22 +++++++++++----------- meilisearch-http/src/index/search.rs | 5 ++++- 3 files changed, 21 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7fd30f444..461a4789b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1847,7 +1847,7 @@ dependencies = [ "log", "main_error", "meilisearch-error", - "meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?branch=main)", + "meilisearch-tokenizer", "memmap", "milli", "mime", @@ -1893,22 +1893,6 @@ dependencies = [ "whatlang", ] -[[package]] -name = "meilisearch-tokenizer" -version = "0.1.1" -source = "git+https://github.com/meilisearch/Tokenizer.git?branch=main#31ba3ff4a15501f12b7d37ac64ddce7c35a9757c" -dependencies = [ - "character_converter", - "cow-utils", - "deunicode", - "fst", - "jieba-rs", - "once_cell", - "slice-group-by", - "unicode-segmentation", - "whatlang", -] - [[package]] name = "memchr" version = "2.3.4" @@ -1937,7 +1921,7 @@ dependencies = [ [[package]] name = "milli" version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?rev=2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c#2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.1.0#2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" dependencies = [ "anyhow", "bstr", @@ -1957,7 +1941,7 @@ dependencies = [ "linked-hash-map", "log", "logging_timer", - "meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0)", + "meilisearch-tokenizer", "memmap", "num-traits", "obkv", @@ -2252,7 +2236,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "pest" version = "2.1.3" -source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" dependencies = [ "ucd-trie", ] @@ -2260,8 +2245,7 @@ dependencies = [ [[package]] name = "pest" version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" dependencies = [ "ucd-trie", ] diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 03ae35729..3e04b876a 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -35,14 +35,14 @@ futures-util = "0.3.8" grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } heed = "0.10.6" http = "0.2.1" -indexmap = { version = "1.3.2", features = ["serde-1"] } +indexmap = { version = "1.3.2", features = ["serde-1"] } itertools = "0.10.0" log = "0.4.8" main_error = "0.1.0" meilisearch-error = { path = "../meilisearch-error" } -meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" } +meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.0" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", rev = "2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.1.0" } mime = "0.3.16" once_cell = "1.5.2" parking_lot = "0.11.1" @@ -66,14 +66,14 @@ oxidized-json-checker = "0.3.2" [dependencies.sentry] default-features = false features = [ - "with_client_implementation", - "with_panic", - "with_failure", - "with_device_info", - "with_rust_info", - "with_reqwest_transport", - "with_rustls", - "with_env_logger" + "with_client_implementation", + "with_panic", + "with_failure", + "with_device_info", + "with_rust_info", + "with_reqwest_transport", + "with_rustls", + "with_env_logger" ] optional = true version = "0.18.1" diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 7311687d2..4b9753b82 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -155,7 +155,10 @@ pub struct Highlighter<'a, A> { impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { pub fn new(stop_words: &'a fst::Set) -> Self { - let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); + let mut config = AnalyzerConfig::default(); + config.stop_words(stop_words); + + let analyzer = Analyzer::new(config); Self { analyzer } }