From 81f343a46ae2274178b35740b50ab74affbcfbd9 Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 8 Jan 2021 16:23:23 +0100 Subject: [PATCH 1/4] add word limit to search queries --- meilisearch-core/src/lib.rs | 1 + meilisearch-core/src/query_tree.rs | 3 +++ meilisearch-http/tests/search.rs | 29 +++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index bcdad62b1..947ad5fb7 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -39,6 +39,7 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; pub use meilisearch_schema::Schema; pub use query_words_mapper::QueryWordsMapper; +pub use query_tree::MAX_QUERY_LEN; use compact_arena::SmallArena; use log::{error, trace}; diff --git a/meilisearch-core/src/query_tree.rs b/meilisearch-core/src/query_tree.rs index c2f43818f..5d10e9bef 100644 --- a/meilisearch-core/src/query_tree.rs +++ b/meilisearch-core/src/query_tree.rs @@ -16,6 +16,8 @@ use crate::{store, DocumentId, DocIndex, MResult, FstSetCow}; use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa}; use crate::QueryWordsMapper; +pub const MAX_QUERY_LEN: usize = 10; + #[derive(Clone, PartialEq, Eq, Hash)] pub enum Operation { And(Vec), @@ -181,6 +183,7 @@ fn split_query_string<'a, A: AsRef<[u8]>>(s: &str, stop_words: &'a fst::Set) .tokens() .filter(|t| t.is_word()) .map(|t| t.word.to_string()) + .take(MAX_QUERY_LEN) .enumerate() .collect() } diff --git a/meilisearch-http/tests/search.rs b/meilisearch-http/tests/search.rs index 9da6b964e..13dc4c898 100644 --- a/meilisearch-http/tests/search.rs +++ b/meilisearch-http/tests/search.rs @@ -1945,3 +1945,32 @@ async fn test_filter_nb_hits_search_normal() { println!("result: {}", response); assert_eq!(response["nbHits"], 1); } + +#[actix_rt::test] +async fn test_max_word_query() { + use meilisearch_core::MAX_QUERY_LEN; + + let mut server = common::Server::with_uid("test"); + let body = json!({ + "uid": "test", + "primaryKey": "id", + }); + server.create_index(body).await; + let documents = json!([ + {"id": 1, "value": "1 2 3 4 5 6 7 8 9 10 11"}, + {"id": 2, "value": "1 2 3 4 5 6 7 8 9 10"}] + ); + server.add_or_update_multiple_documents(documents).await; + + // We want to create a request where the 11 will be ignored. We have 2 documents, where a query + // with only one should return both, but a query with 1 and 11 should return only the first. + // This is how we know that outstanding query words have been ignored + let query = (0..MAX_QUERY_LEN) + .map(|_| "1") + .chain(std::iter::once("11")) + .fold(String::new(), |s, w| s + " " + w); + let (response, _) = server.search_post(json!({"q": query})).await; + assert_eq!(response["nbHits"], 2); + let (response, _) = server.search_post(json!({"q": "1 11"})).await; + assert_eq!(response["nbHits"], 1); +} From e3e475c5b14c61e0a96b5b3d003446ae334a0018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 19 Jan 2021 00:18:52 +0100 Subject: [PATCH 2/4] Update LICENSE --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 8f028e3f2..03ad189ef 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2020 Meili SAS +Copyright (c) 2019-2021 Meili SAS Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 2fe52d0a4f51cc5243b55f00f45d593f465d8e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Harper?= Date: Tue, 26 Jan 2021 15:14:53 -0500 Subject: [PATCH 3/4] fix homebrew name brew is the command, the package manager name is homebrew --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f37d27abd..e8b2ec9e3 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ For more information about features go to [our documentation](https://docs.meili ### Deploy the Server -#### Brew (Mac OS) +#### Homebrew (Mac OS) ```bash brew update && brew install meilisearch From eeccdce33a39747666a644e60a57c75ea9ec7a12 Mon Sep 17 00:00:00 2001 From: many Date: Thu, 28 Jan 2021 10:33:44 +0100 Subject: [PATCH 4/4] update tokenizer to v0.1.3 --- Cargo.lock | 2 +- meilisearch-core/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65f050a41..97ad12184 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1753,7 +1753,7 @@ dependencies = [ [[package]] name = "meilisearch-tokenizer" version = "0.1.1" -source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.2#8d91cd52f30aa4b651a085c15056938f7b599646" +source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.3#d3fe5311a66c1f31682a297df8a8b6b8916f4252" dependencies = [ "character_converter", "cow-utils", diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index 7d930096c..3fe030e9f 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -26,7 +26,7 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } log = "0.4.11" meilisearch-error = { path = "../meilisearch-error", version = "0.18.1" } meilisearch-schema = { path = "../meilisearch-schema", version = "0.18.1" } -meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.2" } +meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.3" } meilisearch-types = { path = "../meilisearch-types", version = "0.18.1" } once_cell = "1.5.2" ordered-float = { version = "2.0.1", features = ["serde"] }