From 956adfc90ac3e22048d0e17e73986c1f9a3fd5a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?KARASZI=20Istv=C3=A1n?= Date: Thu, 7 Jan 2021 16:50:13 +0100 Subject: [PATCH 01/15] Replace in-place compression Compress gzip files to a temporary file first and then do an atomic rename. --- Cargo.lock | 1 + meilisearch-http/Cargo.toml | 1 + meilisearch-http/src/helpers/compression.rs | 12 ++++++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da4f99f6c..c306c86dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1734,6 +1734,7 @@ dependencies = [ "tempfile", "tokio", "ureq", + "uuid", "vergen", "walkdir", "whoami", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 7212a584a..0ad18e340 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -51,6 +51,7 @@ tar = "0.4.30" tempfile = "3.1.0" tokio = { version = "0.2", features = ["macros"] } ureq = { version = "2.0.0", features = ["tls"], default-features = false } +uuid = "0.8" walkdir = "2.3.1" whoami = "1.0.3" diff --git a/meilisearch-http/src/helpers/compression.rs b/meilisearch-http/src/helpers/compression.rs index ff3e1258f..93f5c6a08 100644 --- a/meilisearch-http/src/helpers/compression.rs +++ b/meilisearch-http/src/helpers/compression.rs @@ -1,19 +1,27 @@ use flate2::Compression; use flate2::read::GzDecoder; use flate2::write::GzEncoder; -use std::fs::{create_dir_all, File}; +use std::fs::{create_dir_all, rename, File}; use std::path::Path; use tar::{Builder, Archive}; +use uuid::Uuid; use crate::error::Error; pub fn to_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> { - let f = File::create(dest)?; + let file_name = format!(".{}", Uuid::new_v4().to_urn()); + let p = dest.with_file_name(file_name); + let tmp_dest = p.as_path(); + + let f = File::create(tmp_dest)?; let gz_encoder = GzEncoder::new(f, Compression::default()); let mut tar_encoder = Builder::new(gz_encoder); tar_encoder.append_dir_all(".", src)?; let gz_encoder = 
tar_encoder.into_inner()?; gz_encoder.finish()?; + + rename(tmp_dest, dest)?; + Ok(()) } From 81f343a46ae2274178b35740b50ab74affbcfbd9 Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 8 Jan 2021 16:23:23 +0100 Subject: [PATCH 02/15] add word limit to search queries --- meilisearch-core/src/lib.rs | 1 + meilisearch-core/src/query_tree.rs | 3 +++ meilisearch-http/tests/search.rs | 29 +++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index bcdad62b1..947ad5fb7 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -39,6 +39,7 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; pub use meilisearch_schema::Schema; pub use query_words_mapper::QueryWordsMapper; +pub use query_tree::MAX_QUERY_LEN; use compact_arena::SmallArena; use log::{error, trace}; diff --git a/meilisearch-core/src/query_tree.rs b/meilisearch-core/src/query_tree.rs index c2f43818f..5d10e9bef 100644 --- a/meilisearch-core/src/query_tree.rs +++ b/meilisearch-core/src/query_tree.rs @@ -16,6 +16,8 @@ use crate::{store, DocumentId, DocIndex, MResult, FstSetCow}; use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa}; use crate::QueryWordsMapper; +pub const MAX_QUERY_LEN: usize = 10; + #[derive(Clone, PartialEq, Eq, Hash)] pub enum Operation { And(Vec), @@ -181,6 +183,7 @@ fn split_query_string<'a, A: AsRef<[u8]>>(s: &str, stop_words: &'a fst::Set) .tokens() .filter(|t| t.is_word()) .map(|t| t.word.to_string()) + .take(MAX_QUERY_LEN) .enumerate() .collect() } diff --git a/meilisearch-http/tests/search.rs b/meilisearch-http/tests/search.rs index 9da6b964e..13dc4c898 100644 --- a/meilisearch-http/tests/search.rs +++ b/meilisearch-http/tests/search.rs @@ -1945,3 +1945,32 @@ async fn test_filter_nb_hits_search_normal() { println!("result: {}", response); assert_eq!(response["nbHits"], 1); } + 
+#[actix_rt::test] +async fn test_max_word_query() { + use meilisearch_core::MAX_QUERY_LEN; + + let mut server = common::Server::with_uid("test"); + let body = json!({ + "uid": "test", + "primaryKey": "id", + }); + server.create_index(body).await; + let documents = json!([ + {"id": 1, "value": "1 2 3 4 5 6 7 8 9 10 11"}, + {"id": 2, "value": "1 2 3 4 5 6 7 8 9 10"}] + ); + server.add_or_update_multiple_documents(documents).await; + + // Build a query whose trailing word `11` falls beyond the MAX_QUERY_LEN word limit. Both documents + // contain `1`, but only the first contains `11`: a query of just `1` matches both, while `1 11` matches + // only the first. Getting 2 hits for the long query therefore shows that the extra word was ignored. + let query = (0..MAX_QUERY_LEN) + .map(|_| "1") + .chain(std::iter::once("11")) + .fold(String::new(), |s, w| s + " " + w); + let (response, _) = server.search_post(json!({"q": query})).await; + assert_eq!(response["nbHits"], 2); + let (response, _) = server.search_post(json!({"q": "1 11"})).await; + assert_eq!(response["nbHits"], 1); +} From 06b2a587affa0ff4c06a4aff81724f96c8b1e719 Mon Sep 17 00:00:00 2001 From: many Date: Tue, 12 Jan 2021 13:53:32 +0100 Subject: [PATCH 03/15] normalize synonyms during indexation --- .../src/update/settings_update.rs | 19 ++++- meilisearch-http/tests/search_settings.rs | 83 +++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index 7b82c1c6e..205ef216a 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -1,9 +1,10 @@ -use std::collections::{BTreeMap, BTreeSet}; +use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}}; use heed::Result as ZResult; use fst::{set::OpBuilder, SetBuilder}; use sdset::SetBuf; use meilisearch_schema::Schema; +use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig}; use crate::database::{MainT, UpdateT}; use 
crate::settings::{UpdateState, SettingsUpdate, RankingRule}; @@ -289,13 +290,27 @@ pub fn apply_synonyms_update( let main_store = index.main; let synonyms_store = index.synonyms; + let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?; + let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words)); + + fn normalize>(analyzer: &Analyzer, text: &str) -> String { + analyzer.analyze(&text) + .tokens() + .fold(String::new(), |mut s, t| { + s.push_str(&t.word); + s + }) + } let mut synonyms_builder = SetBuilder::memory(); synonyms_store.clear(writer)?; - for (word, alternatives) in synonyms.clone() { + for (word, alternatives) in synonyms { + let word = normalize(&analyzer, &word); + synonyms_builder.insert(&word)?; let alternatives = { + let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect(); let alternatives = SetBuf::from_dirty(alternatives); let mut alternatives_builder = SetBuilder::memory(); alternatives_builder.extend_iter(alternatives)?; diff --git a/meilisearch-http/tests/search_settings.rs b/meilisearch-http/tests/search_settings.rs index 46417498d..97d27023a 100644 --- a/meilisearch-http/tests/search_settings.rs +++ b/meilisearch-http/tests/search_settings.rs @@ -167,6 +167,89 @@ async fn search_with_settings_stop_words() { async fn search_with_settings_synonyms() { let mut server = common::Server::test_server().await; + let config = json!({ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "desc(age)", + "exactness", + "desc(balance)" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "name", + "age", + "color", + "gender", + "email", + "address", + "about" + ], + "displayedAttributes": [ + "name", + "age", + "gender", + "color", + "email", + "phone", + "address", + "balance" + ], + "stopWords": null, + "synonyms": { + "Application": [ + "Exercitation" + ] + }, + }); + + server.update_all_settings(config).await; + + let query 
= "q=application&limit=3"; + let expect = json!([ + { + "balance": "$1,921.58", + "age": 31, + "color": "Green", + "name": "Harper Carson", + "gender": "male", + "email": "harpercarson@chorizon.com", + "phone": "+1 (912) 430-3243", + "address": "883 Dennett Place, Knowlton, New Mexico, 9219" + }, + { + "balance": "$1,706.13", + "age": 27, + "color": "Green", + "name": "Cherry Orr", + "gender": "female", + "email": "cherryorr@chorizon.com", + "phone": "+1 (995) 479-3174", + "address": "442 Beverly Road, Ventress, New Mexico, 3361" + }, + { + "balance": "$1,476.39", + "age": 28, + "color": "brown", + "name": "Maureen Dale", + "gender": "female", + "email": "maureendale@chorizon.com", + "phone": "+1 (984) 538-3684", + "address": "817 Newton Street, Bannock, Wyoming, 1468" + } + ]); + + let (response, _status_code) = server.search_get(query).await; + assert_json_eq!(expect, response["hits"].clone(), ordered: false); +} + +#[actix_rt::test] +async fn search_with_settings_normalized_synonyms() { + let mut server = common::Server::test_server().await; + let config = json!({ "rankingRules": [ "typo", From bc0d53e819407311b4e1d8f7d67cd744e5128bd6 Mon Sep 17 00:00:00 2001 From: Many Date: Wed, 13 Jan 2021 13:17:19 +0100 Subject: [PATCH 04/15] Update meilisearch-core/src/update/settings_update.rs Co-authored-by: marin --- meilisearch-core/src/update/settings_update.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index 205ef216a..7bb84c866 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -296,10 +296,7 @@ pub fn apply_synonyms_update( fn normalize>(analyzer: &Analyzer, text: &str) -> String { analyzer.analyze(&text) .tokens() - .fold(String::new(), |mut s, t| { - s.push_str(&t.word); - s - }) + .fold(String::new(), |mut s, t| s + t.text()) } let mut synonyms_builder = SetBuilder::memory(); 
From 430a5f902bca52a5cadf8e8c158431e9e88e3a07 Mon Sep 17 00:00:00 2001 From: mpostma Date: Sat, 9 Jan 2021 13:26:23 +0100 Subject: [PATCH 05/15] fix race condition in document addition --- .../src/update/documents_addition.rs | 32 ++++++++-- meilisearch-core/src/update/mod.rs | 35 ++++++----- meilisearch-http/src/dump.rs | 13 ++-- meilisearch-http/src/routes/document.rs | 61 ++++++++++--------- 4 files changed, 84 insertions(+), 57 deletions(-) diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index 00fdd5122..71ed8033c 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -23,6 +23,8 @@ pub struct DocumentsAddition { updates_store: store::Updates, updates_results_store: store::UpdatesResults, updates_notifier: UpdateEventsEmitter, + // Whether the user explicitely set the primary key in the update + primary_key: Option, documents: Vec, is_partial: bool, } @@ -39,6 +41,7 @@ impl DocumentsAddition { updates_notifier, documents: Vec::new(), is_partial: false, + primary_key: None, } } @@ -53,9 +56,14 @@ impl DocumentsAddition { updates_notifier, documents: Vec::new(), is_partial: true, + primary_key: None, } } + pub fn set_primary_key(&mut self, primary_key: String) { + self.primary_key = Some(primary_key); + } + pub fn update_document(&mut self, document: D) { self.documents.push(document); } @@ -71,6 +79,7 @@ impl DocumentsAddition { self.updates_results_store, self.documents, self.is_partial, + self.primary_key, )?; Ok(update_id) } @@ -88,6 +97,7 @@ pub fn push_documents_addition( updates_results_store: store::UpdatesResults, addition: Vec, is_partial: bool, + primary_key: Option, ) -> MResult { let mut values = Vec::with_capacity(addition.len()); for add in addition { @@ -99,9 +109,9 @@ pub fn push_documents_addition( let last_update_id = next_update_id(writer, updates_store, updates_results_store)?; let update = if is_partial { - 
Update::documents_partial(values) + Update::documents_partial(primary_key, values) } else { - Update::documents_addition(values) + Update::documents_addition(primary_key, values) }; updates_store.put_update(writer, last_update_id, &update)?; @@ -149,7 +159,8 @@ pub fn apply_addition( writer: &mut heed::RwTxn, index: &store::Index, new_documents: Vec>, - partial: bool + partial: bool, + primary_key: Option, ) -> MResult<()> { let mut schema = match index.main.schema(writer)? { @@ -162,7 +173,14 @@ pub fn apply_addition( let internal_docids = index.main.internal_docids(writer)?; let mut available_ids = DiscoverIds::new(&internal_docids); - let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; + let primary_key = match schema.primary_key() { + Some(primary_key) => primary_key.to_string(), + None => { + let name = primary_key.ok_or(Error::MissingPrimaryKey)?; + schema.set_primary_key(&name)?; + name + } + }; // 1. store documents ids for future deletion let mut documents_additions = HashMap::new(); @@ -275,16 +293,18 @@ pub fn apply_documents_partial_addition( writer: &mut heed::RwTxn, index: &store::Index, new_documents: Vec>, + primary_key: Option, ) -> MResult<()> { - apply_addition(writer, index, new_documents, true) + apply_addition(writer, index, new_documents, true, primary_key) } pub fn apply_documents_addition( writer: &mut heed::RwTxn, index: &store::Index, new_documents: Vec>, + primary_key: Option, ) -> MResult<()> { - apply_addition(writer, index, new_documents, false) + apply_addition(writer, index, new_documents, false, primary_key) } pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Index) -> MResult<()> { diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index d10f484a4..bcc03ec3f 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -52,16 +52,16 @@ impl Update { } } - fn documents_addition(documents: Vec>) -> Update { + fn 
documents_addition(primary_key: Option, documents: Vec>) -> Update { Update { - data: UpdateData::DocumentsAddition(documents), + data: UpdateData::DocumentsAddition{ documents, primary_key }, enqueued_at: Utc::now(), } } - fn documents_partial(documents: Vec>) -> Update { + fn documents_partial(primary_key: Option, documents: Vec>) -> Update { Update { - data: UpdateData::DocumentsPartial(documents), + data: UpdateData::DocumentsPartial{ documents, primary_key }, enqueued_at: Utc::now(), } } @@ -85,8 +85,15 @@ impl Update { pub enum UpdateData { ClearAll, Customs(Vec), - DocumentsAddition(Vec>), - DocumentsPartial(Vec>), + // (primary key, documents) + DocumentsAddition { + primary_key: Option, + documents: Vec> + }, + DocumentsPartial { + primary_key: Option, + documents: Vec>, + }, DocumentsDeletion(Vec), Settings(Box) } @@ -96,11 +103,11 @@ impl UpdateData { match self { UpdateData::ClearAll => UpdateType::ClearAll, UpdateData::Customs(_) => UpdateType::Customs, - UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition { - number: addition.len(), + UpdateData::DocumentsAddition{ documents, .. } => UpdateType::DocumentsAddition { + number: documents.len(), }, - UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial { - number: addition.len(), + UpdateData::DocumentsPartial{ documents, .. 
} => UpdateType::DocumentsPartial { + number: documents.len(), }, UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion { number: deletion.len(), @@ -239,25 +246,25 @@ pub fn update_task( (update_type, result, start.elapsed()) } - UpdateData::DocumentsAddition(documents) => { + UpdateData::DocumentsAddition { documents, primary_key } => { let start = Instant::now(); let update_type = UpdateType::DocumentsAddition { number: documents.len(), }; - let result = apply_documents_addition(writer, index, documents); + let result = apply_documents_addition(writer, index, documents, primary_key); (update_type, result, start.elapsed()) } - UpdateData::DocumentsPartial(documents) => { + UpdateData::DocumentsPartial{ documents, primary_key } => { let start = Instant::now(); let update_type = UpdateType::DocumentsPartial { number: documents.len(), }; - let result = apply_documents_partial_addition(writer, index, documents); + let result = apply_documents_partial_addition(writer, index, documents, primary_key); (update_type, result, start.elapsed()) } diff --git a/meilisearch-http/src/dump.rs b/meilisearch-http/src/dump.rs index c4513af6f..bf5752830 100644 --- a/meilisearch-http/src/dump.rs +++ b/meilisearch-http/src/dump.rs @@ -128,15 +128,15 @@ fn import_index_v1( // push document in buffer values.push(document?); // if buffer is full, create and apply a batch, and clean buffer - if values.len() == document_batch_size { + if values.len() == document_batch_size { let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size)); - apply_documents_addition(write_txn, &index, batch)?; + apply_documents_addition(write_txn, &index, batch, None)?; } } - // apply documents remaining in the buffer - if !values.is_empty() { - apply_documents_addition(write_txn, &index, values)?; + // apply documents remaining in the buffer + if !values.is_empty() { + apply_documents_addition(write_txn, &index, values, None)?; } // sync index information: stats, 
updated_at, last_update @@ -289,7 +289,6 @@ fn dump_index_documents(data: &web::Data, reader: &MainReader, dir_path: & /// Write error with a context. fn fail_dump_process(data: &web::Data, dump_info: DumpInfo, context: &str, error: E) { let error_message = format!("{}; {}", context, error); - error!("Something went wrong during dump process: {}", &error_message); data.set_current_dump_info(dump_info.with_error(Error::dump_failed(error_message).into())) } @@ -405,7 +404,7 @@ pub fn init_dump_process(data: &web::Data, dumps_dir: &Path) -> Result) -> Option { - for key in document.keys() { - if key.to_lowercase().contains("id") { - return Some(key.to_string()); - } - } - None -} - #[derive(Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] struct UpdateDocumentsQuery { @@ -168,26 +159,6 @@ async fn update_multiple_documents( is_partial: bool, ) -> Result { let update_id = data.get_or_create_index(&path.index_uid, |index| { - let reader = data.db.main_read_txn()?; - - let mut schema = index - .main - .schema(&reader)? - .ok_or(meilisearch_core::Error::SchemaMissing)?; - - if schema.primary_key().is_none() { - let id = match ¶ms.primary_key { - Some(id) => id.to_string(), - None => body - .first() - .and_then(find_primary_key) - .ok_or(meilisearch_core::Error::MissingPrimaryKey)?, - }; - - schema.set_primary_key(&id).map_err(Error::bad_request)?; - - data.db.main_write(|w| index.main.put_schema(w, &schema))?; - } let mut document_addition = if is_partial { index.documents_partial_addition() @@ -195,6 +166,27 @@ async fn update_multiple_documents( index.documents_addition() }; + // Return an early error if primary key is already set, otherwise, try to set it up in the + // update later. + let reader = data.db.main_read_txn()?; + let schema = index + .main + .schema(&reader)? 
+ .ok_or(meilisearch_core::Error::SchemaMissing)?; + + match (params.into_inner().primary_key, schema.primary_key()) { + (Some(_), Some(_)) => return Err(meilisearch_schema::Error::PrimaryKeyAlreadyPresent)?, + (Some(key), None) => document_addition.set_primary_key(key), + (None, None) => { + let key = body + .first() + .and_then(find_primary_key) + .ok_or(meilisearch_core::Error::MissingPrimaryKey)?; + document_addition.set_primary_key(key); + } + (None, Some(_)) => () + } + for document in body.into_inner() { document_addition.update_document(document); } @@ -204,6 +196,15 @@ async fn update_multiple_documents( return Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id))); } +fn find_primary_key(document: &IndexMap) -> Option { + for key in document.keys() { + if key.to_lowercase().contains("id") { + return Some(key.to_string()); + } + } + None +} + #[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")] async fn add_documents( data: web::Data, From 1d910dbb425d7dbaacb5255b7dfe20b112f991b2 Mon Sep 17 00:00:00 2001 From: marin Date: Fri, 15 Jan 2021 00:55:31 +0100 Subject: [PATCH 06/15] Update meilisearch-core/src/update/documents_addition.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- meilisearch-core/src/update/documents_addition.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index 71ed8033c..26bbd94b2 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -23,7 +23,7 @@ pub struct DocumentsAddition { updates_store: store::Updates, updates_results_store: store::UpdatesResults, updates_notifier: UpdateEventsEmitter, - // Whether the user explicitely set the primary key in the update + // Whether the user explicitly set the primary key in the update 
primary_key: Option, documents: Vec, is_partial: bool, From e3e475c5b14c61e0a96b5b3d003446ae334a0018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 19 Jan 2021 00:18:52 +0100 Subject: [PATCH 07/15] Update LICENSE --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 8f028e3f2..03ad189ef 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2020 Meili SAS +Copyright (c) 2019-2021 Meili SAS Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 2fe52d0a4f51cc5243b55f00f45d593f465d8e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Harper?= Date: Tue, 26 Jan 2021 15:14:53 -0500 Subject: [PATCH 08/15] fix homebrew name brew is the command, the package manager name is homebrew --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f37d27abd..e8b2ec9e3 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ For more information about features go to [our documentation](https://docs.meili ### Deploy the Server -#### Brew (Mac OS) +#### Homebrew (Mac OS) ```bash brew update && brew install meilisearch From eeccdce33a39747666a644e60a57c75ea9ec7a12 Mon Sep 17 00:00:00 2001 From: many Date: Thu, 28 Jan 2021 10:33:44 +0100 Subject: [PATCH 09/15] update tokenizer to v0.1.3 --- Cargo.lock | 2 +- meilisearch-core/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65f050a41..97ad12184 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1753,7 +1753,7 @@ dependencies = [ [[package]] name = "meilisearch-tokenizer" version = "0.1.1" -source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.2#8d91cd52f30aa4b651a085c15056938f7b599646" +source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.3#d3fe5311a66c1f31682a297df8a8b6b8916f4252" 
dependencies = [ "character_converter", "cow-utils", diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index 7d930096c..3fe030e9f 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -26,7 +26,7 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } log = "0.4.11" meilisearch-error = { path = "../meilisearch-error", version = "0.18.1" } meilisearch-schema = { path = "../meilisearch-schema", version = "0.18.1" } -meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.2" } +meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.3" } meilisearch-types = { path = "../meilisearch-types", version = "0.18.1" } once_cell = "1.5.2" ordered-float = { version = "2.0.1", features = ["serde"] } From 940f83698cfaa46fdbe5a18d7b5f28bfdf268f80 Mon Sep 17 00:00:00 2001 From: Many Date: Mon, 1 Feb 2021 12:06:48 +0100 Subject: [PATCH 10/15] Update meilisearch-core/src/update/settings_update.rs Co-authored-by: marin --- meilisearch-core/src/update/settings_update.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index 7bb84c866..e96d5702c 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -296,7 +296,7 @@ pub fn apply_synonyms_update( fn normalize>(analyzer: &Analyzer, text: &str) -> String { analyzer.analyze(&text) .tokens() - .fold(String::new(), |mut s, t| s + t.text()) + .fold(String::new(), |s, t| s + t.text()) } let mut synonyms_builder = SetBuilder::memory(); From 97f35de41f6738fee0c6b7c5994bc2c02accec71 Mon Sep 17 00:00:00 2001 From: mpostma Date: Mon, 1 Feb 2021 18:59:22 +0100 Subject: [PATCH 11/15] fix flaky test --- meilisearch-http/tests/index_update.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git 
a/meilisearch-http/tests/index_update.rs b/meilisearch-http/tests/index_update.rs index df4639252..4d7e025a6 100644 --- a/meilisearch-http/tests/index_update.rs +++ b/meilisearch-http/tests/index_update.rs @@ -94,13 +94,21 @@ async fn return_update_status_of_pushed_documents() { ]; let mut update_ids = Vec::new(); - + let mut bodies = bodies.into_iter(); + let url = "/indexes/test/documents?primaryKey=title"; + let (response, status_code) = server.post_request(&url, bodies.next().unwrap()).await; + assert_eq!(status_code, 202); + let update_id = response["updateId"].as_u64().unwrap(); + update_ids.push(update_id); + server.wait_update_id(update_id).await; + + let url = "/indexes/test/documents"; for body in bodies { - let (response, status_code) = server.post_request(&url, body).await; - assert_eq!(status_code, 202); - let update_id = response["updateId"].as_u64().unwrap(); - update_ids.push(update_id); + let (response, status_code) = server.post_request(&url, body).await; + assert_eq!(status_code, 202); + let update_id = response["updateId"].as_u64().unwrap(); + update_ids.push(update_id); } // 2. Fetch the status of index. 
@@ -173,7 +181,7 @@ async fn should_return_existing_update() { let (response, status_code) = server.create_index(body).await; assert_eq!(status_code, 201); assert_eq!(response["primaryKey"], json!(null)); - + let body = json!([{ "title": "Test", "comment": "comment test" From 8760beed1c51358523a57f023146f7715ebb40b2 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 2 Feb 2021 14:23:33 +0100 Subject: [PATCH 12/15] bump meilisearch --- Cargo.lock | 10 +++++----- meilisearch-core/Cargo.toml | 8 ++++---- meilisearch-error/Cargo.toml | 2 +- meilisearch-http/Cargo.toml | 8 ++++---- meilisearch-schema/Cargo.toml | 4 ++-- meilisearch-tokenizer/Cargo.toml | 2 +- meilisearch-types/Cargo.toml | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afdf18a9b..bde553a4c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1639,7 +1639,7 @@ checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" [[package]] name = "meilisearch-core" -version = "0.18.1" +version = "0.19.0" dependencies = [ "arc-swap", "assert_matches", @@ -1686,14 +1686,14 @@ dependencies = [ [[package]] name = "meilisearch-error" -version = "0.18.1" +version = "0.19.0" dependencies = [ "actix-http", ] [[package]] name = "meilisearch-http" -version = "0.18.1" +version = "0.19.0" dependencies = [ "actix-cors", "actix-http", @@ -1742,7 +1742,7 @@ dependencies = [ [[package]] name = "meilisearch-schema" -version = "0.18.1" +version = "0.19.0" dependencies = [ "indexmap", "meilisearch-error", @@ -1769,7 +1769,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "0.18.1" +version = "0.19.0" dependencies = [ "serde", "zerocopy", diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index 3fe030e9f..2fd717842 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-core" -version = "0.18.1" +version = "0.19.0" license = "MIT" authors = ["Kerollmops "] edition = 
"2018" @@ -24,10 +24,10 @@ intervaltree = "0.2.6" itertools = "0.10.0" levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } log = "0.4.11" -meilisearch-error = { path = "../meilisearch-error", version = "0.18.1" } -meilisearch-schema = { path = "../meilisearch-schema", version = "0.18.1" } +meilisearch-error = { path = "../meilisearch-error", version = "0.19.0" } +meilisearch-schema = { path = "../meilisearch-schema", version = "0.19.0" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.3" } -meilisearch-types = { path = "../meilisearch-types", version = "0.18.1" } +meilisearch-types = { path = "../meilisearch-types", version = "0.19.0" } once_cell = "1.5.2" ordered-float = { version = "2.0.1", features = ["serde"] } pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" } diff --git a/meilisearch-error/Cargo.toml b/meilisearch-error/Cargo.toml index ac0cc9f5b..d5a474ea5 100644 --- a/meilisearch-error/Cargo.toml +++ b/meilisearch-error/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-error" -version = "0.18.1" +version = "0.19.0" authors = ["marin "] edition = "2018" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 0300f57f2..dd67fba2f 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "meilisearch-http" description = "MeiliSearch HTTP server" -version = "0.18.1" +version = "0.19.0" license = "MIT" authors = [ "Quentin de Quelen ", @@ -32,9 +32,9 @@ http = "0.2.2" indexmap = { version = "1.6.1", features = ["serde-1"] } log = "0.4.11" main_error = "0.1.1" -meilisearch-core = { path = "../meilisearch-core", version = "0.18.0" } -meilisearch-error = { path = "../meilisearch-error", version = "0.18.1" } -meilisearch-schema = { path = "../meilisearch-schema", version = "0.18.1" } +meilisearch-core = { path = "../meilisearch-core", version = "0.19.0" } 
+meilisearch-error = { path = "../meilisearch-error", version = "0.19.0" } +meilisearch-schema = { path = "../meilisearch-schema", version = "0.19.0" } mime = "0.3.16" once_cell = "1.5.2" rand = "0.8.1" diff --git a/meilisearch-schema/Cargo.toml b/meilisearch-schema/Cargo.toml index 063c39174..7b3b05f86 100644 --- a/meilisearch-schema/Cargo.toml +++ b/meilisearch-schema/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "meilisearch-schema" -version = "0.18.1" +version = "0.19.0" license = "MIT" authors = ["Kerollmops "] edition = "2018" [dependencies] indexmap = { version = "1.6.1", features = ["serde-1"] } -meilisearch-error = { path = "../meilisearch-error", version = "0.18.1" } +meilisearch-error = { path = "../meilisearch-error", version = "0.19.0" } serde = { version = "1.0.118", features = ["derive"] } serde_json = { version = "1.0.61", features = ["preserve_order"] } zerocopy = "0.3.0" diff --git a/meilisearch-tokenizer/Cargo.toml b/meilisearch-tokenizer/Cargo.toml index 0145dda22..166e14be0 100644 --- a/meilisearch-tokenizer/Cargo.toml +++ b/meilisearch-tokenizer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-tokenizer" -version = "0.18.1" +version = "0.19.0" license = "MIT" authors = ["Kerollmops "] edition = "2018" diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index b25c30d54..6189e8b3e 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-types" -version = "0.18.1" +version = "0.19.0" license = "MIT" authors = ["Clément Renault "] edition = "2018" From 97ba5e97c6b5bbe5181535fd7d427f0386da4c54 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 2 Feb 2021 14:32:04 +0100 Subject: [PATCH 13/15] update changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59ad5ac48..be24a0d28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## v0.19.0 + + - The snapshots are now 
created and then renamed atomically (#1172) + - Fix a race condition when an update and a document addition are processed immediately one after the other (#1176) + - Latin synonyms are normalized during indexation (#1174) + ## v0.18.1 - Fix unexpected CORS error (#1185) @@ -64,7 +70,7 @@ ## v0.11.1 - - Fix facet cache on document update (#789) + - Fix facet cache on document update (#789) - Improvements on settings consistency (#778) ## v0.11.0 From a95a18afe4a9e766ac43315af6102f3cc17a1a98 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 3 Feb 2021 11:59:29 +0100 Subject: [PATCH 14/15] ignore primary key if it is already set --- meilisearch-http/src/routes/document.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/meilisearch-http/src/routes/document.rs b/meilisearch-http/src/routes/document.rs index 943351bd7..202575cc3 100644 --- a/meilisearch-http/src/routes/document.rs +++ b/meilisearch-http/src/routes/document.rs @@ -175,7 +175,6 @@ async fn update_multiple_documents( .ok_or(meilisearch_core::Error::SchemaMissing)?; match (params.into_inner().primary_key, schema.primary_key()) { - (Some(_), Some(_)) => return Err(meilisearch_schema::Error::PrimaryKeyAlreadyPresent)?, (Some(key), None) => document_addition.set_primary_key(key), (None, None) => { let key = body @@ -184,7 +183,7 @@ async fn update_multiple_documents( .ok_or(meilisearch_core::Error::MissingPrimaryKey)?; document_addition.set_primary_key(key); } - (None, Some(_)) => () + _ => () } for document in body.into_inner() { From 1df0fdf3e2d910433d795db5f606fe906dbf3bcb Mon Sep 17 00:00:00 2001 From: many Date: Wed, 3 Feb 2021 15:05:15 +0100 Subject: [PATCH 15/15] fix synonyms normalization Synonyms need to be indexed in ascending order, and the new normalization step for synonyms potentially changes this order, which breaks the indexation process because "Harry Potter" > "HP" but "harry potter" < "hp" --- meilisearch-core/src/update/settings_update.rs | 15 +++++++++++---- 
meilisearch-http/tests/settings.rs | 4 ++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index e96d5702c..c9d40fa1b 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}}; use heed::Result as ZResult; -use fst::{set::OpBuilder, SetBuilder}; +use fst::{SetBuilder, set::OpBuilder}; use sdset::SetBuf; use meilisearch_schema::Schema; use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig}; @@ -298,16 +298,23 @@ pub fn apply_synonyms_update( .tokens() .fold(String::new(), |s, t| s + t.text()) } + + // normalize synonyms and reorder them creating a BTreeMap + let synonyms: BTreeMap> = synonyms.into_iter().map( |(word, alternatives)| { + let word = normalize(&analyzer, &word); + let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect(); + (word, alternatives) + }).collect(); + + // index synonyms, + // synonyms have to be ordered by key before indexation let mut synonyms_builder = SetBuilder::memory(); synonyms_store.clear(writer)?; for (word, alternatives) in synonyms { - let word = normalize(&analyzer, &word); - synonyms_builder.insert(&word)?; let alternatives = { - let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect(); let alternatives = SetBuf::from_dirty(alternatives); let mut alternatives_builder = SetBuilder::memory(); alternatives_builder.extend_iter(alternatives)?; diff --git a/meilisearch-http/tests/settings.rs b/meilisearch-http/tests/settings.rs index 6b125c13a..98973b56f 100644 --- a/meilisearch-http/tests/settings.rs +++ b/meilisearch-http/tests/settings.rs @@ -171,6 +171,8 @@ async fn write_all_and_update() { "synonyms": { "road": ["street", "avenue"], "street": ["avenue"], + "HP": ["Harry Potter"], + "Harry Potter": ["HP"] }, 
"attributesForFaceting": ["title"], }); @@ -208,6 +210,8 @@ async fn write_all_and_update() { "synonyms": { "road": ["street", "avenue"], "street": ["avenue"], + "hp": ["harry potter"], + "harry potter": ["hp"] }, "attributesForFaceting": ["title"], });