From 26dcb9e66d5311feea92302e9cb0b57ac0c590f5 Mon Sep 17 00:00:00 2001 From: tamo Date: Thu, 6 May 2021 11:57:42 +0200 Subject: [PATCH] bump milli version and fix a performance issue for large dumps --- Cargo.lock | 6 ++++-- meilisearch-http/Cargo.toml | 2 +- .../src/index_controller/index_actor/actor.rs | 8 ++------ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1c109a79..26c53663a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "actix-codec" version = "0.4.0" @@ -1840,8 +1842,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.2.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.0#792225eaffce6b3682f9b30b7370b6a547c4757e" +version = "0.2.1" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.1#25f75d4d03732131e6edcf20f4d126210b159d43" dependencies = [ "anyhow", "bstr", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 7ac3ecb38..c9f8d63b7 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -51,7 +51,7 @@ main_error = "0.1.0" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.2" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.1" } mime = "0.3.16" once_cell = "1.5.2" oxidized-json-checker = "0.3.2" diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index ca23663b7..1f0091265 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -333,16 +333,12 @@ impl IndexActor { // Get write txn to wait for ongoing write transaction 
before dump. let txn = index.write_txn()?; - let documents_ids = index.documents_ids(&txn)?; - // TODO: TAMO: calling this function here can consume **a lot** of RAM, we should - // use some kind of iterators -> waiting for a milli release - let documents = index.documents(&txn, documents_ids)?; - let fields_ids_map = index.fields_ids_map(&txn)?; // we want to save **all** the fields in the dump. let fields_to_dump: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - for (_doc_id, document) in documents { + for document in index.all_documents(&txn)? { + let (_doc_id, document) = document?; let json = milli::obkv_to_json(&fields_to_dump, &fields_ids_map, document)?; file.write_all(serde_json::to_string(&json)?.as_bytes())?; file.write_all(b"\n")?;