From feede0fad221f7699ee70573a2c2a54a1936f80b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Tue, 1 Oct 2024 12:07:17 +0200
Subject: [PATCH] Create a new export documents meilitool subcommand

---
 Cargo.lock            |  1 +
 meilitool/Cargo.toml  |  1 +
 meilitool/src/main.rs | 49 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bcca35173..494741649 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3521,6 +3521,7 @@ dependencies = [
  "meilisearch-auth",
  "meilisearch-types",
  "serde",
+ "serde_json",
  "time",
  "uuid",
 ]
diff --git a/meilitool/Cargo.toml b/meilitool/Cargo.toml
index ce6c1ad5b..fdb601207 100644
--- a/meilitool/Cargo.toml
+++ b/meilitool/Cargo.toml
@@ -16,5 +16,6 @@ file-store = { path = "../file-store" }
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 serde = { version = "1.0.209", features = ["derive"] }
+serde_json = { version = "1.0" }
 time = { version = "0.3.36", features = ["formatting"] }
 uuid = { version = "1.10.0", features = ["v4"], default-features = false }
diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs
index 9dbff2486..faf18bd6e 100644
--- a/meilitool/src/main.rs
+++ b/meilitool/src/main.rs
@@ -1,5 +1,5 @@
 use std::fs::{read_dir, read_to_string, remove_file, File};
-use std::io::BufWriter;
+use std::io::{BufWriter, Write};
 use std::path::PathBuf;
 
 use anyhow::{bail, Context};
@@ -64,6 +64,13 @@ enum Command {
         skip_enqueued_tasks: bool,
     },
 
+    /// Exports the documents of an index from the Meilisearch database to stdout.
+    ExportDocuments {
+        /// The index name to export the documents from.
+        #[arg(long)]
+        index_name: String,
+    },
+
     /// Attempts to upgrade from one major version to the next without a dump.
     ///
     /// Make sure to run this commmand when Meilisearch is not running!
@@ -89,6 +96,7 @@ fn main() -> anyhow::Result<()> {
         Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
             export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
         }
+        Command::ExportDocuments { index_name } => export_documents(db_path, index_name),
         Command::OfflineUpgrade { target_version } => {
             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
             OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
@@ -605,7 +613,7 @@ fn export_a_dump(
     db_path: PathBuf,
     dump_dir: PathBuf,
     skip_enqueued_tasks: bool,
-) -> Result<(), anyhow::Error> {
+) -> anyhow::Result<()> {
     let started_at = OffsetDateTime::now_utc();
 
     // 1. Extracts the instance UID from disk
@@ -750,3 +758,40 @@
 
     Ok(())
 }
+
+fn export_documents(db_path: PathBuf, index_name: String) -> anyhow::Result<()> {
+    let index_scheduler_path = db_path.join("tasks");
+    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
+        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
+
+    let rtxn = env.read_txn()?;
+    let index_mapping: Database<Str, UuidCodec> =
+        try_opening_database(&env, &rtxn, "index-mapping")?;
+
+    for result in index_mapping.iter(&rtxn)? {
+        let (uid, uuid) = result?;
+        if uid == index_name {
+            let index_path = db_path.join("indexes").join(uuid.to_string());
+            let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
+                format!("While trying to open the index at path {:?}", index_path.display())
+            })?;
+
+            let rtxn = index.read_txn()?;
+            let fields_ids_map = index.fields_ids_map(&rtxn)?;
+            let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+
+            let mut stdout = BufWriter::new(std::io::stdout());
+            for ret in index.all_documents(&rtxn)? {
+                let (_id, doc) = ret?;
+                let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
+                serde_json::to_writer(&mut stdout, &document)?;
+            }
+
+            stdout.flush()?;
+        } else {
+            eprintln!("Found index {uid} but it's not the right index...");
+        }
+    }
+
+    Ok(())
+}
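A rough usage sketch, assuming clap's default kebab-case naming (so `ExportDocuments` and `index_name` surface as `export-documents` and `--index-name`) and meilitool's existing `--db-path` option; the paths and the "movies" index name below are placeholders:

    # Export every document of the "movies" index to a file (placeholder names).
    cargo run --release -p meilitool -- \
        --db-path /path/to/data.ms \
        export-documents --index-name movies > movies.json

As written, `serde_json::to_writer` is called once per document with no separator in between, so the output is a stream of concatenated JSON objects rather than a JSON array or newline-delimited JSON; an index name that does not match only produces the "Found index ... but it's not the right index..." message on stderr.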