Create a new export documents meilitool subcommand

This commit is contained in:
Clément Renault 2024-10-01 12:07:17 +02:00
parent 30f3c30389
commit feede0fad2
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 49 additions and 2 deletions

1
Cargo.lock generated
View File

@ -3521,6 +3521,7 @@ dependencies = [
"meilisearch-auth",
"meilisearch-types",
"serde",
"serde_json",
"time",
"uuid",
]

View File

@ -16,5 +16,6 @@ file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.209", features = ["derive"] }
serde_json = { version = "1.0" }
time = { version = "0.3.36", features = ["formatting"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }

View File

@ -1,5 +1,5 @@
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::BufWriter;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use anyhow::{bail, Context};
@ -64,6 +64,13 @@ enum Command {
skip_enqueued_tasks: bool,
},
/// Exports the documents of an index from the Meilisearch database to stdout.
ExportDocuments {
/// The index name to export the documents from.
#[arg(long)]
index_name: String,
},
/// Attempts to upgrade from one major version to the next without a dump.
///
/// Make sure to run this command when Meilisearch is not running!
@ -89,6 +96,7 @@ fn main() -> anyhow::Result<()> {
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
}
Command::ExportDocuments { index_name } => export_documents(db_path, index_name),
Command::OfflineUpgrade { target_version } => {
let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
@ -605,7 +613,7 @@ fn export_a_dump(
db_path: PathBuf,
dump_dir: PathBuf,
skip_enqueued_tasks: bool,
) -> Result<(), anyhow::Error> {
) -> anyhow::Result<()> {
let started_at = OffsetDateTime::now_utc();
// 1. Extracts the instance UID from disk
@ -750,3 +758,40 @@ fn export_a_dump(
Ok(())
}
/// Exports every document of the index named `index_name` to stdout as
/// newline-delimited JSON (one document per line).
///
/// The function opens the environment stored under `<db_path>/tasks`, reads
/// its `index-mapping` database to resolve the index name to a UUID, opens the
/// index located at `<db_path>/indexes/<uuid>` and serializes each document
/// with all the fields listed in the index's fields-ids map.
///
/// Indexes whose name does not match are reported on stderr and skipped.
///
/// # Errors
///
/// Returns an error when the environment, the mapping database or the index
/// cannot be opened, when a document cannot be read or serialized, when
/// writing to stdout fails, or when no index named `index_name` exists.
fn export_documents(db_path: PathBuf, index_name: String) -> anyhow::Result<()> {
    let index_scheduler_path = db_path.join("tasks");
    // SAFETY: NOTE(review) - the `open` call is marked unsafe by the storage
    // library; this presumably relies on nothing else opening or modifying the
    // database while the tool runs (i.e. Meilisearch is stopped). Confirm.
    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;

    let rtxn = env.read_txn()?;
    let index_mapping: Database<Str, UuidCodec> =
        try_opening_database(&env, &rtxn, "index-mapping")?;

    // Tracks whether the requested index was seen, so that a typo in the index
    // name is reported instead of silently producing an empty export.
    let mut index_found = false;

    for result in index_mapping.iter(&rtxn)? {
        let (uid, uuid) = result?;
        if uid != index_name {
            eprintln!("Found index {uid} but it's not the right index...");
            continue;
        }

        index_found = true;

        let index_path = db_path.join("indexes").join(uuid.to_string());
        let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
            format!("While trying to open the index at path {:?}", index_path.display())
        })?;

        // A transaction on the index itself, distinct from the one opened on
        // the environment that holds the index mapping.
        let index_rtxn = index.read_txn()?;
        let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

        let mut stdout = BufWriter::new(std::io::stdout());
        for ret in index.all_documents(&index_rtxn)? {
            let (_id, doc) = ret?;
            let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
            serde_json::to_writer(&mut stdout, &document)?;
            // Separate documents so that the output can be split line by line.
            stdout.write_all(b"\n")?;
        }

        // Flush explicitly: dropping the `BufWriter` would swallow write errors.
        stdout.flush()?;

        // The requested index has been exported; no need to look any further.
        break;
    }

    if !index_found {
        anyhow::bail!("Index {index_name:?} not found in the index mapping");
    }

    Ok(())
}