Add an option to ignore vectors

This commit is contained in:
Clément Renault 2024-10-09 10:41:58 +02:00
parent 750c988333
commit e63b33c6ba
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F

View File

@@ -75,6 +75,10 @@ enum Command {
/// The index name to export the documents from. /// The index name to export the documents from.
#[arg(long)] #[arg(long)]
index_name: String, index_name: String,
/// Do not export vectors with the documents.
#[arg(long)]
ignore_vectors: bool,
}, },
/// Attempts to upgrade from one major version to the next without a dump. /// Attempts to upgrade from one major version to the next without a dump.
@@ -102,7 +106,9 @@ fn main() -> anyhow::Result<()> {
Command::ExportADump { dump_dir, skip_enqueued_tasks } => { Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
export_a_dump(db_path, dump_dir, skip_enqueued_tasks) export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
} }
Command::ExportDocuments { index_name } => export_documents(db_path, index_name), Command::ExportDocuments { index_name, ignore_vectors } => {
export_documents(db_path, index_name, ignore_vectors)
}
Command::OfflineUpgrade { target_version } => { Command::OfflineUpgrade { target_version } => {
let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
@@ -765,7 +771,11 @@ fn export_a_dump(
Ok(()) Ok(())
} }
fn export_documents(db_path: PathBuf, index_name: String) -> anyhow::Result<()> { fn export_documents(
db_path: PathBuf,
index_name: String,
ignore_vectors: bool,
) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
@@ -792,6 +802,7 @@ fn export_documents(db_path: PathBuf, index_name: String) -> anyhow::Result<()>
let (id, doc) = ret?; let (id, doc) = ret?;
let mut document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; let mut document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
if !ignore_vectors {
'inject_vectors: { 'inject_vectors: {
let embeddings = index.embeddings(&rtxn, id)?; let embeddings = index.embeddings(&rtxn, id)?;
@@ -834,7 +845,9 @@ fn export_documents(db_path: PathBuf, index_name: String) -> anyhow::Result<()>
)), )),
regenerate: !user_provided, regenerate: !user_provided,
}; };
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap()); vectors
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
} }
} }