implement the upgrade from v1.10 to v1.11 in meilitool

This commit is contained in:
Tamo 2024-10-29 02:46:14 +01:00
parent 362836efb7
commit ddd03e9b37
6 changed files with 150 additions and 25 deletions

28
Cargo.lock generated
View File

@ -404,6 +404,25 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "arroy"
version = "0.5.0"
source = "git+https://github.com/meilisearch/arroy/?rev=3908c9e#3908c9edfba77ba18cc50bda41c88166ba5ebd37"
dependencies = [
"bytemuck",
"byteorder",
"heed",
"log",
"memmap2",
"nohash",
"ordered-float",
"rand",
"rayon",
"roaring",
"tempfile",
"thiserror",
]
[[package]] [[package]]
name = "assert-json-diff" name = "assert-json-diff"
version = "2.0.2" version = "2.0.2"
@ -707,9 +726,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.16.1" version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d"
dependencies = [ dependencies = [
"bytemuck_derive", "bytemuck_derive",
] ]
@ -2556,7 +2575,7 @@ name = "index-scheduler"
version = "1.11.0" version = "1.11.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"big_s", "big_s",
"bincode", "bincode",
"crossbeam", "crossbeam",
@ -3517,6 +3536,7 @@ name = "meilitool"
version = "1.11.0" version = "1.11.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?rev=3908c9e)",
"clap", "clap",
"dump", "dump",
"file-store", "file-store",
@ -3547,7 +3567,7 @@ dependencies = [
name = "milli" name = "milli"
version = "1.11.0" version = "1.11.0"
dependencies = [ dependencies = [
"arroy", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"big_s", "big_s",
"bimap", "bimap",
"bincode", "bincode",

View File

@ -18,3 +18,5 @@ meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.209", features = ["derive"] } serde = { version = "1.0.209", features = ["derive"] }
time = { version = "0.3.36", features = ["formatting"] } time = { version = "0.3.36", features = ["formatting"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false } uuid = { version = "1.10.0", features = ["v4"], default-features = false }
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "3908c9e" }

View File

@ -15,7 +15,7 @@ license.workspace = true
bimap = { version = "0.6.3", features = ["serde"] } bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3" bincode = "1.3.3"
bstr = "1.9.1" bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.9.1", default-features = false } charabia = { version = "0.9.1", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"

View File

@ -1,13 +1,16 @@
mod v1_10; mod v1_10;
mod v1_11;
mod v1_9; mod v1_9;
use std::path::PathBuf; use std::path::{Path, PathBuf};
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use meilisearch_types::versioning::create_version_file; use meilisearch_types::versioning::create_version_file;
use v1_10::v1_9_to_v1_10; use v1_10::v1_9_to_v1_10;
use crate::upgrade::v1_11::v1_10_to_v1_11;
pub struct OfflineUpgrade { pub struct OfflineUpgrade {
pub db_path: PathBuf, pub db_path: PathBuf,
pub current_version: (String, String, String), pub current_version: (String, String, String),
@ -16,29 +19,50 @@ pub struct OfflineUpgrade {
impl OfflineUpgrade { impl OfflineUpgrade {
pub fn upgrade(self) -> anyhow::Result<()> { pub fn upgrade(self) -> anyhow::Result<()> {
let upgrade_list = [
(v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"),
(v1_10_to_v1_11, "1", "11", "0"),
];
let (current_major, current_minor, current_patch) = &self.current_version; let (current_major, current_minor, current_patch) = &self.current_version;
let start_at = match (
current_major.as_str(),
current_minor.as_str(),
current_patch.as_str(),
) {
("1", "9", _) => 0,
("1", "10", _) => 1,
_ => {
bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9")
}
};
let (target_major, target_minor, target_patch) = &self.target_version; let (target_major, target_minor, target_patch) = &self.target_version;
println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) {
("v1", "10", _) => 0,
("v1", "11", _) => 1,
_ => {
bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.11")
}
};
match ( println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}");
(current_major.as_str(), current_minor.as_str(), current_patch.as_str()),
(target_major.as_str(), target_minor.as_str(), target_patch.as_str()), #[allow(clippy::needless_range_loop)]
) { for index in start_at..=ends_at {
(("1", "9", _), ("1", "10", _)) => v1_9_to_v1_10(&self.db_path)?, let (func, major, minor, patch) = upgrade_list[index];
((major, minor, _), _) if major != "1" && minor != "9" => (func)(&self.db_path)?;
bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9"), println!("Done");
(_, (major, minor, _)) if major != "1" && minor != "10" => // We're writing the version file just in case an issue arise _while_ upgrading.
bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10"), // We don't want the DB to fail in an unknown state.
_ => println!("Writing VERSION file");
bail!("Unsupported upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}. Can only upgrade from v1.9 to v1.10"),
create_version_file(&self.db_path, major, minor, patch)
.context("while writing VERSION file after the upgrade")?;
} }
println!("Writing VERSION file");
create_version_file(&self.db_path, target_major, target_minor, target_patch)
.context("while writing VERSION file after the upgrade")?;
println!("Success"); println!("Success");
Ok(()) Ok(())

View File

@ -79,7 +79,8 @@ fn update_index_stats(
let stats: Option<v1_9::IndexStats> = index_stats let stats: Option<v1_9::IndexStats> = index_stats
.remap_data_type::<SerdeJson<v1_9::IndexStats>>() .remap_data_type::<SerdeJson<v1_9::IndexStats>>()
.get(sched_wtxn, &index_uuid) .get(sched_wtxn, &index_uuid)
.with_context(ctx)?; .with_context(ctx)
.with_context(|| "While reading value")?;
if let Some(stats) = stats { if let Some(stats) = stats {
let stats: self::IndexStats = stats.into(); let stats: self::IndexStats = stats.into();
@ -87,7 +88,8 @@ fn update_index_stats(
index_stats index_stats
.remap_data_type::<SerdeJson<self::IndexStats>>() .remap_data_type::<SerdeJson<self::IndexStats>>()
.put(sched_wtxn, &index_uuid, &stats) .put(sched_wtxn, &index_uuid, &stats)
.with_context(ctx)?; .with_context(ctx)
.with_context(|| "While writing value")?;
} }
Ok(()) Ok(())
@ -155,6 +157,7 @@ fn date_round_trip(
} }
pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> {
println!("Upgrading from v1.9.0 to v1.10.0");
// 2 changes here // 2 changes here
// 1. date format. needs to be done before opening the Index // 1. date format. needs to be done before opening the Index

View File

@ -0,0 +1,76 @@
//! The breaking changes that happened between the v1.10 and the v1.11 are:
//! - Arroy went from the v0.4.0 to the v0.5.0, see this release note to get the whole context: https://github.com/meilisearch/arroy/releases/tag/v0.5.0
//! - The `angular` distance has been renamed to `cosine` => We only need to update the string in the metadata.
//! - Reorganize the `NodeId` to make the appending of vectors work => We'll have to update the keys of almost all items in the DB.
//! - Store the list of updated IDs directly in LMDB instead of a roaring bitmap => This shouldn't be an issue since we are never supposed to commit this roaring bitmap, but it's not forbidden by arroy so ensuring it works is probably better than anything.
use std::path::Path;
use anyhow::Context;
use meilisearch_types::{
heed::{types::Str, Database, EnvOpenOptions},
milli::index::db_name,
};
use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
/// Upgrades every index of the database at `db_path` from the v1.10 on-disk
/// format to the v1.11 one by migrating its arroy vector store.
///
/// The list of indexes is read from the `index-mapping` database of the
/// scheduler environment located in `<db_path>/tasks`. Each index environment
/// in `<db_path>/indexes/<uuid>` is then opened, migrated and committed in turn.
///
/// # Errors
///
/// Returns an error as soon as an environment, a database or a transaction
/// cannot be opened, when the index mapping cannot be read, when the arroy
/// migration fails, or when a commit fails. The whole index mapping is read
/// before any index is touched, but indexes that were already committed when a
/// later one fails stay upgraded.
pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
    println!("Upgrading from v1.10.0 to v1.11.0");

    let index_scheduler_path = db_path.join("tasks");
    // SAFETY: NOTE(review): opening an LMDB environment is `unsafe` in heed; this presumably
    // relies on meilitool being run offline, with no other process touching the files — confirm.
    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;

    let sched_rtxn = env
        .read_txn()
        .context("while obtaining a read transaction for the index scheduler")?;

    let index_mapping: Database<Str, UuidCodec> =
        try_opening_database(&env, &sched_rtxn, "index-mapping")?;

    let index_count =
        index_mapping.len(&sched_rtxn).context("while reading the number of indexes")?;

    // Read the whole mapping up front so a corrupted entry is reported before
    // any index has been modified.
    let indexes = index_mapping
        .iter(&sched_rtxn)
        .context("while iterating over the index mapping")?
        .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid)))
        .collect::<Result<Vec<_>, _>>()
        .context("while reading the index mapping")?;

    for (index_index, (uid, uuid)) in indexes.into_iter().enumerate() {
        let index_path = db_path.join("indexes").join(uuid.to_string());

        println!(
            "[{}/{index_count}]Updating embeddings for `{uid}` at `{}`",
            index_index + 1,
            index_path.display()
        );

        // SAFETY: NOTE(review): same assumption as for the scheduler environment above — confirm.
        let index_env = unsafe { EnvOpenOptions::new().max_dbs(25).open(&index_path) }
            .with_context(|| format!("while opening index {uid} at '{}'", index_path.display()))?;

        let index_rtxn = index_env.read_txn().with_context(|| {
            format!(
                "while obtaining a read transaction for index {uid} at {}",
                index_path.display()
            )
        })?;
        let mut index_wtxn = index_env.write_txn().with_context(|| {
            format!(
                "while obtaining a write transaction for index {uid} at {}",
                index_path.display()
            )
        })?;

        let database = try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY)
            .with_context(|| format!("while opening the arroy database for index `{uid}`"))?;

        // `ugrade_from_prev_version` (sic) is the name exposed by the pinned arroy revision.
        arroy_v04_to_v05::ugrade_from_prev_version(&index_rtxn, &mut index_wtxn, database)
            .with_context(|| format!("while upgrading the arroy database of index `{uid}`"))?;

        index_wtxn
            .commit()
            .with_context(|| format!("while committing the upgrade of index `{uid}`"))?;
    }

    Ok(())
}