diff --git a/Cargo.lock b/Cargo.lock index 156e3d146..3cd63cddd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -398,6 +398,24 @@ dependencies = [ "thiserror", ] +[[package]] +name = "arroy" +version = "0.4.0" +source = "git+https://github.com/meilisearch/arroy?branch=binary-quantization#facc8575222d3f5da5b9a94288e44896911e701f" +dependencies = [ + "bytemuck", + "byteorder", + "heed", + "log", + "memmap2 0.9.4", + "ordered-float", + "rand", + "rayon", + "roaring", + "tempfile", + "thiserror", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -2455,7 +2473,7 @@ name = "index-scheduler" version = "1.9.0" dependencies = [ "anyhow", - "arroy", + "arroy 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "big_s", "bincode", "crossbeam", @@ -3448,7 +3466,7 @@ dependencies = [ name = "milli" version = "1.9.0" dependencies = [ - "arroy", + "arroy 0.4.0 (git+https://github.com/meilisearch/arroy?branch=binary-quantization)", "big_s", "bimap", "bincode", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index fd7bde99b..67bdaaba2 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.8" liquid = "0.26.4" -arroy = "0.4.0" +arroy = { git = "https://github.com/meilisearch/arroy", branch = "binary-quantization" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.9.7", features = ["json"] } diff --git a/milli/src/index.rs b/milli/src/index.rs index 0a7a20ce0..6c66d5400 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -169,7 +169,7 @@ pub struct Index { /// Maps an embedder name to its id in the arroy store. pub embedder_category_id: Database, /// Vector store based on arroyâ„¢. - pub vector_arroy: arroy::Database, + pub vector_arroy: arroy::Database, /// Maps the document id to the document as an obkv store. pub(crate) documents: Database, @@ -1605,7 +1605,8 @@ impl Index { &'a self, rtxn: &'a RoTxn<'a>, embedder_id: u8, - ) -> impl Iterator>> + 'a { + ) -> impl Iterator>> + 'a + { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { arroy::Reader::open(rtxn, k, self.vector_arroy) .map(Some) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 1dff29a90..935d24fa4 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -990,7 +990,10 @@ impl<'a, 'i> Transform<'a, 'i> { }; let readers: Result< - BTreeMap<&str, (Vec>, &RoaringBitmap)>, + BTreeMap< + &str, + (Vec>, &RoaringBitmap), + >, > = settings_diff .embedding_config_updates .iter()