with binary quantization

This commit is contained in:
Tamo 2024-07-08 20:12:56 +02:00
parent 05cc2d1fac
commit 42bfc67871
4 changed files with 28 additions and 6 deletions

22
Cargo.lock generated
View File

@@ -398,6 +398,24 @@ dependencies = [
"thiserror",
]
+[[package]]
+name = "arroy"
+version = "0.4.0"
+source = "git+https://github.com/meilisearch/arroy?branch=binary-quantization#facc8575222d3f5da5b9a94288e44896911e701f"
+dependencies = [
+"bytemuck",
+"byteorder",
+"heed",
+"log",
+"memmap2 0.9.4",
+"ordered-float",
+"rand",
+"rayon",
+"roaring",
+"tempfile",
+"thiserror",
+]
[[package]]
name = "assert-json-diff"
version = "2.0.2"
@@ -2455,7 +2473,7 @@ name = "index-scheduler"
version = "1.9.0"
dependencies = [
"anyhow",
-"arroy",
+"arroy 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"big_s",
"bincode",
"crossbeam",
@@ -3448,7 +3466,7 @@ dependencies = [
name = "milli"
version = "1.9.0"
dependencies = [
-"arroy",
+"arroy 0.4.0 (git+https://github.com/meilisearch/arroy?branch=binary-quantization)",
"big_s",
"bimap",
"bincode",

View File

@@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
-arroy = "0.4.0"
+arroy = { git = "https://github.com/meilisearch/arroy", branch = "binary-quantization" }
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }

View File

@@ -169,7 +169,7 @@ pub struct Index {
/// Maps an embedder name to its id in the arroy store.
pub embedder_category_id: Database<Str, U8>,
/// Vector store based on arroy™.
-pub vector_arroy: arroy::Database<arroy::distances::Angular>,
+pub vector_arroy: arroy::Database<arroy::distances::BinaryQuantizedEuclidean>,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
@@ -1605,7 +1605,8 @@ impl Index {
&'a self,
rtxn: &'a RoTxn<'a>,
embedder_id: u8,
-) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::Angular>>> + 'a {
+) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::BinaryQuantizedEuclidean>>> + 'a
+{
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
arroy::Reader::open(rtxn, k, self.vector_arroy)
.map(Some)

View File

@@ -990,7 +990,10 @@ impl<'a, 'i> Transform<'a, 'i> {
};
let readers: Result<
-BTreeMap<&str, (Vec<arroy::Reader<arroy::distances::Angular>>, &RoaringBitmap)>,
+BTreeMap<
+&str,
+(Vec<arroy::Reader<arroy::distances::BinaryQuantizedEuclidean>>, &RoaringBitmap),
+>,
> = settings_diff
.embedding_config_updates
.iter()