rename the embedder index for clarity

This commit is contained in:
Tamo 2024-09-24 10:36:28 +02:00
parent 1e4d4e69c4
commit 79d8a7a51a

View File

@ -32,17 +32,21 @@ pub const REQUEST_PARALLELISM: usize = 40;
pub struct ArroyWrapper { pub struct ArroyWrapper {
quantized: bool, quantized: bool,
index: u8, embedder_index: u8,
database: arroy::Database<Unspecified>, database: arroy::Database<Unspecified>,
} }
impl ArroyWrapper { impl ArroyWrapper {
pub fn new(database: arroy::Database<Unspecified>, index: u8, quantized: bool) -> Self { pub fn new(
Self { database, index, quantized } database: arroy::Database<Unspecified>,
embedder_index: u8,
quantized: bool,
) -> Self {
Self { database, embedder_index, quantized }
} }
pub fn index(&self) -> u8 { pub fn index(&self) -> u8 {
self.index self.embedder_index
} }
fn readers<'a, D: arroy::Distance>( fn readers<'a, D: arroy::Distance>(
@ -50,7 +54,7 @@ impl ArroyWrapper {
rtxn: &'a RoTxn<'a>, rtxn: &'a RoTxn<'a>,
db: arroy::Database<D>, db: arroy::Database<D>,
) -> impl Iterator<Item = Result<arroy::Reader<D>, arroy::Error>> + 'a { ) -> impl Iterator<Item = Result<arroy::Reader<D>, arroy::Error>> + 'a {
arroy_db_range_for_embedder(self.index).map_while(move |index| { arroy_db_range_for_embedder(self.embedder_index).map_while(move |index| {
match arroy::Reader::open(rtxn, index, db) { match arroy::Reader::open(rtxn, index, db) {
Ok(reader) => Some(Ok(reader)), Ok(reader) => Some(Ok(reader)),
Err(arroy::Error::MissingMetadata(_)) => None, Err(arroy::Error::MissingMetadata(_)) => None,
@ -60,7 +64,7 @@ impl ArroyWrapper {
} }
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> { pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
let first_id = arroy_db_range_for_embedder(self.index).next().unwrap(); let first_id = arroy_db_range_for_embedder(self.embedder_index).next().unwrap();
if self.quantized { if self.quantized {
Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions()) Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions())
} else { } else {
@ -70,7 +74,7 @@ impl ArroyWrapper {
pub fn quantize(&mut self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { pub fn quantize(&mut self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
if !self.quantized { if !self.quantized {
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
let writer = arroy::Writer::new(self.angular_db(), index, dimension); let writer = arroy::Writer::new(self.angular_db(), index, dimension);
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?; writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
} }
@ -81,7 +85,7 @@ impl ArroyWrapper {
// TODO: We can stop early when we find an empty DB // TODO: We can stop early when we find an empty DB
pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> { pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
let need_build = if self.quantized { let need_build = if self.quantized {
arroy::Writer::new(self.quantized_db(), index, dimension).need_build(rtxn) arroy::Writer::new(self.quantized_db(), index, dimension).need_build(rtxn)
} else { } else {
@ -101,7 +105,7 @@ impl ArroyWrapper {
rng: &mut R, rng: &mut R,
dimension: usize, dimension: usize,
) -> Result<(), arroy::Error> { ) -> Result<(), arroy::Error> {
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
if self.quantized { if self.quantized {
arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)? arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)?
} else { } else {
@ -119,7 +123,9 @@ impl ArroyWrapper {
embeddings: &Embeddings<f32>, embeddings: &Embeddings<f32>,
) -> Result<(), arroy::Error> { ) -> Result<(), arroy::Error> {
let dimension = embeddings.dimension(); let dimension = embeddings.dimension();
for (index, vector) in arroy_db_range_for_embedder(self.index).zip(embeddings.iter()) { for (index, vector) in
arroy_db_range_for_embedder(self.embedder_index).zip(embeddings.iter())
{
if self.quantized { if self.quantized {
arroy::Writer::new(self.quantized_db(), index, dimension) arroy::Writer::new(self.quantized_db(), index, dimension)
.add_item(wtxn, item_id, vector)? .add_item(wtxn, item_id, vector)?
@ -154,7 +160,7 @@ impl ArroyWrapper {
) -> Result<(), arroy::Error> { ) -> Result<(), arroy::Error> {
let dimension = vector.len(); let dimension = vector.len();
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
let writer = arroy::Writer::new(db, index, dimension); let writer = arroy::Writer::new(db, index, dimension);
if !writer.contains_item(wtxn, item_id)? { if !writer.contains_item(wtxn, item_id)? {
writer.add_item(wtxn, item_id, vector)?; writer.add_item(wtxn, item_id, vector)?;
@ -172,7 +178,7 @@ impl ArroyWrapper {
dimension: usize, dimension: usize,
item_id: arroy::ItemId, item_id: arroy::ItemId,
) -> Result<bool, arroy::Error> { ) -> Result<bool, arroy::Error> {
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
if self.quantized { if self.quantized {
let writer = arroy::Writer::new(self.quantized_db(), index, dimension); let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
if writer.del_item(wtxn, item_id)? { if writer.del_item(wtxn, item_id)? {
@ -213,7 +219,7 @@ impl ArroyWrapper {
let dimension = vector.len(); let dimension = vector.len();
let mut deleted_index = None; let mut deleted_index = None;
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
let writer = arroy::Writer::new(db, index, dimension); let writer = arroy::Writer::new(db, index, dimension);
let Some(candidate) = writer.item_vector(wtxn, item_id)? else { let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
// uses invariant: vectors are packed in the first writers. // uses invariant: vectors are packed in the first writers.
@ -228,7 +234,9 @@ impl ArroyWrapper {
// 🥲 enforce invariant: vectors are packed in the first writers. // 🥲 enforce invariant: vectors are packed in the first writers.
if let Some(deleted_index) = deleted_index { if let Some(deleted_index) = deleted_index {
let mut last_index_with_a_vector = None; let mut last_index_with_a_vector = None;
for index in arroy_db_range_for_embedder(self.index).skip(deleted_index as usize) { for index in
arroy_db_range_for_embedder(self.embedder_index).skip(deleted_index as usize)
{
let writer = arroy::Writer::new(db, index, dimension); let writer = arroy::Writer::new(db, index, dimension);
let Some(candidate) = writer.item_vector(wtxn, item_id)? else { let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
break; break;
@ -247,7 +255,7 @@ impl ArroyWrapper {
} }
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
if self.quantized { if self.quantized {
arroy::Writer::new(self.quantized_db(), index, dimension).clear(wtxn)?; arroy::Writer::new(self.quantized_db(), index, dimension).clear(wtxn)?;
} else { } else {
@ -258,7 +266,7 @@ impl ArroyWrapper {
} }
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> { pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
let empty = if self.quantized { let empty = if self.quantized {
arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)? arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)?
} else { } else {
@ -277,7 +285,7 @@ impl ArroyWrapper {
dimension: usize, dimension: usize,
item: arroy::ItemId, item: arroy::ItemId,
) -> Result<bool, arroy::Error> { ) -> Result<bool, arroy::Error> {
for index in arroy_db_range_for_embedder(self.index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
let contains = if self.quantized { let contains = if self.quantized {
arroy::Writer::new(self.quantized_db(), index, dimension) arroy::Writer::new(self.quantized_db(), index, dimension)
.contains_item(rtxn, item)? .contains_item(rtxn, item)?