mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
merge the build and quantize method
This commit is contained in:
parent
b8a74e0464
commit
645a55317a
@ -713,10 +713,7 @@ where
|
|||||||
|
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
||||||
if is_quantizing {
|
writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing)?;
|
||||||
writer.quantize(wtxn, dimension)?;
|
|
||||||
}
|
|
||||||
writer.build(wtxn, &mut rng, dimension)?;
|
|
||||||
Result::Ok(())
|
Result::Ok(())
|
||||||
})
|
})
|
||||||
.map_err(InternalError::from)??;
|
.map_err(InternalError::from)??;
|
||||||
|
@ -98,18 +98,37 @@ impl ArroyWrapper {
|
|||||||
Ok(false)
|
Ok(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO: We should early exit when it doesn't need to be built
|
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
|
||||||
pub fn build<R: rand::Rng + rand::SeedableRng>(
|
&mut self,
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
rng: &mut R,
|
rng: &mut R,
|
||||||
dimension: usize,
|
dimension: usize,
|
||||||
|
quantizing: bool,
|
||||||
) -> Result<(), arroy::Error> {
|
) -> Result<(), arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)?
|
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
||||||
|
if writer.need_build(wtxn)? {
|
||||||
|
writer.build(wtxn, rng, None)?
|
||||||
|
} else if writer.is_empty(wtxn)? {
|
||||||
|
break;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
arroy::Writer::new(self.angular_db(), index, dimension).build(wtxn, rng, None)?
|
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||||
|
// If we are quantizing the databases, we can't know from meilisearch
|
||||||
|
// if the db was empty but still contained the wrong metadata, thus we need
|
||||||
|
// to quantize everything and can't stop early. Since this operation can
|
||||||
|
// only happen once in the life of an embedder, it's not very performance
|
||||||
|
// sensitive.
|
||||||
|
if quantizing && !self.quantized {
|
||||||
|
let writer =
|
||||||
|
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
|
||||||
|
writer.build(wtxn, rng, None)?
|
||||||
|
} else if writer.need_build(wtxn)? {
|
||||||
|
writer.build(wtxn, rng, None)?
|
||||||
|
} else if writer.is_empty(wtxn)? {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -266,20 +285,6 @@ impl ArroyWrapper {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
|
||||||
let empty = if self.quantized {
|
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)?
|
|
||||||
} else {
|
|
||||||
arroy::Writer::new(self.angular_db(), index, dimension).is_empty(rtxn)?
|
|
||||||
};
|
|
||||||
if !empty {
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn contains_item(
|
pub fn contains_item(
|
||||||
&self,
|
&self,
|
||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
|
Loading…
Reference in New Issue
Block a user