mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Use the new ExternalDocumentsIds struct in the engine
This commit is contained in:
parent
fe82516f9f
commit
27f3ef5f7a
20
src/index.rs
20
src/index.rs
@ -7,11 +7,10 @@ use heed::types::*;
|
|||||||
use heed::{PolyDatabase, Database, RwTxn, RoTxn};
|
use heed::{PolyDatabase, Database, RwTxn, RoTxn};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::external_documents_ids::ExternalDocumentsIds;
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::Search;
|
use crate::Search;
|
||||||
use crate::{BEU32, DocumentId};
|
use crate::{BEU32, DocumentId, ExternalDocumentsIds};
|
||||||
use crate::{
|
use crate::{
|
||||||
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
|
||||||
BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
||||||
@ -143,14 +142,15 @@ impl Index {
|
|||||||
pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<ExternalDocumentsIds<'t>> {
|
pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<ExternalDocumentsIds<'t>> {
|
||||||
let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
||||||
let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
||||||
match hard.zip(soft) {
|
let hard = match hard {
|
||||||
Some((hard, soft)) => {
|
Some(hard) => fst::Map::new(hard)?.map_data(Cow::Borrowed)?,
|
||||||
let hard = fst::Map::new(hard)?.map_data(Cow::Borrowed)?;
|
None => fst::Map::default().map_data(Cow::Owned)?,
|
||||||
let soft = fst::Map::new(soft)?.map_data(Cow::Borrowed)?;
|
};
|
||||||
Ok(ExternalDocumentsIds::new(hard, soft))
|
let soft = match soft {
|
||||||
},
|
Some(soft) => fst::Map::new(soft)?.map_data(Cow::Borrowed)?,
|
||||||
None => Ok(ExternalDocumentsIds::default()),
|
None => fst::Map::default().map_data(Cow::Owned)?,
|
||||||
}
|
};
|
||||||
|
Ok(ExternalDocumentsIds::new(hard, soft))
|
||||||
}
|
}
|
||||||
|
|
||||||
/* fields ids map */
|
/* fields ids map */
|
||||||
|
@ -21,6 +21,7 @@ use fxhash::{FxHasher32, FxHasher64};
|
|||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
pub use self::criterion::{Criterion, default_criteria};
|
pub use self::criterion::{Criterion, default_criteria};
|
||||||
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
pub use self::fields_ids_map::FieldsIdsMap;
|
pub use self::fields_ids_map::FieldsIdsMap;
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{Search, SearchResult};
|
pub use self::search::{Search, SearchResult};
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use crate::Index;
|
use crate::{ExternalDocumentsIds, Index};
|
||||||
|
|
||||||
pub struct ClearDocuments<'t, 'u, 'i> {
|
pub struct ClearDocuments<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -27,7 +27,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
|
|
||||||
// We clean some of the main engine datastructures.
|
// We clean some of the main engine datastructures.
|
||||||
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
||||||
self.index.put_external_documents_ids(self.wtxn, &fst::Map::default())?;
|
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
|
||||||
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
||||||
|
|
||||||
// Clear the other databases.
|
// Clear the other databases.
|
||||||
|
@ -1,16 +1,13 @@
|
|||||||
use std::borrow::Cow;
|
use fst::IntoStreamer;
|
||||||
use std::convert::TryFrom;
|
|
||||||
|
|
||||||
use fst::{IntoStreamer, Streamer};
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{Index, BEU32, SmallString32};
|
use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
|
||||||
use super::ClearDocuments;
|
use super::ClearDocuments;
|
||||||
|
|
||||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
external_documents_ids: fst::Map<Vec<u8>>,
|
external_documents_ids: ExternalDocumentsIds<'static>,
|
||||||
documents_ids: RoaringBitmap,
|
documents_ids: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,7 +19,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
{
|
{
|
||||||
let external_documents_ids = index
|
let external_documents_ids = index
|
||||||
.external_documents_ids(wtxn)?
|
.external_documents_ids(wtxn)?
|
||||||
.map_data(Cow::into_owned)?;
|
.into_static();
|
||||||
|
|
||||||
Ok(DeleteDocuments {
|
Ok(DeleteDocuments {
|
||||||
wtxn,
|
wtxn,
|
||||||
@ -41,7 +38,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> {
|
pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> {
|
||||||
let docid = self.external_documents_ids.get(external_id).map(|id| u32::try_from(id).unwrap())?;
|
let docid = self.external_documents_ids.get(external_id)?;
|
||||||
self.delete_document(docid);
|
self.delete_document(docid);
|
||||||
Some(docid)
|
Some(docid)
|
||||||
}
|
}
|
||||||
@ -112,26 +109,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// We create the FST map of the external ids that we must delete.
|
// We create the FST map of the external ids that we must delete.
|
||||||
external_ids.sort_unstable();
|
external_ids.sort_unstable();
|
||||||
let external_ids_to_delete = fst::Set::from_iter(external_ids.iter().map(AsRef::as_ref))?;
|
let external_ids_to_delete = fst::Set::from_iter(external_ids.iter().map(AsRef::as_ref))?;
|
||||||
let external_ids_to_delete = fst::Map::from(external_ids_to_delete.into_fst());
|
|
||||||
|
|
||||||
let new_external_documents_ids = {
|
// We acquire the current external documents ids map...
|
||||||
// We acquire the current external documents ids map and create
|
let mut new_external_documents_ids = self.index.external_documents_ids(self.wtxn)?;
|
||||||
// a difference operation between the current and to-delete external ids.
|
// ...and remove the to-delete external ids.
|
||||||
let external_documents_ids = self.index.external_documents_ids(self.wtxn)?;
|
new_external_documents_ids.delete_ids(external_ids_to_delete)?;
|
||||||
let difference = external_documents_ids.op().add(&external_ids_to_delete).difference();
|
|
||||||
|
|
||||||
// We stream the new external ids that does no more contains the to-delete external ids.
|
|
||||||
let mut iter = difference.into_stream();
|
|
||||||
let mut new_external_documents_ids_builder = fst::MapBuilder::memory();
|
|
||||||
while let Some((external_id, docids)) = iter.next() {
|
|
||||||
new_external_documents_ids_builder.insert(external_id, docids[0].value)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We create an FST map from the above builder.
|
|
||||||
new_external_documents_ids_builder.into_map()
|
|
||||||
};
|
|
||||||
|
|
||||||
// We write the new external ids into the main database.
|
// We write the new external ids into the main database.
|
||||||
|
let new_external_documents_ids = new_external_documents_ids.into_static();
|
||||||
self.index.put_external_documents_ids(self.wtxn, &new_external_documents_ids)?;
|
self.index.put_external_documents_ids(self.wtxn, &new_external_documents_ids)?;
|
||||||
|
|
||||||
// Maybe we can improve the get performance of the words
|
// Maybe we can improve the get performance of the words
|
||||||
|
@ -6,13 +6,12 @@ use std::iter::Peekable;
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context};
|
use anyhow::{anyhow, Context};
|
||||||
use fst::{IntoStreamer, Streamer};
|
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use log::info;
|
use log::info;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
use crate::{BEU32, MergeFn, Index, FieldsIdsMap};
|
use crate::{BEU32, MergeFn, Index, FieldsIdsMap, ExternalDocumentsIds};
|
||||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||||
use super::merge_function::merge_two_obkvs;
|
use super::merge_function::merge_two_obkvs;
|
||||||
use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
||||||
@ -20,7 +19,7 @@ use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
|||||||
pub struct TransformOutput {
|
pub struct TransformOutput {
|
||||||
pub primary_key: u8,
|
pub primary_key: u8,
|
||||||
pub fields_ids_map: FieldsIdsMap,
|
pub fields_ids_map: FieldsIdsMap,
|
||||||
pub external_documents_ids: fst::Map<Vec<u8>>,
|
pub external_documents_ids: ExternalDocumentsIds<'static>,
|
||||||
pub new_documents_ids: RoaringBitmap,
|
pub new_documents_ids: RoaringBitmap,
|
||||||
pub replaced_documents_ids: RoaringBitmap,
|
pub replaced_documents_ids: RoaringBitmap,
|
||||||
pub documents_count: usize,
|
pub documents_count: usize,
|
||||||
@ -116,7 +115,7 @@ impl Transform<'_, '_> {
|
|||||||
return Ok(TransformOutput {
|
return Ok(TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
external_documents_ids: fst::Map::default(),
|
external_documents_ids: ExternalDocumentsIds::default(),
|
||||||
new_documents_ids: RoaringBitmap::new(),
|
new_documents_ids: RoaringBitmap::new(),
|
||||||
replaced_documents_ids: RoaringBitmap::new(),
|
replaced_documents_ids: RoaringBitmap::new(),
|
||||||
documents_count: 0,
|
documents_count: 0,
|
||||||
@ -370,7 +369,7 @@ impl Transform<'_, '_> {
|
|||||||
primary_key: u8,
|
primary_key: u8,
|
||||||
fields_ids_map: FieldsIdsMap,
|
fields_ids_map: FieldsIdsMap,
|
||||||
approximate_number_of_documents: usize,
|
approximate_number_of_documents: usize,
|
||||||
external_documents_ids: fst::Map<Cow<'_, [u8]>>,
|
mut external_documents_ids: ExternalDocumentsIds<'_>,
|
||||||
progress_callback: F,
|
progress_callback: F,
|
||||||
) -> anyhow::Result<TransformOutput>
|
) -> anyhow::Result<TransformOutput>
|
||||||
where
|
where
|
||||||
@ -457,28 +456,17 @@ impl Transform<'_, '_> {
|
|||||||
let mut documents_file = writer.into_inner()?;
|
let mut documents_file = writer.into_inner()?;
|
||||||
documents_file.seek(SeekFrom::Start(0))?;
|
documents_file.seek(SeekFrom::Start(0))?;
|
||||||
|
|
||||||
// We create the union between the existing external documents ids with the new ones.
|
|
||||||
let new_external_documents_ids = new_external_documents_ids_builder.into_map();
|
|
||||||
let union_op = fst::map::OpBuilder::new()
|
|
||||||
.add(&external_documents_ids)
|
|
||||||
.add(&new_external_documents_ids)
|
|
||||||
.r#union();
|
|
||||||
|
|
||||||
// We stream and merge the new external documents ids map with the existing one.
|
|
||||||
let before_docids_merging = Instant::now();
|
let before_docids_merging = Instant::now();
|
||||||
let mut external_documents_ids_builder = fst::MapBuilder::memory();
|
// We merge the new external ids with existing external documents ids.
|
||||||
let mut iter = union_op.into_stream();
|
let new_external_documents_ids = new_external_documents_ids_builder.into_map();
|
||||||
while let Some((external_id, vals)) = iter.next() {
|
external_documents_ids.insert_ids(&new_external_documents_ids)?;
|
||||||
assert_eq!(vals.len(), 1, "there must be exactly one document id");
|
|
||||||
external_documents_ids_builder.insert(external_id, vals[0].value)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("Documents external merging took {:.02?}", before_docids_merging.elapsed());
|
info!("Documents external merging took {:.02?}", before_docids_merging.elapsed());
|
||||||
|
|
||||||
Ok(TransformOutput {
|
Ok(TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
external_documents_ids: external_documents_ids_builder.into_map(),
|
external_documents_ids: external_documents_ids.into_static(),
|
||||||
new_documents_ids,
|
new_documents_ids,
|
||||||
replaced_documents_ids,
|
replaced_documents_ids,
|
||||||
documents_count,
|
documents_count,
|
||||||
@ -531,7 +519,7 @@ impl Transform<'_, '_> {
|
|||||||
Ok(TransformOutput {
|
Ok(TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
external_documents_ids: external_documents_ids.map_data(Cow::into_owned)?,
|
external_documents_ids: external_documents_ids.into_static(),
|
||||||
new_documents_ids: documents_ids,
|
new_documents_ids: documents_ids,
|
||||||
replaced_documents_ids: RoaringBitmap::default(),
|
replaced_documents_ids: RoaringBitmap::default(),
|
||||||
documents_count,
|
documents_count,
|
||||||
|
Loading…
Reference in New Issue
Block a user