mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:11:15 +08:00
Merge #723
723: Fix bug in handling of soft deleted documents when updating settings r=Kerollmops a=loiclec # Pull Request ## Related issue Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3021 ## What does this PR do? This PR fixes the bug where a `missing key in documents database` internal error message could appear when indexing documents. When updating the settings, before clearing the database and before creating the transform output, we now modify the `ExternalDocumentsIds` structure to get rid of all references to soft deleted document ids in its FSTs. It used to be that updating the settings would clear the soft-deleted document ids, but keep the original `ExternalDocumentsIds` structure. As a consequence of this, when processing a future document addition, we could wrongly believe that a document was being replaced when, in fact, it was a completely new document. See the tests `bug_3021_first`, `bug_3021_second`, and `bug_3021` for a minimal test case that would have reproduced the issue. We need to take special care to: - evaluate how users should update to v0.30.1 (containing this fix): dump? reimporting all documents from scratch? - understand IF/HOW this bug could have caused duplicate documents to be returned - and evaluate the correctness of the fix, of course :) Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
commit
0a301b5f88
@ -71,6 +71,30 @@ impl<'a> ExternalDocumentsIds<'a> {
|
||||
self.merge_soft_into_hard()
|
||||
}
|
||||
|
||||
/// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
|
||||
/// don't contain any soft deleted document id.
|
||||
pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
|
||||
let mut new_hard_builder = fst::MapBuilder::memory();
|
||||
|
||||
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||
let mut iter = union_op.into_stream();
|
||||
while let Some((external_id, docids)) = iter.next() {
|
||||
// prefer selecting the ids from soft, always
|
||||
let id = indexed_last_value(docids).unwrap();
|
||||
if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
|
||||
new_hard_builder.insert(external_id, id)?;
|
||||
}
|
||||
}
|
||||
drop(iter);
|
||||
|
||||
// Delete soft map completely
|
||||
self.soft = fst::Map::default().map_data(Cow::Owned)?;
|
||||
// We save the new map as the new hard map.
|
||||
self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
|
||||
let union_op = self.soft.op().add(other).r#union();
|
||||
|
||||
|
@ -1185,13 +1185,15 @@ pub(crate) mod tests {
|
||||
|
||||
use big_s::S;
|
||||
use heed::{EnvOpenOptions, RwTxn};
|
||||
use maplit::hashset;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::documents::DocumentsBatchReader;
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::update::{
|
||||
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
|
||||
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
IndexerConfig, Settings,
|
||||
};
|
||||
use crate::{db_snap, obkv_to_json, Index};
|
||||
|
||||
@ -1485,7 +1487,7 @@ pub(crate) mod tests {
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
let mut index = TempIndex::new();
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
@ -1544,7 +1546,6 @@ pub(crate) mod tests {
|
||||
1 0 3 1 [3, 6, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index
|
||||
.add_documents(documents!([{ "id": 3, "doggo": 4 }, { "id": 3, "doggo": 5 },{ "id": 3, "doggo": 4 }]))
|
||||
.unwrap();
|
||||
@ -1568,7 +1569,6 @@ pub(crate) mod tests {
|
||||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_distinct_field("id".to_owned());
|
||||
@ -1578,12 +1578,11 @@ pub(crate) mod tests {
|
||||
db_snap!(index, documents_ids, @"[4, 5, 6, 7, ]");
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
3 7
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
3 7
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
db_snap!(index, facet_id_f64_docids, 3, @r###"
|
||||
@ -1596,163 +1595,6 @@ pub(crate) mod tests {
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = true;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
3 7
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
db_snap!(index, facet_id_f64_docids, 3, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
let docid = delete.delete_external_id("3").unwrap();
|
||||
insta::assert_snapshot!(format!("{docid}"), @"7");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[4, 5, 6, ]");
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
3 7
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
|
||||
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
3 0
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [0, 7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 5 }])).unwrap();
|
||||
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
3 1
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [0, 1, 7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 7, ]
|
||||
1 0 5 1 [1, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 5, "id": 2, "doggo": 4 }])).unwrap();
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 2
|
||||
3 1
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [2, 6, ]
|
||||
0 0 3 1 [0, 1, 7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 2, 7, ]
|
||||
1 0 5 1 [1, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index
|
||||
.add_documents(documents!([{ "id": 4, "doggo": 5 }, { "id": 3, "doggo": 5 }]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
4 3
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 2
|
||||
3 1
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [2, 6, ]
|
||||
0 0 3 1 [0, 1, 7, ]
|
||||
0 0 4 1 [3, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 2, 7, ]
|
||||
1 0 5 1 [1, 3, ]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -2020,4 +1862,253 @@ pub(crate) mod tests {
|
||||
drop(rtxn);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bug_3021_first() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/3021
|
||||
let mut index = TempIndex::new();
|
||||
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("primary_key".to_owned());
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "primary_key": 38 },
|
||||
{ "primary_key": 34 }
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
db_snap!(index, external_documents_ids, 1, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
delete.delete_external_id("34");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 2, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_searchable_fields(vec![]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// The key point of the test is to verify that the external documents ids
|
||||
// do not contain any entry for previously soft-deleted document ids
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
hard:
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
|
||||
// So that this document addition works correctly now.
|
||||
// It would be wrongly interpreted as a replacement before
|
||||
index.add_documents(documents!({ "primary_key": 34 })).unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
|
||||
|
||||
// We do the test again, but deleting the document with id 0 instead of id 1 now
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
delete.delete_external_id("38");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[1, ]");
|
||||
db_snap!(index, external_documents_ids, 5, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 5, @"[0, ]");
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_searchable_fields(vec!["primary_key".to_owned()]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[1, ]");
|
||||
db_snap!(index, external_documents_ids, 6, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
|
||||
|
||||
// And adding lots of documents afterwards instead of just one.
|
||||
// These extra subtests don't add much, but it's better than nothing.
|
||||
index.add_documents(documents!([{ "primary_key": 38 }, { "primary_key": 39 }, { "primary_key": 41 }, { "primary_key": 40 }, { "primary_key": 41 }, { "primary_key": 42 }])).unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
|
||||
db_snap!(index, external_documents_ids, 7, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
39 2
|
||||
40 4
|
||||
41 3
|
||||
42 5
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 7, @"[]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bug_3021_second() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/3021
|
||||
let mut index = TempIndex::new();
|
||||
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("primary_key".to_owned());
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "primary_key": 30 },
|
||||
{ "primary_key": 34 }
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
db_snap!(index, external_documents_ids, 1, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
delete.delete_external_id("34");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 2, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_searchable_fields(vec![]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// The key point of the test is to verify that the external documents ids
|
||||
// do not contain any entry for previously soft-deleted document ids
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
|
||||
// So that when we add a new document
|
||||
index.add_documents(documents!({ "primary_key": 35, "b": 2 })).unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
// The external documents ids don't have several external ids pointing to the same
|
||||
// internal document id
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
35 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
|
||||
|
||||
// And when we add 34 again, we don't replace document 35
|
||||
index.add_documents(documents!({ "primary_key": 34, "a": 1 })).unwrap();
|
||||
|
||||
// And document 35 still exists, is not deleted
|
||||
db_snap!(index, documents_ids, @"[0, 1, 2, ]");
|
||||
db_snap!(index, external_documents_ids, 5, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 2
|
||||
35 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 5, @"[]");
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0];
|
||||
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
||||
insta::assert_debug_snapshot!(json, @r###"
|
||||
{
|
||||
"primary_key": Number(30),
|
||||
}
|
||||
"###);
|
||||
|
||||
// Furthermore, when we retrieve document 34, it is not the result of merging 35 with 34
|
||||
let (_docid, obkv) = index.documents(&rtxn, [2]).unwrap()[0];
|
||||
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
||||
insta::assert_debug_snapshot!(json, @r###"
|
||||
{
|
||||
"primary_key": Number(34),
|
||||
"a": Number(1),
|
||||
}
|
||||
"###);
|
||||
|
||||
drop(rtxn);
|
||||
|
||||
// Add new documents again
|
||||
index
|
||||
.add_documents(
|
||||
documents!([{ "primary_key": 37 }, { "primary_key": 38 }, { "primary_key": 39 }]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
|
||||
db_snap!(index, external_documents_ids, 6, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 2
|
||||
35 1
|
||||
37 3
|
||||
38 4
|
||||
39 5
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ use super::{IndexDocumentsMethod, IndexerConfig};
|
||||
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError, UserError};
|
||||
use crate::index::db_name;
|
||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
||||
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
|
||||
use crate::{
|
||||
ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index,
|
||||
Result, BEU32,
|
||||
@ -546,12 +546,13 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a `TransformOutput` with a file that contains the documents of the index
|
||||
/// with the attributes reordered accordingly to the `FieldsIdsMap` given as argument.
|
||||
/// Clear all databases. Returns a `TransformOutput` with a file that contains the documents
|
||||
/// of the index with the attributes reordered accordingly to the `FieldsIdsMap` given as argument.
|
||||
///
|
||||
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
||||
pub fn remap_index_documents(
|
||||
pub fn prepare_for_documents_reindexing(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
wtxn: &mut heed::RwTxn<'i, '_>,
|
||||
old_fields_ids_map: FieldsIdsMap,
|
||||
mut new_fields_ids_map: FieldsIdsMap,
|
||||
) -> Result<TransformOutput> {
|
||||
@ -559,7 +560,14 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
let primary_key =
|
||||
self.index.primary_key(wtxn)?.ok_or(UserError::MissingPrimaryKey)?.to_string();
|
||||
let field_distribution = self.index.field_distribution(wtxn)?;
|
||||
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
||||
|
||||
// Delete the soft deleted document ids from the maps inside the external_document_ids structure
|
||||
let new_external_documents_ids = {
|
||||
let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
||||
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
|
||||
external_documents_ids
|
||||
};
|
||||
|
||||
let documents_ids = self.index.documents_ids(wtxn)?;
|
||||
let documents_count = documents_ids.len() as usize;
|
||||
|
||||
@ -638,17 +646,25 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
let mut flattened_documents = flattened_writer.into_inner()?;
|
||||
flattened_documents.seek(SeekFrom::Start(0))?;
|
||||
|
||||
Ok(TransformOutput {
|
||||
let output = TransformOutput {
|
||||
primary_key,
|
||||
fields_ids_map: new_fields_ids_map,
|
||||
field_distribution,
|
||||
external_documents_ids: external_documents_ids.into_static(),
|
||||
external_documents_ids: new_external_documents_ids.into_static(),
|
||||
new_documents_ids: documents_ids,
|
||||
replaced_documents_ids: RoaringBitmap::default(),
|
||||
documents_count,
|
||||
original_documents,
|
||||
flattened_documents,
|
||||
})
|
||||
};
|
||||
|
||||
let new_facets = output.compute_real_facets(wtxn, self.index)?;
|
||||
self.index.put_faceted_fields(wtxn, &new_facets)?;
|
||||
|
||||
// We clear the full database (words-fst, documents ids and documents content).
|
||||
ClearDocuments::new(wtxn, self.index).execute()?;
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@ use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::update::index_documents::IndexDocumentsMethod;
|
||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||
use crate::{FieldsIdsMap, Index, Result};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
@ -291,15 +291,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
false,
|
||||
)?;
|
||||
|
||||
// We remap the documents fields based on the new `FieldsIdsMap`.
|
||||
let output =
|
||||
transform.remap_index_documents(self.wtxn, old_fields_ids_map, fields_ids_map)?;
|
||||
|
||||
let new_facets = output.compute_real_facets(self.wtxn, self.index)?;
|
||||
self.index.put_faceted_fields(self.wtxn, &new_facets)?;
|
||||
|
||||
// We clear the full database (words-fst, documents ids and documents content).
|
||||
ClearDocuments::new(self.wtxn, self.index).execute()?;
|
||||
// We clear the databases and remap the documents fields based on the new `FieldsIdsMap`.
|
||||
let output = transform.prepare_for_documents_reindexing(
|
||||
self.wtxn,
|
||||
old_fields_ids_map,
|
||||
fields_ids_map,
|
||||
)?;
|
||||
|
||||
// We index the generated `TransformOutput` which must contain
|
||||
// all the documents with fields in the newly defined searchable order.
|
||||
@ -736,7 +733,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::update::DeleteDocuments;
|
||||
use crate::update::{ClearDocuments, DeleteDocuments};
|
||||
use crate::{Criterion, Filter, SearchResult};
|
||||
|
||||
#[test]
|
||||
|
Loading…
Reference in New Issue
Block a user