2021-03-12 01:42:21 +08:00
|
|
|
use chrono::Utc;
|
2020-10-26 17:57:34 +08:00
|
|
|
use roaring::RoaringBitmap;
|
2021-04-01 15:07:16 +08:00
|
|
|
use crate::{ExternalDocumentsIds, Index, FieldsDistribution};
|
2020-10-26 17:57:34 +08:00
|
|
|
|
|
|
|
pub struct ClearDocuments<'t, 'u, 'i> {
|
2020-10-30 18:42:00 +08:00
|
|
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
2020-10-26 17:57:34 +08:00
|
|
|
index: &'i Index,
|
2020-12-22 23:21:07 +08:00
|
|
|
_update_id: u64,
|
2020-10-26 17:57:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
2020-12-22 23:21:07 +08:00
|
|
|
pub fn new(
|
|
|
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
|
|
|
index: &'i Index,
|
|
|
|
update_id: u64
|
|
|
|
) -> ClearDocuments<'t, 'u, 'i> {
|
|
|
|
|
|
|
|
ClearDocuments { wtxn, index, _update_id: update_id }
|
2020-10-26 17:57:34 +08:00
|
|
|
}
|
|
|
|
|
2021-03-09 17:24:27 +08:00
|
|
|
pub fn execute(self) -> anyhow::Result<u64> {
|
2021-03-12 01:42:21 +08:00
|
|
|
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
2020-10-26 17:57:34 +08:00
|
|
|
let Index {
|
2020-10-30 17:56:35 +08:00
|
|
|
env: _env,
|
2020-10-26 17:57:34 +08:00
|
|
|
main: _main,
|
|
|
|
word_docids,
|
2021-02-03 17:30:33 +08:00
|
|
|
word_prefix_docids,
|
2020-10-26 17:57:34 +08:00
|
|
|
docid_word_positions,
|
|
|
|
word_pair_proximity_docids,
|
2021-02-10 17:28:15 +08:00
|
|
|
word_prefix_pair_proximity_docids,
|
2020-11-11 23:04:04 +08:00
|
|
|
facet_field_id_value_docids,
|
2020-12-03 01:31:41 +08:00
|
|
|
field_id_docid_facet_values,
|
2020-10-26 17:57:34 +08:00
|
|
|
documents,
|
|
|
|
} = self.index;
|
|
|
|
|
2020-11-11 23:04:04 +08:00
|
|
|
// We retrieve the number of documents ids that we are deleting.
|
|
|
|
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
2021-01-21 00:27:43 +08:00
|
|
|
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
2020-10-26 17:57:34 +08:00
|
|
|
|
2020-11-11 23:04:04 +08:00
|
|
|
// We clean some of the main engine datastructures.
|
|
|
|
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
2021-02-03 17:36:07 +08:00
|
|
|
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
|
2020-11-23 00:53:33 +08:00
|
|
|
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
|
2020-10-26 17:57:34 +08:00
|
|
|
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
2021-04-01 15:07:16 +08:00
|
|
|
self.index.put_fields_distribution(self.wtxn, &FieldsDistribution::default())?;
|
2020-10-26 17:57:34 +08:00
|
|
|
|
2020-11-23 20:08:57 +08:00
|
|
|
// We clean all the faceted documents ids.
|
|
|
|
for (field_id, _) in faceted_fields {
|
|
|
|
self.index.put_faceted_documents_ids(self.wtxn, field_id, &RoaringBitmap::default())?;
|
|
|
|
}
|
|
|
|
|
2020-11-11 23:04:04 +08:00
|
|
|
// Clear the other databases.
|
2020-10-26 17:57:34 +08:00
|
|
|
word_docids.clear(self.wtxn)?;
|
2021-02-03 17:36:07 +08:00
|
|
|
word_prefix_docids.clear(self.wtxn)?;
|
2020-10-26 17:57:34 +08:00
|
|
|
docid_word_positions.clear(self.wtxn)?;
|
|
|
|
word_pair_proximity_docids.clear(self.wtxn)?;
|
2021-02-10 17:28:15 +08:00
|
|
|
word_prefix_pair_proximity_docids.clear(self.wtxn)?;
|
2020-11-11 23:04:04 +08:00
|
|
|
facet_field_id_value_docids.clear(self.wtxn)?;
|
2020-12-03 01:31:41 +08:00
|
|
|
field_id_docid_facet_values.clear(self.wtxn)?;
|
2020-10-26 17:57:34 +08:00
|
|
|
documents.clear(self.wtxn)?;
|
|
|
|
|
2020-11-11 23:04:04 +08:00
|
|
|
Ok(number_of_documents)
|
2020-10-26 17:57:34 +08:00
|
|
|
}
|
|
|
|
}
|
2021-04-01 15:07:16 +08:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use heed::EnvOpenOptions;

    use crate::update::{IndexDocuments, UpdateFormat};
    use super::*;

    /// Indexes three JSON documents, clears them inside the same write
    /// transaction, commits, and then checks through a read transaction that
    /// the index reports itself as empty.
    #[test]
    fn clear_documents() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let mut env_options = EnvOpenOptions::new();
        env_options.map_size(10 * 1024 * 1024); // 10 MB
        let index = Index::new(env_options, &tmp_dir).unwrap();

        let mut wtxn = index.write_txn().unwrap();

        // Seed the index with three documents.
        let content = &br#"[
            { "id": 0, "name": "kevin", "age": 20 },
            { "id": 1, "name": "kevina" },
            { "id": 2, "name": "benoit", "country": "France" }
        ]"#[..];
        let mut indexing = IndexDocuments::new(&mut wtxn, &index, 0);
        indexing.update_format(UpdateFormat::Json);
        indexing.execute(content, |_, _| ()).unwrap();

        // Clear all documents from the database; the update reports how many
        // documents were present.
        let cleared = ClearDocuments::new(&mut wtxn, &index, 1).execute().unwrap();
        assert_eq!(cleared, 3);

        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // The fields ids map still holds its 4 entries after clearing.
        assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 4);

        // Main data structures are back to empty.
        assert!(index.words_fst(&rtxn).unwrap().is_empty());
        assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
        assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
        assert!(index.documents_ids(&rtxn).unwrap().is_empty());
        assert!(index.fields_distribution(&rtxn).unwrap().is_empty());

        // Every other database is empty as well.
        assert!(index.word_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
        assert!(index.docid_word_positions.is_empty(&rtxn).unwrap());
        assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_pair_proximity_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_field_id_value_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_values.is_empty(&rtxn).unwrap());
        assert!(index.documents.is_empty(&rtxn).unwrap());
    }
}
|