2023-11-22 18:21:19 +01:00
|
|
|
use heed::RwTxn;
|
2020-10-26 10:57:34 +01:00
|
|
|
use roaring::RoaringBitmap;
|
2022-02-15 11:41:55 +01:00
|
|
|
use time::OffsetDateTime;
|
2021-06-14 16:46:19 +02:00
|
|
|
|
2023-10-28 12:56:46 +02:00
|
|
|
use crate::{FieldDistribution, Index, Result};
|
2020-10-26 10:57:34 +01:00
|
|
|
|
2023-11-22 18:21:19 +01:00
|
|
|
pub struct ClearDocuments<'t, 'i> {
|
|
|
|
wtxn: &'t mut RwTxn<'i>,
|
2020-10-26 10:57:34 +01:00
|
|
|
index: &'i Index,
|
|
|
|
}
|
|
|
|
|
2023-11-22 18:21:19 +01:00
|
|
|
impl<'t, 'i> ClearDocuments<'t, 'i> {
|
|
|
|
pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> {
|
2021-11-03 13:12:01 +01:00
|
|
|
ClearDocuments { wtxn, index }
|
2020-10-26 10:57:34 +01:00
|
|
|
}
|
|
|
|
|
2024-01-23 09:42:48 +01:00
|
|
|
#[tracing::instrument(
|
|
|
|
level = "trace",
|
|
|
|
skip(self),
|
|
|
|
target = "indexing::documents",
|
|
|
|
name = "clear_documents"
|
|
|
|
)]
|
2021-06-14 16:46:19 +02:00
|
|
|
pub fn execute(self) -> Result<u64> {
|
2022-02-15 11:41:55 +01:00
|
|
|
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
2020-10-26 10:57:34 +01:00
|
|
|
let Index {
|
2020-10-30 10:56:35 +01:00
|
|
|
env: _env,
|
2020-10-26 10:57:34 +01:00
|
|
|
main: _main,
|
2023-10-28 12:56:46 +02:00
|
|
|
external_documents_ids,
|
2020-10-26 10:57:34 +01:00
|
|
|
word_docids,
|
2022-03-24 15:22:57 +01:00
|
|
|
exact_word_docids,
|
2021-02-03 10:30:33 +01:00
|
|
|
word_prefix_docids,
|
2022-03-25 10:49:34 +01:00
|
|
|
exact_word_prefix_docids,
|
2020-10-26 10:57:34 +01:00
|
|
|
word_pair_proximity_docids,
|
2021-10-05 11:18:42 +02:00
|
|
|
word_position_docids,
|
2023-03-23 09:22:01 +01:00
|
|
|
word_fid_docids,
|
2021-05-27 15:27:41 +02:00
|
|
|
field_id_word_count_docids,
|
2021-10-05 11:18:42 +02:00
|
|
|
word_prefix_position_docids,
|
2023-03-23 09:22:01 +01:00
|
|
|
word_prefix_fid_docids,
|
2022-10-12 06:28:36 -05:00
|
|
|
script_language_docids,
|
2021-04-21 15:43:44 +02:00
|
|
|
facet_id_f64_docids,
|
|
|
|
facet_id_string_docids,
|
2023-07-20 17:57:07 +02:00
|
|
|
facet_id_normalized_string_strings,
|
2023-06-28 15:36:32 +02:00
|
|
|
facet_id_string_fst,
|
2022-07-19 09:30:19 +02:00
|
|
|
facet_id_exists_docids,
|
2023-03-08 16:14:00 +01:00
|
|
|
facet_id_is_null_docids,
|
2023-03-14 18:08:12 +01:00
|
|
|
facet_id_is_empty_docids,
|
2021-04-21 15:43:44 +02:00
|
|
|
field_id_docid_facet_f64s,
|
|
|
|
field_id_docid_facet_strings,
|
2023-12-07 13:33:15 +01:00
|
|
|
vector_arroy,
|
|
|
|
embedder_category_id: _,
|
2020-10-26 10:57:34 +01:00
|
|
|
documents,
|
|
|
|
} = self.index;
|
|
|
|
|
2022-06-13 17:59:34 +02:00
|
|
|
let empty_roaring = RoaringBitmap::default();
|
|
|
|
|
2020-11-11 16:04:04 +01:00
|
|
|
// We retrieve the number of documents ids that we are deleting.
|
|
|
|
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
2020-10-26 10:57:34 +01:00
|
|
|
|
2020-11-11 16:04:04 +01:00
|
|
|
// We clean some of the main engine datastructures.
|
|
|
|
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
2021-02-03 10:36:07 +01:00
|
|
|
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
|
2022-06-13 17:59:34 +02:00
|
|
|
self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
|
2021-06-21 15:57:41 +02:00
|
|
|
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
2021-08-25 15:32:41 +02:00
|
|
|
self.index.delete_geo_rtree(self.wtxn)?;
|
2021-08-26 17:49:50 +02:00
|
|
|
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
|
2020-10-26 10:57:34 +01:00
|
|
|
|
2024-06-11 09:42:45 +02:00
|
|
|
// Remove all user-provided bits from the configs
|
|
|
|
let mut configs = self.index.embedding_configs(self.wtxn)?;
|
|
|
|
for config in configs.iter_mut() {
|
|
|
|
config.user_provided.clear();
|
|
|
|
}
|
|
|
|
self.index.put_embedding_configs(self.wtxn, configs)?;
|
|
|
|
|
2020-11-11 16:04:04 +01:00
|
|
|
// Clear the other databases.
|
2023-10-28 12:56:46 +02:00
|
|
|
external_documents_ids.clear(self.wtxn)?;
|
2020-10-26 10:57:34 +01:00
|
|
|
word_docids.clear(self.wtxn)?;
|
2022-03-24 15:22:57 +01:00
|
|
|
exact_word_docids.clear(self.wtxn)?;
|
2021-02-03 10:36:07 +01:00
|
|
|
word_prefix_docids.clear(self.wtxn)?;
|
2022-03-25 10:49:34 +01:00
|
|
|
exact_word_prefix_docids.clear(self.wtxn)?;
|
2020-10-26 10:57:34 +01:00
|
|
|
word_pair_proximity_docids.clear(self.wtxn)?;
|
2021-10-05 11:18:42 +02:00
|
|
|
word_position_docids.clear(self.wtxn)?;
|
2023-03-23 09:22:01 +01:00
|
|
|
word_fid_docids.clear(self.wtxn)?;
|
2021-05-27 15:27:41 +02:00
|
|
|
field_id_word_count_docids.clear(self.wtxn)?;
|
2021-10-05 11:18:42 +02:00
|
|
|
word_prefix_position_docids.clear(self.wtxn)?;
|
2023-03-23 09:22:01 +01:00
|
|
|
word_prefix_fid_docids.clear(self.wtxn)?;
|
2022-10-12 06:28:36 -05:00
|
|
|
script_language_docids.clear(self.wtxn)?;
|
2021-04-21 15:43:44 +02:00
|
|
|
facet_id_f64_docids.clear(self.wtxn)?;
|
2023-07-20 17:57:07 +02:00
|
|
|
facet_id_normalized_string_strings.clear(self.wtxn)?;
|
2023-06-28 15:36:32 +02:00
|
|
|
facet_id_string_fst.clear(self.wtxn)?;
|
2022-07-19 09:30:19 +02:00
|
|
|
facet_id_exists_docids.clear(self.wtxn)?;
|
2023-03-08 16:14:00 +01:00
|
|
|
facet_id_is_null_docids.clear(self.wtxn)?;
|
2023-03-14 18:08:12 +01:00
|
|
|
facet_id_is_empty_docids.clear(self.wtxn)?;
|
2021-04-21 15:43:44 +02:00
|
|
|
facet_id_string_docids.clear(self.wtxn)?;
|
|
|
|
field_id_docid_facet_f64s.clear(self.wtxn)?;
|
|
|
|
field_id_docid_facet_strings.clear(self.wtxn)?;
|
2023-12-07 13:33:15 +01:00
|
|
|
// vector
|
|
|
|
vector_arroy.clear(self.wtxn)?;
|
|
|
|
|
2020-10-26 10:57:34 +01:00
|
|
|
documents.clear(self.wtxn)?;
|
|
|
|
|
2020-11-11 16:04:04 +01:00
|
|
|
Ok(number_of_documents)
|
2020-10-26 10:57:34 +01:00
|
|
|
}
|
|
|
|
}
|
2021-04-01 10:07:16 +03:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::tests::TempIndex;

    /// Indexes three documents, clears them, and checks that the returned
    /// count is correct and that every document-derived structure is empty
    /// afterwards while the fields ids map is preserved.
    #[test]
    fn clear_documents() {
        let index = TempIndex::new();

        let mut wtxn = index.write_txn().unwrap();
        index
            .add_documents_using_wtxn(&mut wtxn, documents!([
                { "id": 0, "name": "kevin", "age": 20 },
                { "id": 1, "name": "kevina" },
                { "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } }
            ]))
            .unwrap();

        // Clear all documents from the database.
        let builder = ClearDocuments::new(&mut wtxn, &index);
        assert_eq!(builder.execute().unwrap(), 3);
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        // the value is 7 because there is `[id, name, age, country, _geo, _geo.lng, _geo.lat]`
        assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 7);

        // Structures stored in the main database.
        assert!(index.words_fst(&rtxn).unwrap().is_empty());
        assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
        assert!(index.external_documents_ids().is_empty(&rtxn).unwrap());
        assert!(index.documents_ids(&rtxn).unwrap().is_empty());
        assert!(index.field_distribution(&rtxn).unwrap().is_empty());
        assert!(index.geo_rtree(&rtxn).unwrap().is_none());
        assert!(index.geo_faceted_documents_ids(&rtxn).unwrap().is_empty());

        // Word-related databases.
        assert!(index.word_docids.is_empty(&rtxn).unwrap());
        assert!(index.exact_word_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
        assert!(index.exact_word_prefix_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_position_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_fid_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_position_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_prefix_fid_docids.is_empty(&rtxn).unwrap());
        assert!(index.script_language_docids.is_empty(&rtxn).unwrap());

        // Facet-related databases.
        assert!(index.facet_id_f64_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_string_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_normalized_string_strings.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_string_fst.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_exists_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_is_null_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_is_empty_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_strings.is_empty(&rtxn).unwrap());

        // Vector store and the documents themselves.
        assert!(index.vector_arroy.is_empty(&rtxn).unwrap());
        assert!(index.documents.is_empty(&rtxn).unwrap());
    }
}
|