From a36c99189716b27a9136a6e9dff25ce8c33330f2 Mon Sep 17 00:00:00 2001
From: qdequele
Date: Wed, 18 Sep 2019 15:04:13 +0200
Subject: [PATCH] feat: add a method to get an iterator over all documents ids

---
 .../src/database/index/documents_index.rs     | 41 +++++++++++++++++++
 meilidb-data/src/database/index/mod.rs        | 10 +++++
 meilidb-data/tests/updates.rs                 | 25 +++++++++++
 3 files changed, 76 insertions(+)

diff --git a/meilidb-data/src/database/index/documents_index.rs b/meilidb-data/src/database/index/documents_index.rs
index bde8531c7..013b23f09 100644
--- a/meilidb-data/src/database/index/documents_index.rs
+++ b/meilidb-data/src/database/index/documents_index.rs
@@ -55,6 +55,19 @@ impl DocumentsIndex {
         Ok(DocumentFieldsIter(iter))
     }
 
+    /// Returns an iterator over the ids of the documents stored in this index.
+    ///
+    /// Ids are taken from the stored attribute keys, and consecutive keys
+    /// carrying the same id are reported once (see `DocumentsIdsIter`).
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error returned when the underlying iterator cannot be created.
+    pub fn documents_ids(&self) -> RocksDbResult<DocumentsIdsIter> {
+        let iter = DocumentsKeysIter(self.0.iter()?);
+        Ok(DocumentsIdsIter { inner: iter, last: None })
+    }
+
     pub fn documents_fields_repartition(&self, schema: Schema) -> RocksDbResult<HashMap<String, u64>> {
         let iter = self.0.iter()?;
         let mut repartition_attributes_id = HashMap::new();
@@ -120,3 +133,31 @@ impl Iterator for DocumentsKeysIter<'_> {
         }
     }
 }
+
+/// Iterator over the ids of the documents found in a `DocumentsIndex`.
+///
+/// Wraps a `DocumentsKeysIter` and skips every key whose document id equals the
+/// id yielded just before, so a run of keys for one document produces one id.
+/// NOTE(review): ids are globally unique only if all keys of a document are
+/// adjacent in the underlying iteration order -- confirm against the key encoding.
+pub struct DocumentsIdsIter<'a> {
+    /// Iterator over the stored document attribute keys.
+    inner: DocumentsKeysIter<'a>,
+    /// Last id yielded, used to skip consecutive keys of the same document.
+    last: Option<DocumentId>,
+}
+
+impl Iterator for DocumentsIdsIter<'_> {
+    type Item = DocumentId;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Iterate by mutable reference so the next call resumes after this key.
+        for DocumentAttrKey { document_id, .. } in &mut self.inner {
+            if self.last != Some(document_id) {
+                self.last = Some(document_id);
+                return Some(document_id)
+            }
+        }
+        None
+    }
+}
diff --git a/meilidb-data/src/database/index/mod.rs b/meilidb-data/src/database/index/mod.rs
index 5ecfedafa..9ee2e6804 100644
--- a/meilidb-data/src/database/index/mod.rs
+++ b/meilidb-data/src/database/index/mod.rs
@@ -19,6 +19,7 @@ use crate::serde::{Deserializer, DeserializerError};
 
 pub use self::custom_settings_index::{CustomSettingsIndex, RankingOrdering, StopWords, RankingOrder, DistinctField, RankingRules};
 pub use self::common_index::CommonIndex;
+pub use self::documents_index::DocumentsIdsIter;
 use self::docs_words_index::DocsWordsIndex;
 use self::documents_index::DocumentsIndex;
 use self::main_index::MainIndex;
@@ -374,6 +375,14 @@ impl Index {
         Ok(self.update_status(update_id)?.unwrap())
     }
 
+    /// Returns an iterator over the ids of the documents stored in this index.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error reported by the documents index.
+    pub fn documents_ids(&self) -> Result<DocumentsIdsIter, Error> {
+        Ok(self.documents_index.documents_ids()?)
+    }
+
     pub fn document<T>(
         &self,
         fields: Option<&HashSet<&str>>,
diff --git a/meilidb-data/tests/updates.rs b/meilidb-data/tests/updates.rs
index 8654b8ff5..d106769cb 100644
--- a/meilidb-data/tests/updates.rs
+++ b/meilidb-data/tests/updates.rs
@@ -189,3 +189,28 @@ fn custom_settings() {
     assert_eq!(ret_distinct_field, distinct_field);
     assert_eq!(ret_ranking_rules, ranking_rules);
 }
+
+#[test]
+fn documents_ids() {
+    let tmp_dir = tempfile::tempdir().unwrap();
+    let database = Database::open(&tmp_dir).unwrap();
+
+    let schema = simple_schema();
+    let index = database.create_index("hello", schema).unwrap();
+
+    let doc1 = json!({ "objectId": 123, "title": "hello" });
+    let doc2 = json!({ "objectId": 456, "title": "world" });
+    let doc3 = json!({ "objectId": 789 });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc1);
+    addition.update_document(&doc2);
+    addition.update_document(&doc3);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(status.result.is_ok());
+
+    // Three distinct documents were added, so three ids must be reported.
+    let documents_ids_count = index.documents_ids().unwrap().count();
+    assert_eq!(documents_ids_count, 3);
+}