From 80d0f9c49d16ee94c6ac10471977d8617e9f884d Mon Sep 17 00:00:00 2001 From: mpostma Date: Thu, 11 Mar 2021 18:32:04 +0100 Subject: [PATCH 1/3] methods to update index time metadata --- Cargo.lock | 2 ++ milli/Cargo.toml | 1 + milli/src/index.rs | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 930ace50f..0f21f2a83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -302,6 +302,7 @@ dependencies = [ "libc", "num-integer", "num-traits", + "serde", "time", "winapi 0.3.9", ] @@ -1277,6 +1278,7 @@ dependencies = [ "anyhow", "bstr", "byteorder", + "chrono", "criterion", "crossbeam-channel", "csv", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 2eb40dc94..b63e34b32 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -8,6 +8,7 @@ edition = "2018" anyhow = "1.0.38" bstr = "0.2.15" byteorder = "1.4.2" +chrono = { version = "0.4.19", features = ["serde"] } crossbeam-channel = "0.5.0" csv = "1.1.5" either = "1.6.1" diff --git a/milli/src/index.rs b/milli/src/index.rs index c0a00080e..c7a855e1f 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -6,6 +6,7 @@ use anyhow::Context; use heed::types::*; use heed::{PolyDatabase, Database, RwTxn, RoTxn}; use roaring::RoaringBitmap; +use chrono::{Utc, DateTime}; use crate::facet::FacetType; use crate::fields_ids_map::FieldsIdsMap; @@ -28,6 +29,8 @@ pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; pub const WORDS_FST_KEY: &str = "words-fst"; pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; +const CREATED_AT_KEY: &str = "created-at"; +const UPDATED_AT_KEY: &str ="updated-at"; #[derive(Clone)] pub struct Index { @@ -68,6 +71,17 @@ impl Index { let field_id_docid_facet_values = env.create_database(Some("field-id-docid-facet-values"))?; let documents = env.create_database(Some("documents"))?; + { + let mut txn = env.write_txn()?; + // 
The db was just created, we update its metadata with the relevant information. + if main.get::<_, Str, SerdeJson<DateTime<Utc>>>(&txn, CREATED_AT_KEY)?.is_none() { + let now = Utc::now(); + main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, UPDATED_AT_KEY, &now)?; + main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, CREATED_AT_KEY, &now)?; + txn.commit()?; + } + } + Ok(Index { env, main, @@ -393,4 +407,24 @@ impl Index { pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> { Search::new(rtxn, self) } + + /// Returns the index creation time. + pub fn created_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> { + let time = self.main + .get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, CREATED_AT_KEY)? + .expect("Index without creation time"); + Ok(time) + } + + /// Returns the index creation time. + pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> { + let time = self.main + .get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, UPDATED_AT_KEY)? + .expect("Index without update time"); + Ok(time) + } + + pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime<Utc>) -> heed::Result<()> { + self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, UPDATED_AT_KEY, &time) + } } From 615fe095e160e46fe51306a3a90f6c635abbef47 Mon Sep 17 00:00:00 2001 From: mpostma Date: Thu, 11 Mar 2021 18:42:21 +0100 Subject: [PATCH 2/3] update index updated at on index writes --- milli/src/index.rs | 4 ++-- milli/src/update/clear_documents.rs | 2 ++ milli/src/update/delete_documents.rs | 2 ++ milli/src/update/facets.rs | 2 ++ milli/src/update/index_documents/mod.rs | 2 ++ milli/src/update/settings.rs | 2 ++ milli/src/update/words_prefixes.rs | 2 ++ 7 files changed, 14 insertions(+), 2 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index c7a855e1f..cf31b54a8 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -30,7 +30,7 @@ pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; pub const WORDS_FST_KEY: &str = "words-fst"; pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; const CREATED_AT_KEY: &str = 
"created-at"; -const UPDATED_AT_KEY: &str ="updated-at"; +const UPDATED_AT_KEY: &str = "updated-at"; #[derive(Clone)] pub struct Index { @@ -416,7 +416,7 @@ impl Index { Ok(time) } - /// Returns the index creation time. + /// Returns the index last updated time. pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> { let time = self.main .get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, UPDATED_AT_KEY)? diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 82e35d703..5ae3680d3 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -1,3 +1,4 @@ +use chrono::Utc; use roaring::RoaringBitmap; use crate::{ExternalDocumentsIds, Index}; @@ -18,6 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { } pub fn execute(self) -> anyhow::Result { + self.index.set_updated_at(self.wtxn, &Utc::now())?; let Index { env: _env, main: _main, diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index d1007376a..0b112ceb1 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -1,4 +1,5 @@ use anyhow::anyhow; +use chrono::Utc; use fst::IntoStreamer; use heed::types::ByteSlice; use roaring::RoaringBitmap; @@ -52,6 +53,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { } pub fn execute(self) -> anyhow::Result { + self.index.set_updated_at(self.wtxn, &Utc::now())?; // We retrieve the current documents ids that are in the database. 
let mut documents_ids = self.index.documents_ids(self.wtxn)?; diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index bac5f3c86..62da5af7e 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -2,6 +2,7 @@ use std::cmp; use std::fs::File; use std::num::NonZeroUsize; +use chrono::Utc; use grenad::{CompressionType, Reader, Writer, FileFuse}; use heed::types::{ByteSlice, DecodeIgnore}; use heed::{BytesEncode, Error}; @@ -57,6 +58,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { } pub fn execute(self) -> anyhow::Result<()> { + self.index.set_updated_at(self.wtxn, &Utc::now())?; // We get the faceted fields to be able to create the facet levels. let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index d55f421dc..ccbd95c7f 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -8,6 +8,7 @@ use std::time::Instant; use anyhow::Context; use bstr::ByteSlice as _; +use chrono::Utc; use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType}; use heed::types::ByteSlice; use log::{debug, info, error}; @@ -316,6 +317,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { R: io::Read, F: Fn(UpdateIndexingStep, u64) + Sync, { + self.index.set_updated_at(self.wtxn, &Utc::now())?; let before_transform = Instant::now(); let update_id = self.update_id; let progress_callback = |step| progress_callback(step, update_id); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index fd91d3468..7ce8b98c1 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::str::FromStr; use anyhow::Context; +use chrono::Utc; use grenad::CompressionType; use itertools::Itertools; use rayon::ThreadPool; @@ -249,6 +250,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { where F: 
Fn(UpdateIndexingStep, u64) + Sync { + self.index.set_updated_at(self.wtxn, &Utc::now())?; let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; self.update_displayed()?; let facets_updated = self.update_facets()?; diff --git a/milli/src/update/words_prefixes.rs b/milli/src/update/words_prefixes.rs index 70b82b217..f2fe526a2 100644 --- a/milli/src/update/words_prefixes.rs +++ b/milli/src/update/words_prefixes.rs @@ -1,6 +1,7 @@ use std::iter::FromIterator; use std::str; +use chrono::Utc; use fst::automaton::Str; use fst::{Automaton, Streamer, IntoStreamer}; use grenad::CompressionType; @@ -68,6 +69,7 @@ impl<'t, 'u, 'i> WordsPrefixes<'t, 'u, 'i> { } pub fn execute(self) -> anyhow::Result<()> { + self.index.set_updated_at(self.wtxn, &Utc::now())?; // Clear the words prefixes datastructures. self.index.word_prefix_docids.clear(self.wtxn)?; self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?; From f0210453a60c96b33b24aef4c04730773c97b446 Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 12 Mar 2021 14:43:17 +0100 Subject: [PATCH 3/3] add updated at on put primary key --- milli/src/index.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/src/index.rs b/milli/src/index.rs index cf31b54a8..a14747788 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -142,6 +142,7 @@ impl Index { /// Writes the documents primary key, this is the field name that is used to store the id. pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { + self.set_updated_at(wtxn, &Utc::now())?; self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key) }