From b3a21d5a5003689fbb5e549695fe8df1b7fcb067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 3 Feb 2021 10:30:33 +0100 Subject: [PATCH] Introduce the getters and setters for the words prefixes FST --- infos/src/main.rs | 1 + milli/src/index.rs | 24 +++++++++++++++++++++++- milli/src/update/clear_documents.rs | 1 + milli/src/update/delete_documents.rs | 1 + 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/infos/src/main.rs b/infos/src/main.rs index e874385e6..916b5ba50 100644 --- a/infos/src/main.rs +++ b/infos/src/main.rs @@ -311,6 +311,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho env: _env, main, word_docids, + word_prefix_docids, docid_word_positions, word_pair_proximity_docids, facet_field_id_value_docids, diff --git a/milli/src/index.rs b/milli/src/index.rs index c0dd22986..5763f78ee 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -27,6 +27,7 @@ pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; pub const WORDS_FST_KEY: &str = "words-fst"; +pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; #[derive(Clone)] pub struct Index { @@ -36,6 +37,8 @@ pub struct Index { pub main: PolyDatabase, /// A word and all the documents ids containing the word. pub word_docids: Database, + /// A prefix of word and all the documents ids containing this prefix. + pub word_prefix_docids: Database, /// Maps a word and a document id (u32) to all the positions where the given word appears. pub docid_word_positions: Database, /// Maps the proximity between a pair of words with all the docids where this relation appears. 
@@ -50,11 +53,12 @@ pub struct Index { impl Index { pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result { - options.max_dbs(7); + options.max_dbs(8); let env = options.open(path)?; let main = env.create_poly_database(Some("main"))?; let word_docids = env.create_database(Some("word-docids"))?; + let word_prefix_docids = env.create_database(Some("word-prefix-docids"))?; let docid_word_positions = env.create_database(Some("docid-word-positions"))?; let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?; let facet_field_id_value_docids = env.create_database(Some("facet-field-id-value-docids"))?; @@ -65,6 +69,7 @@ impl Index { env, main, word_docids, + word_prefix_docids, docid_word_positions, word_pair_proximity_docids, facet_field_id_value_docids, @@ -328,6 +333,23 @@ impl Index { } } + /* words prefixes fst */ + + /// Writes the FST which is the words prefixes dictionary of the engine. + pub fn put_words_prefixes_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { + self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes()) + } + + /// Returns the FST which is the words prefixes dictionary of the engine. + pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result>> { + match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? { + Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), + None => Ok(fst::Set::default().map_data(Cow::Owned)?), + } + } + + /* documents */ + /// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing. 
pub fn documents<'t>( &self, diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index a84596901..6f0d457b7 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -22,6 +22,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { env: _env, main: _main, word_docids, + word_prefix_docids, docid_word_positions, word_pair_proximity_docids, facet_field_id_value_docids, diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 932589dd7..2efed359f 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -79,6 +79,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { env: _env, main: _main, word_docids, + word_prefix_docids, docid_word_positions, word_pair_proximity_docids, facet_field_id_value_docids,