From 130fb74928fc06f64ea3413d9d860a4dc99b511b Mon Sep 17 00:00:00 2001 From: qdequele Date: Mon, 13 Jan 2020 19:10:58 +0100 Subject: [PATCH] introduce a new schemaless way --- datasets/movies/settings.json | 3 +- meilisearch-core/examples/from_file.rs | 16 +- meilisearch-core/src/criterion/exact.rs | 4 +- .../src/criterion/sort_by_attr.rs | 2 +- meilisearch-core/src/error.rs | 13 +- meilisearch-core/src/query_builder.rs | 6 +- meilisearch-core/src/raw_indexer.rs | 18 +- meilisearch-core/src/serde/deserializer.rs | 2 +- meilisearch-core/src/serde/mod.rs | 2 +- meilisearch-core/src/serde/serializer.rs | 90 +++---- .../src/store/documents_fields.rs | 30 +-- .../src/store/documents_fields_counts.rs | 43 ++-- meilisearch-core/src/store/main.rs | 22 +- meilisearch-core/src/store/mod.rs | 46 +++- .../src/update/documents_addition.rs | 70 +----- .../src/update/documents_deletion.rs | 17 +- .../src/update/settings_update.rs | 220 ++++-------------- meilisearch-schema/src/error.rs | 2 + meilisearch-schema/src/fields_map.rs | 23 +- meilisearch-schema/src/lib.rs | 76 ++++-- meilisearch-schema/src/schema.rs | 77 +++++- meilisearch-types/src/lib.rs | 1 + 22 files changed, 365 insertions(+), 418 deletions(-) diff --git a/datasets/movies/settings.json b/datasets/movies/settings.json index 126a7b403..c920e3220 100644 --- a/datasets/movies/settings.json +++ b/datasets/movies/settings.json @@ -7,6 +7,5 @@ "overview", "release_date", "poster" - ], - "attributes_ranked": ["release_date"] + ] } diff --git a/meilisearch-core/examples/from_file.rs b/meilisearch-core/examples/from_file.rs index 984c36e1f..8870b999c 100644 --- a/meilisearch-core/examples/from_file.rs +++ b/meilisearch-core/examples/from_file.rs @@ -13,7 +13,8 @@ use structopt::StructOpt; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use meilisearch_core::{Database, Highlight, ProcessedUpdateResult}; -use meilisearch_schema::SchemaAttr; +use meilisearch_core::settings::Settings; +use meilisearch_schema::FieldId; // #[cfg(target_os = "linux")] #[global_allocator] @@ -121,7 +122,8 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box Result<(), Box< }; let attr = schema - .attribute(&filter) + .get_id(filter) .expect("Could not find filtered attribute"); builder.with_filter(move |document_id| { @@ -388,11 +390,11 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box< for (name, text) in document.0 { print!("{}: ", name); - let attr = schema.attribute(&name).unwrap(); + let attr = schema.get_id(&name).unwrap(); let highlights = doc .highlights .iter() - .filter(|m| SchemaAttr::new(m.attribute) == attr) + .filter(|m| FieldId::new(m.attribute) == attr) .cloned(); let (text, highlights) = crop_text(&text, highlights, command.char_context); @@ -407,8 +409,8 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box< let mut matching_attributes = HashSet::new(); for highlight in doc.highlights { - let attr = SchemaAttr::new(highlight.attribute); - let name = schema.attribute_name(attr); + let attr = FieldId::new(highlight.attribute); + let name = schema.get_name(attr); matching_attributes.insert(name); } diff --git a/meilisearch-core/src/criterion/exact.rs b/meilisearch-core/src/criterion/exact.rs index 93729ee58..b221498ac 100644 --- a/meilisearch-core/src/criterion/exact.rs +++ b/meilisearch-core/src/criterion/exact.rs @@ -1,6 +1,6 @@ use std::cmp::{Ordering, Reverse}; use std::collections::hash_map::{HashMap, Entry}; -use meilisearch_schema::SchemaAttr; +use meilisearch_schema::IndexedPos; use slice_group_by::GroupBy; use crate::{RawDocument, MResult}; use crate::bucket_sort::BareMatch; @@ -32,7 +32,7 @@ impl Criterion for Exact { for bm in group { for di in ctx.postings_lists[bm.postings_list].as_ref() { - let attr = SchemaAttr(di.attribute); + let attr = IndexedPos(di.attribute); let count = match fields_counts.entry(attr) { Entry::Occupied(entry) => *entry.get(), Entry::Vacant(entry) => { diff --git a/meilisearch-core/src/criterion/sort_by_attr.rs b/meilisearch-core/src/criterion/sort_by_attr.rs index 3f2fb9461..7183c9b49 100644 --- a/meilisearch-core/src/criterion/sort_by_attr.rs +++ b/meilisearch-core/src/criterion/sort_by_attr.rs @@ -69,7 +69,7 @@ impl<'a> SortByAttr<'a> { reversed: bool, ) -> Result, SortByAttrError> { let field_id = match schema.get_id(attr_name) { - Some(field_id) => *field_id, + Some(field_id) => field_id, None => return Err(SortByAttrError::AttributeNotFound), }; diff --git a/meilisearch-core/src/error.rs b/meilisearch-core/src/error.rs index 5ebd1c947..739b8281a 100644 --- a/meilisearch-core/src/error.rs +++ b/meilisearch-core/src/error.rs @@ -8,11 +8,12 @@ pub type MResult = Result; pub enum Error { Io(io::Error), IndexAlreadyExists, - SchemaDiffer, + MissingSchemaIdentifier, SchemaMissing, WordIndexMissing, MissingDocumentId, MaxFieldsLimitExceeded, + Schema(meilisearch_schema::Error), Zlmdb(heed::Error), Fst(fst::Error), SerdeJson(SerdeJsonError), @@ -28,6 +29,12 @@ impl From for Error { } } +impl From for Error { + fn from(error: meilisearch_schema::Error) -> Error { + Error::Schema(error) + } +} + impl From for Error { fn from(error: heed::Error) -> Error { Error::Zlmdb(error) @@ -76,10 +83,12 @@ impl fmt::Display for Error { match self { Io(e) => write!(f, "{}", e), IndexAlreadyExists => write!(f, "index already exists"), - SchemaDiffer => write!(f, "schemas differ"), + MissingSchemaIdentifier => write!(f, "schema cannot be build without identifier"), SchemaMissing => write!(f, "this index does not have a schema"), WordIndexMissing => write!(f, "this index does not have a word index"), MissingDocumentId => write!(f, "document id is missing"), + MaxFieldsLimitExceeded => write!(f, "maximum field in a document is exceeded"), + Schema(e) => write!(f, "schemas error; {}", e), Zlmdb(e) => write!(f, "heed error; {}", e), Fst(e) => write!(f, "fst error; {}", e), SerdeJson(e) => write!(f, "serde json error; {}", e), diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index 52753b01a..16b1645ae 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -136,7 +136,7 @@ mod tests { use std::iter::FromIterator; use fst::{IntoStreamer, Set}; - use meilisearch_schema::SchemaAttr; + use meilisearch_schema::IndexedPos; use sdset::SetBuf; use tempfile::TempDir; @@ -295,14 +295,14 @@ mod tests { for ((docid, attr, _), count) in fields_counts { let prev = index .documents_fields_counts - .document_field_count(&mut writer, docid, SchemaAttr(attr)) + .document_field_count(&mut writer, docid, IndexedPos(attr)) .unwrap(); let prev = prev.unwrap_or(0); index .documents_fields_counts - .put_document_field_count(&mut writer, docid, SchemaAttr(attr), prev + count) + .put_document_field_count(&mut writer, docid, IndexedPos(attr), prev + count) .unwrap(); } diff --git a/meilisearch-core/src/raw_indexer.rs b/meilisearch-core/src/raw_indexer.rs index b573300cd..f20088ac0 100644 --- a/meilisearch-core/src/raw_indexer.rs +++ b/meilisearch-core/src/raw_indexer.rs @@ -180,16 +180,16 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O mod tests { use super::*; - use meilisearch_schema::SchemaAttr; + use meilisearch_schema::IndexedPos; #[test] fn strange_apostrophe() { let mut indexer = RawIndexer::new(fst::Set::default()); let docid = DocumentId(0); - let attr = SchemaAttr(0); + let indexed_pos = IndexedPos(0); let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !"; - indexer.index_text(docid, attr, text); + indexer.index_text(docid, indexed_pos, text); let Indexed { words_doc_indexes, .. @@ -209,9 +209,9 @@ mod tests { let mut indexer = RawIndexer::new(fst::Set::default()); let docid = DocumentId(0); - let attr = SchemaAttr(0); + let indexed_pos = IndexedPos(0); let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"]; - indexer.index_text_seq(docid, attr, text); + indexer.index_text_seq(docid, indexed_pos, text); let Indexed { words_doc_indexes, .. @@ -234,9 +234,9 @@ mod tests { let mut indexer = RawIndexer::new(stop_words); let docid = DocumentId(0); - let attr = SchemaAttr(0); + let indexed_pos = IndexedPos(0); let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !"; - indexer.index_text(docid, attr, text); + indexer.index_text(docid, indexed_pos, text); let Indexed { words_doc_indexes, .. @@ -258,9 +258,9 @@ mod tests { let mut indexer = RawIndexer::new(fst::Set::default()); let docid = DocumentId(0); - let attr = SchemaAttr(0); + let indexed_pos = IndexedPos(0); let text = "🇯🇵"; - indexer.index_text(docid, attr, text); + indexer.index_text(docid, indexed_pos, text); let Indexed { words_doc_indexes, .. diff --git a/meilisearch-core/src/serde/deserializer.rs b/meilisearch-core/src/serde/deserializer.rs index b180209b0..1ef0b85c8 100644 --- a/meilisearch-core/src/serde/deserializer.rs +++ b/meilisearch-core/src/serde/deserializer.rs @@ -99,7 +99,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> { let ioread = SerdeJsonIoRead::new(cursor); let value = Value(SerdeJsonDeserializer::new(ioread)); - Some((*attribute_name, value)) + Some((attribute_name, value)) } else { None } diff --git a/meilisearch-core/src/serde/mod.rs b/meilisearch-core/src/serde/mod.rs index 0e2d58a2c..9a32d7843 100644 --- a/meilisearch-core/src/serde/mod.rs +++ b/meilisearch-core/src/serde/mod.rs @@ -20,7 +20,7 @@ pub use self::convert_to_string::ConvertToString; pub use self::deserializer::{Deserializer, DeserializerError}; pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string}; pub use self::indexer::Indexer; -pub use self::serializer::{serialize_value, Serializer}; +pub use self::serializer::{serialize_value, serialize_value_with_id, Serializer}; use std::{error::Error, fmt}; diff --git a/meilisearch-core/src/serde/serializer.rs b/meilisearch-core/src/serde/serializer.rs index cf3be929b..e909b38b4 100644 --- a/meilisearch-core/src/serde/serializer.rs +++ b/meilisearch-core/src/serde/serializer.rs @@ -1,4 +1,4 @@ -use meilisearch_schema::{Schema, FieldsMap}; +use meilisearch_schema::{Schema, FieldId}; use serde::ser; use crate::database::MainT; @@ -10,12 +10,11 @@ use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError}; pub struct Serializer<'a, 'b> { pub txn: &'a mut heed::RwTxn<'b, MainT>, - pub schema: &'a Schema, + pub schema: &'a mut Schema, pub document_store: DocumentsFields, pub document_fields_counts: DocumentsFieldsCounts, pub indexer: &'a mut RawIndexer, pub ranked_map: &'a mut RankedMap, - pub fields_map: &'a mut FieldsMap, pub document_id: DocumentId, } @@ -159,7 +158,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { document_fields_counts: self.document_fields_counts, indexer: self.indexer, ranked_map: self.ranked_map, - fields_map: self.fields_map, current_key_name: None, }) } @@ -177,7 +175,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { document_fields_counts: self.document_fields_counts, indexer: self.indexer, ranked_map: self.ranked_map, - fields_map: self.fields_map, }) } @@ -196,13 +193,12 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { pub struct MapSerializer<'a, 'b> { txn: &'a mut heed::RwTxn<'b, MainT>, - schema: &'a Schema, + schema: &'a mut Schema, document_id: DocumentId, document_store: DocumentsFields, document_fields_counts: DocumentsFieldsCounts, indexer: &'a mut RawIndexer, ranked_map: &'a mut RankedMap, - fields_map: &'a mut FieldsMap, current_key_name: Option, } @@ -237,21 +233,17 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> { V: ser::Serialize, { let key = key.serialize(ConvertToString)?; - match self.schema.attribute(&key) { - Some(attribute) => serialize_value( - self.txn, - attribute, - self.schema.props(attribute), - self.document_id, - self.document_store, - self.document_fields_counts, - self.indexer, - self.ranked_map, - self.fields_map, - value, - ), - None => Ok(()), - } + serialize_value( + self.txn, + key, + self.schema, + self.document_id, + self.document_store, + self.document_fields_counts, + self.indexer, + self.ranked_map, + value, + ) } fn end(self) -> Result { @@ -261,13 +253,12 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> { pub struct StructSerializer<'a, 'b> { txn: &'a mut heed::RwTxn<'b, MainT>, - schema: &'a Schema, + schema: &'a mut Schema, document_id: DocumentId, document_store: DocumentsFields, document_fields_counts: DocumentsFieldsCounts, indexer: &'a mut RawIndexer, ranked_map: &'a mut RankedMap, - fields_map: &'a mut FieldsMap, } impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { @@ -282,19 +273,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { where T: ser::Serialize, { - // let id = fields_map.insert(key)?; - - // let attribute = match self.schema.attribute(id) { - // Some(attribute) => attribute, - // None => { - - // }, - // } - serialize_value( self.txn, - attribute, - self.schema.props(attribute), + key.to_string(), + self.schema, self.document_id, self.document_store, self.document_fields_counts, @@ -311,7 +293,36 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { pub fn serialize_value<'a, T: ?Sized>( txn: &mut heed::RwTxn, - attribute: &'static str, + attribute: String, + schema: &'a mut Schema, + document_id: DocumentId, + document_store: DocumentsFields, + documents_fields_counts: DocumentsFieldsCounts, + indexer: &mut RawIndexer, + ranked_map: &mut RankedMap, + value: &T, +) -> Result<(), SerializerError> +where + T: ser::Serialize, +{ + let field_id = schema.get_or_create(attribute)?; + + serialize_value_with_id( + txn, + field_id, + schema, + document_id, + document_store, + documents_fields_counts, + indexer, + ranked_map, + value + ) +} + +pub fn serialize_value_with_id<'a, T: ?Sized>( + txn: &mut heed::RwTxn, + field_id: FieldId, schema: &'a Schema, document_id: DocumentId, document_store: DocumentsFields, @@ -324,12 +335,11 @@ where T: ser::Serialize, { let serialized = serde_json::to_vec(value)?; - let field_id = schema.get_or_create(attribute)?; document_store.put_document_field(txn, document_id, field_id, &serialized)?; if let Some(indexed_pos) = schema.id_is_indexed(field_id) { let indexer = Indexer { - field_id, + pos: *indexed_pos, indexer, document_id, }; @@ -337,13 +347,13 @@ where documents_fields_counts.put_document_field_count( txn, document_id, - field_id, + *indexed_pos, number_of_words as u16, )?; } } - if let Some(field_id) = schema.id_is_ranked(field_id) { + if schema.id_is_ranked(field_id) { let number = value.serialize(ConvertToNumber)?; ranked_map.insert(document_id, field_id, number); } diff --git a/meilisearch-core/src/store/documents_fields.rs b/meilisearch-core/src/store/documents_fields.rs index b217ecd31..05c2b7089 100644 --- a/meilisearch-core/src/store/documents_fields.rs +++ b/meilisearch-core/src/store/documents_fields.rs @@ -1,14 +1,14 @@ use heed::types::{ByteSlice, OwnedType}; use crate::database::MainT; use heed::Result as ZResult; -use meilisearch_schema::SchemaAttr; +use meilisearch_schema::FieldId; -use super::DocumentAttrKey; +use super::DocumentFieldStoredKey; use crate::DocumentId; #[derive(Copy, Clone)] pub struct DocumentsFields { - pub(crate) documents_fields: heed::Database, ByteSlice>, + pub(crate) documents_fields: heed::Database, ByteSlice>, } impl DocumentsFields { @@ -16,10 +16,10 @@ impl DocumentsFields { self, writer: &mut heed::RwTxn, document_id: DocumentId, - attribute: SchemaAttr, + attribute: FieldId, value: &[u8], ) -> ZResult<()> { - let key = DocumentAttrKey::new(document_id, attribute); + let key = DocumentFieldStoredKey::new(document_id, attribute); self.documents_fields.put(writer, &key, value) } @@ -28,8 +28,8 @@ impl DocumentsFields { writer: &mut heed::RwTxn, document_id: DocumentId, ) -> ZResult { - let start = DocumentAttrKey::new(document_id, SchemaAttr::min()); - let end = DocumentAttrKey::new(document_id, SchemaAttr::max()); + let start = DocumentFieldStoredKey::new(document_id, FieldId::min()); + let end = DocumentFieldStoredKey::new(document_id, FieldId::max()); self.documents_fields.delete_range(writer, &(start..=end)) } @@ -41,9 +41,9 @@ impl DocumentsFields { self, reader: &'txn heed::RoTxn, document_id: DocumentId, - attribute: SchemaAttr, + attribute: FieldId, ) -> ZResult> { - let key = DocumentAttrKey::new(document_id, attribute); + let key = DocumentFieldStoredKey::new(document_id, attribute); self.documents_fields.get(reader, &key) } @@ -52,25 +52,25 @@ impl DocumentsFields { reader: &'txn heed::RoTxn, document_id: DocumentId, ) -> ZResult> { - let start = DocumentAttrKey::new(document_id, SchemaAttr::min()); - let end = DocumentAttrKey::new(document_id, SchemaAttr::max()); + let start = DocumentFieldStoredKey::new(document_id, FieldId::min()); + let end = DocumentFieldStoredKey::new(document_id, FieldId::max()); let iter = self.documents_fields.range(reader, &(start..=end))?; Ok(DocumentFieldsIter { iter }) } } pub struct DocumentFieldsIter<'txn> { - iter: heed::RoRange<'txn, OwnedType, ByteSlice>, + iter: heed::RoRange<'txn, OwnedType, ByteSlice>, } impl<'txn> Iterator for DocumentFieldsIter<'txn> { - type Item = ZResult<(SchemaAttr, &'txn [u8])>; + type Item = ZResult<(FieldId, &'txn [u8])>; fn next(&mut self) -> Option { match self.iter.next() { Some(Ok((key, bytes))) => { - let attr = SchemaAttr(key.attr.get()); - Some(Ok((attr, bytes))) + let field_id = FieldId(key.field_id.get()); + Some(Ok((field_id, bytes))) } Some(Err(e)) => Some(Err(e)), None => None, diff --git a/meilisearch-core/src/store/documents_fields_counts.rs b/meilisearch-core/src/store/documents_fields_counts.rs index 87e3e8fb0..69bacd5f7 100644 --- a/meilisearch-core/src/store/documents_fields_counts.rs +++ b/meilisearch-core/src/store/documents_fields_counts.rs @@ -1,13 +1,13 @@ -use super::DocumentAttrKey; +use super::DocumentFieldIndexedKey; use crate::database::MainT; use crate::DocumentId; use heed::types::OwnedType; use heed::Result as ZResult; -use meilisearch_schema::FieldId; +use meilisearch_schema::IndexedPos; #[derive(Copy, Clone)] pub struct DocumentsFieldsCounts { - pub(crate) documents_fields_counts: heed::Database, OwnedType>, + pub(crate) documents_fields_counts: heed::Database, OwnedType>, } impl DocumentsFieldsCounts { @@ -15,10 +15,10 @@ impl DocumentsFieldsCounts { self, writer: &mut heed::RwTxn, document_id: DocumentId, - attribute: FieldId, + attribute: IndexedPos, value: u16, ) -> ZResult<()> { - let key = DocumentAttrKey::new(document_id, attribute); + let key = DocumentFieldIndexedKey::new(document_id, attribute); self.documents_fields_counts.put(writer, &key, &value) } @@ -27,10 +27,9 @@ impl DocumentsFieldsCounts { writer: &mut heed::RwTxn, document_id: DocumentId, ) -> ZResult { - let start = DocumentAttrKey::new(document_id, FieldId::min()); - let end = DocumentAttrKey::new(document_id, FieldId::max()); - self.documents_fields_counts - .delete_range(writer, &(start..=end)) + let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min()); + let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max()); + self.documents_fields_counts.delete_range(writer, &(start..=end)) } pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> { @@ -41,9 +40,9 @@ impl DocumentsFieldsCounts { self, reader: &heed::RoTxn, document_id: DocumentId, - attribute: FieldId, + attribute: IndexedPos, ) -> ZResult> { - let key = DocumentAttrKey::new(document_id, attribute); + let key = DocumentFieldIndexedKey::new(document_id, attribute); match self.documents_fields_counts.get(reader, &key)? { Some(count) => Ok(Some(count)), None => Ok(None), @@ -55,8 +54,8 @@ impl DocumentsFieldsCounts { reader: &'txn heed::RoTxn, document_id: DocumentId, ) -> ZResult> { - let start = DocumentAttrKey::new(document_id, FieldId::min()); - let end = DocumentAttrKey::new(document_id, FieldId::max()); + let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min()); + let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max()); let iter = self.documents_fields_counts.range(reader, &(start..=end))?; Ok(DocumentFieldsCountsIter { iter }) } @@ -79,17 +78,17 @@ impl DocumentsFieldsCounts { } pub struct DocumentFieldsCountsIter<'txn> { - iter: heed::RoRange<'txn, OwnedType, OwnedType>, + iter: heed::RoRange<'txn, OwnedType, OwnedType>, } impl Iterator for DocumentFieldsCountsIter<'_> { - type Item = ZResult<(FieldId, u16)>; + type Item = ZResult<(IndexedPos, u16)>; fn next(&mut self) -> Option { match self.iter.next() { Some(Ok((key, count))) => { - let attr = FieldId(key.attr.get()); - Some(Ok((attr, count))) + let indexed_pos = IndexedPos(key.indexed_pos.get()); + Some(Ok((indexed_pos, count))) } Some(Err(e)) => Some(Err(e)), None => None, @@ -99,7 +98,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> { pub struct DocumentsIdsIter<'txn> { last_seen_id: Option, - iter: heed::RoIter<'txn, OwnedType, OwnedType>, + iter: heed::RoIter<'txn, OwnedType, OwnedType>, } impl Iterator for DocumentsIdsIter<'_> { @@ -123,18 +122,18 @@ impl Iterator for DocumentsIdsIter<'_> { } pub struct AllDocumentsFieldsCountsIter<'txn> { - iter: heed::RoIter<'txn, OwnedType, OwnedType>, + iter: heed::RoIter<'txn, OwnedType, OwnedType>, } impl Iterator for AllDocumentsFieldsCountsIter<'_> { - type Item = ZResult<(DocumentId, FieldId, u16)>; + type Item = ZResult<(DocumentId, IndexedPos, u16)>; fn next(&mut self) -> Option { match self.iter.next() { Some(Ok((key, count))) => { let docid = DocumentId(key.docid.get()); - let attr = FieldId(key.attr.get()); - Some(Ok((docid, attr, count))) + let indexed_pos = IndexedPos(key.indexed_pos.get()); + Some(Ok((docid, indexed_pos, count))) } Some(Err(e)) => Some(Err(e)), None => None, diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 7eb1c73a1..eec225503 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -1,12 +1,13 @@ -use crate::fields_map::FieldsMap; -use crate::database::MainT; -use crate::RankedMap; +use std::sync::Arc; +use std::collections::{HashMap, BTreeMap, BTreeSet}; + use chrono::{DateTime, Utc}; use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str}; use heed::Result as ZResult; use meilisearch_schema::Schema; -use std::collections::{HashMap, BTreeMap, BTreeSet}; -use std::sync::Arc; + +use crate::database::MainT; +use crate::RankedMap; const CREATED_AT_KEY: &str = "created-at"; const RANKING_RULES_KEY: &str = "ranking-rules-key"; @@ -18,7 +19,6 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency"; const NAME_KEY: &str = "name"; const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; const RANKED_MAP_KEY: &str = "ranked-map"; -const FIELDS_MAP_KEY: &str = "fields-map"; const SCHEMA_KEY: &str = "schema"; const UPDATED_AT_KEY: &str = "updated-at"; const WORDS_KEY: &str = "words"; @@ -114,16 +114,6 @@ impl Main { .get::<_, Str, SerdeBincode>(reader, RANKED_MAP_KEY) } - pub fn put_fields_map(self, writer: &mut heed::RwTxn, fields_map: &FieldsMap) -> ZResult<()> { - self.main - .put::<_, Str, SerdeBincode>(writer, FIELDS_MAP_KEY, &fields_map) - } - - pub fn fields_map(self, reader: &heed::RoTxn) -> ZResult> { - self.main - .get::<_, Str, SerdeBincode>(reader, FIELDS_MAP_KEY) - } - pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> { let bytes = fst.as_fst().as_bytes(); self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes) diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs index 5aa26e1ea..1c6eb5a91 100644 --- a/meilisearch-core/src/store/mod.rs +++ b/meilisearch-core/src/store/mod.rs @@ -43,18 +43,50 @@ use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MR type BEU64 = zerocopy::U64; type BEU16 = zerocopy::U16; +// #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] +// #[repr(C)] +// pub struct DocumentAttrKey { +// docid: BEU64, +// indexed_pos: BEU16, +// } + +// impl DocumentAttrKey { +// fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentAttrKey { +// DocumentAttrKey { +// docid: BEU64::new(docid.0), +// indexed_pos: BEU16::new(indexed_pos.0), +// } +// } +// } + #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] #[repr(C)] -pub struct DocumentAttrKey { +pub struct DocumentFieldIndexedKey { docid: BEU64, - attr: BEU16, + indexed_pos: BEU16, } -impl DocumentAttrKey { - fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey { - DocumentAttrKey { +impl DocumentFieldIndexedKey { + fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey { + DocumentFieldIndexedKey { docid: BEU64::new(docid.0), - attr: BEU16::new(attr.0), + indexed_pos: BEU16::new(indexed_pos.0), + } + } +} + +#[derive(Debug, Copy, Clone, AsBytes, FromBytes)] +#[repr(C)] +pub struct DocumentFieldStoredKey { + docid: BEU64, + field_id: BEU16, +} + +impl DocumentFieldStoredKey { + fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey { + DocumentFieldStoredKey { + docid: BEU64::new(docid.0), + field_id: BEU16::new(field_id.0), } } } @@ -228,7 +260,7 @@ impl Index { &self, reader: &heed::RoTxn, document_id: DocumentId, - attribute: SchemaAttr, + attribute: FieldId, ) -> MResult> { let bytes = self .documents_fields diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index 0431b0fbc..825335c4e 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -1,14 +1,13 @@ -use std::collections::{HashMap, BTreeSet}; +use std::collections::HashMap; use fst::{set::OpBuilder, SetBuilder}; use sdset::{duo::Union, SetOperation}; use serde::{Deserialize, Serialize}; -use meilisearch_schema::{Schema, DISPLAYED, INDEXED}; use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::raw_indexer::RawIndexer; -use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer}; +use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer}; use crate::store; use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; use crate::{Error, MResult, RankedMap}; @@ -115,16 +114,11 @@ pub fn apply_documents_addition<'a, 'b>( None => return Err(Error::SchemaMissing), }; - if let Some(new_schema) = lazy_new_schema(&schema, &addition) { - main_store.put_schema(writer, &new_schema)?; - schema = new_schema; - } - - let identifier = schema.identifier_name(); + let identifier = schema.identifier(); // 1. store documents ids for future deletion for document in addition { - let document_id = match extract_document_id(identifier, &document)? { + let document_id = match extract_document_id(&identifier, &document)? { Some(id) => id, None => return Err(Error::MissingDocumentId), }; @@ -147,8 +141,6 @@ pub fn apply_documents_addition<'a, 'b>( None => fst::Set::default(), }; - let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default(); - // 3. index the documents fields in the stores let mut indexer = RawIndexer::new(stop_words); @@ -160,7 +152,6 @@ pub fn apply_documents_addition<'a, 'b>( document_fields_counts: index.documents_fields_counts, indexer: &mut indexer, ranked_map: &mut ranked_map, - fields_map: &mut fields_map, document_id, }; @@ -192,16 +183,11 @@ pub fn apply_documents_partial_addition<'a, 'b>( None => return Err(Error::SchemaMissing), }; - if let Some(new_schema) = lazy_new_schema(&schema, &addition) { - main_store.put_schema(writer, &new_schema)?; - schema = new_schema; - } - - let identifier = schema.identifier_name(); + let identifier = schema.identifier(); // 1. store documents ids for future deletion for mut document in addition { - let document_id = match extract_document_id(identifier, &document)? { + let document_id = match extract_document_id(&identifier, &document)? { Some(id) => id, None => return Err(Error::MissingDocumentId), }; @@ -241,8 +227,6 @@ pub fn apply_documents_partial_addition<'a, 'b>( None => fst::Set::default(), }; - let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default(); - // 3. index the documents fields in the stores let mut indexer = RawIndexer::new(stop_words); @@ -254,7 +238,6 @@ pub fn apply_documents_partial_addition<'a, 'b>( document_fields_counts: index.documents_fields_counts, indexer: &mut indexer, ranked_map: &mut ranked_map, - fields_map: &mut fields_map, document_id, }; @@ -281,7 +264,6 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind }; let mut ranked_map = RankedMap::default(); - let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default(); // 1. retrieve all documents ids let mut documents_ids_to_reindex = Vec::new(); @@ -312,21 +294,20 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind for result in index.documents_fields.document_fields(writer, *document_id)? { let (attr, bytes) = result?; let value: serde_json::Value = serde_json::from_slice(bytes)?; - ram_store.insert((document_id, attr), value); + ram_store.insert((document_id, field_id), value); } - for ((docid, attr), value) in ram_store.drain() { - serialize_value( + for ((docid, field_id), value) in ram_store.drain() { + serialize_value_with_id( writer, - attr, - schema.props(attr), + field_id, + &schema, *docid, index.documents_fields, index.documents_fields_counts, &mut indexer, &mut ranked_map, - &mut fields_map, - &value, + &value )?; } } @@ -401,30 +382,3 @@ pub fn write_documents_addition_index( Ok(()) } - -pub fn lazy_new_schema( - schema: &Schema, - documents: &[HashMap], -) -> Option { - let mut attributes_to_add = BTreeSet::new(); - - for document in documents { - for (key, _) in document { - if schema.attribute(key).is_none() { - attributes_to_add.insert(key); - } - } - } - - if attributes_to_add.is_empty() { - return None - } - - let mut schema_builder = schema.to_builder(); - for attribute in attributes_to_add { - schema_builder.new_attribute(attribute, DISPLAYED | INDEXED); - } - let schema = schema_builder.build(); - - Some(schema) -} diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index 6efa9bf01..5627ee0fd 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -40,8 +40,8 @@ impl DocumentsDeletion { where D: serde::Serialize, { - let identifier = schema.identifier_name(); - let document_id = match extract_document_id(identifier, &document)? { + let identifier = schema.identifier(); + let document_id = match extract_document_id(&identifier, &document)? { Some(id) => id, None => return Err(Error::MissingDocumentId), }; @@ -101,18 +101,7 @@ pub fn apply_documents_deletion( }; // collect the ranked attributes according to the schema - let ranked_attrs: Vec<_> = schema - .iter() - .filter_map( - |(_, attr, prop)| { - if prop.is_ranked() { - Some(attr) - } else { - None - } - }, - ) - .collect(); + let ranked_attrs = schema.get_ranked(); let mut words_document_ids = HashMap::new(); for id in idset { diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index 7d9326a43..13b8a1167 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -1,16 +1,15 @@ -use std::collections::{HashMap, BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet}; use heed::Result as ZResult; use fst::{set::OpBuilder, SetBuilder}; use sdset::SetBuf; - -use meilisearch_schema::{Schema, SchemaAttr, diff_transposition, generate_schema}; +use meilisearch_schema::Schema; use crate::database::{MainT, UpdateT}; use crate::settings::{UpdateState, SettingsUpdate}; use crate::update::documents_addition::reindex_all_documents; use crate::update::{next_update_id, Update}; -use crate::{store, MResult}; +use crate::{store, MResult, Error}; pub fn push_settings_update( writer: &mut heed::RwTxn, @@ -35,7 +34,17 @@ pub fn apply_settings_update( let mut must_reindex = false; - let old_schema = index.main.schema(writer)?; + let mut schema = match index.main.schema(writer)? { + Some(schema) => schema, + None => { + match settings.attribute_identifier.clone() { + UpdateState::Update(id) => Schema::with_identifier(id), + _ => return Err(Error::MissingSchemaIdentifier) + } + } + }; + + println!("settings: {:?}", settings); match settings.ranking_rules { UpdateState::Update(v) => { @@ -55,157 +64,69 @@ pub fn apply_settings_update( }, _ => (), } - let identifier = match settings.attribute_identifier.clone() { - UpdateState::Update(v) => v, - _ => { - old_schema.clone().unwrap().identifier_name().to_owned() - }, + + if let UpdateState::Update(id) = settings.attribute_identifier { + schema.set_identifier(id)?; }; - let attributes_searchable: Vec = match settings.attributes_searchable.clone() { - UpdateState::Update(v) => v, - UpdateState::Clear => Vec::new(), - UpdateState::Nothing => { - match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_indexed()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - } + + match settings.attributes_searchable.clone() { + UpdateState::Update(v) => schema.update_indexed(v)?, + UpdateState::Clear => { + let clear: Vec = Vec::new(); + schema.update_indexed(clear)?; }, + UpdateState::Nothing => (), UpdateState::Add(attrs) => { - let mut old_attrs = match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_indexed()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - }; for attr in attrs { - if !old_attrs.contains(&attr) { - old_attrs.push(attr); - } + schema.set_indexed(attr)?; } - old_attrs }, UpdateState::Delete(attrs) => { - let mut old_attrs = match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_indexed()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - }; for attr in attrs { - old_attrs.retain(|x| *x == attr) + schema.remove_indexed(attr); } - old_attrs } }; - let attributes_displayed: Vec = match settings.attributes_displayed.clone() { - UpdateState::Update(v) => v, - UpdateState::Clear => Vec::new(), - UpdateState::Nothing => { - match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_displayed()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - } + match settings.attributes_displayed.clone() { + UpdateState::Update(v) => schema.update_displayed(v)?, + UpdateState::Clear => { + let clear: Vec = Vec::new(); + schema.update_displayed(clear)?; }, + UpdateState::Nothing => (), UpdateState::Add(attrs) => { - let mut old_attrs = match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_displayed()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - }; for attr in attrs { - if !old_attrs.contains(&attr) { - old_attrs.push(attr); - } + schema.set_displayed(attr)?; } - old_attrs }, UpdateState::Delete(attrs) => { - let mut old_attrs = match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_displayed()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - }; for attr in attrs { - old_attrs.retain(|x| *x == attr) + schema.remove_displayed(attr); } - old_attrs } }; - let attributes_ranked: Vec = match settings.attributes_ranked.clone() { - UpdateState::Update(v) => v, - UpdateState::Clear => Vec::new(), - UpdateState::Nothing => { - match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_ranked()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - } + match settings.attributes_ranked.clone() { + UpdateState::Update(v) => schema.update_ranked(v)?, + UpdateState::Clear => { + let clear: Vec = Vec::new(); + schema.update_ranked(clear)?; }, + UpdateState::Nothing => (), UpdateState::Add(attrs) => { - let mut old_attrs = match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_ranked()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - }; for attr in attrs { - if !old_attrs.contains(&attr) { - old_attrs.push(attr); - } + schema.set_ranked(attr)?; } - old_attrs }, UpdateState::Delete(attrs) => { - let mut old_attrs = match old_schema.clone() { - Some(schema) => { - schema.into_iter() - .filter(|(_, props)| props.is_ranked()) - .map(|(name, _)| name) - .collect() - }, - None => Vec::new(), - }; for attr in attrs { - old_attrs.retain(|x| *x == attr) + schema.remove_ranked(attr); } - old_attrs } }; - let new_schema = generate_schema(identifier, attributes_searchable, attributes_displayed, attributes_ranked); + index.main.put_schema(writer, &schema)?; - index.main.put_schema(writer, &new_schema)?; + println!("schema: {:?}", schema); match settings.stop_words { UpdateState::Update(stop_words) => { @@ -233,16 +154,6 @@ pub fn apply_settings_update( let postings_lists_store = index.postings_lists; let docs_words_store = index.docs_words; - if settings.attribute_identifier.is_changed() || - settings.attributes_ranked.is_changed() || - settings.attributes_searchable.is_changed() || - settings.attributes_displayed.is_changed() - { - if let Some(old_schema) = old_schema { - rewrite_all_documents(writer, index, &old_schema, &new_schema)?; - must_reindex = true; - } - } if must_reindex { reindex_all_documents( writer, @@ -438,46 +349,3 @@ pub fn apply_synonyms_update( Ok(()) } - -pub fn rewrite_all_documents( - writer: &mut heed::RwTxn, - index: &store::Index, - old_schema: &Schema, - new_schema: &Schema, -) -> MResult<()> { - - let mut documents_ids_to_reindex = Vec::new(); - - // Retrieve all documents present on the database - for result in index.documents_fields_counts.documents_ids(writer)? { - let document_id = result?; - documents_ids_to_reindex.push(document_id); - } - - let transpotition = diff_transposition(old_schema, new_schema); - - // Rewrite all documents one by one - for id in documents_ids_to_reindex { - let mut document: HashMap> = HashMap::new(); - - // Retrieve the old document - for item in index.documents_fields.document_fields(writer, id)? { - if let Ok(item) = item { - if let Some(pos) = transpotition[(item.0).0 as usize] { - // Save the current document with the new SchemaAttr - document.insert(SchemaAttr::new(pos), item.1.to_vec()); - } - } - } - // Remove the current document - index.documents_fields.del_all_document_fields(writer, id)?; - - // Rewrite the new document - // TODO: use cursor to not do memory jump at each call - for (key, value) in document { - index.documents_fields.put_document_field(writer, id, key, &value)?; - } - } - - Ok(()) -} diff --git a/meilisearch-schema/src/error.rs b/meilisearch-schema/src/error.rs index e37f12fdb..57f73050a 100644 --- a/meilisearch-schema/src/error.rs +++ b/meilisearch-schema/src/error.rs @@ -5,6 +5,7 @@ pub type SResult = Result; #[derive(Debug)] pub enum Error { + FieldNameNotFound(String), MaxFieldsLimitExceeded, } @@ -12,6 +13,7 @@ impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::Error::*; match self { + FieldNameNotFound(field) => write!(f, "The field {} doesn't exist", field), MaxFieldsLimitExceeded => write!(f, "The maximum of possible reatributed field id has been reached"), } } diff --git a/meilisearch-schema/src/fields_map.rs b/meilisearch-schema/src/fields_map.rs index 642eea9b9..d81a6d245 100644 --- a/meilisearch-schema/src/fields_map.rs +++ b/meilisearch-schema/src/fields_map.rs @@ -3,9 +3,8 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; -use crate::{SResult, SchemaAttr}; +use crate::{SResult, FieldId}; -pub type FieldId = SchemaAttr; #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FieldsMap { @@ -43,13 +42,13 @@ impl FieldsMap { self.name_map.remove(&name); } - pub fn get_id>(&self, name: S) -> Option<&FieldId> { + pub fn get_id>(&self, name: S) -> Option { let name = name.into(); - self.name_map.get(&name) + self.name_map.get(&name).map(|s| *s) } - pub fn get_name>(&self, id: I) -> Option<&String> { - self.id_map.get(&id.into()) + pub fn get_name>(&self, id: I) -> Option { + self.id_map.get(&id.into()).map(|s| s.to_string()) } pub fn read_from_bin(reader: R) -> bincode::Result { @@ -74,14 +73,14 @@ mod tests { assert_eq!(fields_map.insert("id").unwrap(), 0.into()); assert_eq!(fields_map.insert("title").unwrap(), 1.into()); assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into()); - assert_eq!(fields_map.get_id("id"), Some(&0.into())); - assert_eq!(fields_map.get_id("title"), Some(&1.into())); - assert_eq!(fields_map.get_id("descritpion"), Some(&2.into())); + assert_eq!(fields_map.get_id("id"), Some(0.into())); + assert_eq!(fields_map.get_id("title"), Some(1.into())); + assert_eq!(fields_map.get_id("descritpion"), Some(2.into())); assert_eq!(fields_map.get_id("date"), None); assert_eq!(fields_map.len(), 3); - assert_eq!(fields_map.get_name(0), Some(&"id".to_owned())); - assert_eq!(fields_map.get_name(1), Some(&"title".to_owned())); - assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned())); + assert_eq!(fields_map.get_name(0), Some("id".to_owned())); + assert_eq!(fields_map.get_name(1), Some("title".to_owned())); + assert_eq!(fields_map.get_name(2), Some("descritpion".to_owned())); assert_eq!(fields_map.get_name(4), None); fields_map.remove("title"); assert_eq!(fields_map.get_id("title"), None); diff --git a/meilisearch-schema/src/lib.rs b/meilisearch-schema/src/lib.rs index 502b96828..8090221f1 100644 --- a/meilisearch-schema/src/lib.rs +++ b/meilisearch-schema/src/lib.rs @@ -3,48 +3,88 @@ mod fields_map; mod schema; pub use error::{Error, SResult}; -pub use fields_map::{FieldsMap, FieldId}; -pub use schema::{Schema, IndexedPos}; +pub use fields_map::FieldsMap; +pub use schema::Schema; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)] -pub struct SchemaAttr(pub u16); +pub struct IndexedPos(pub u16); -impl SchemaAttr { - pub const fn new(value: u16) -> SchemaAttr { - SchemaAttr(value) +impl IndexedPos { + pub const fn new(value: u16) -> IndexedPos { + IndexedPos(value) } - pub const fn min() -> SchemaAttr { - SchemaAttr(u16::min_value()) + pub const fn min() -> IndexedPos { + IndexedPos(u16::min_value()) } - pub const fn max() -> SchemaAttr { - SchemaAttr(u16::max_value()) + pub const fn max() -> IndexedPos { + IndexedPos(u16::max_value()) } - pub fn next(self) -> SResult { - self.0.checked_add(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded) + pub fn next(self) -> SResult { + self.0.checked_add(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded) } - pub fn prev(self) -> SResult { - self.0.checked_sub(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded) + pub fn prev(self) -> SResult { + self.0.checked_sub(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded) } } -impl From for SchemaAttr { - fn from(value: u16) -> SchemaAttr { - SchemaAttr(value) +impl From for IndexedPos { + fn from(value: u16) -> IndexedPos { + IndexedPos(value) } } -impl Into for SchemaAttr { +impl Into for IndexedPos { fn into(self) -> u16 { self.0 } } +#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)] +pub struct FieldId(pub u16); + +impl FieldId { + pub const fn new(value: u16) -> FieldId { + FieldId(value) + } + + pub const fn min() -> FieldId { + FieldId(u16::min_value()) + } + + pub const fn max() -> FieldId { + FieldId(u16::max_value()) + } + + pub fn next(self) -> SResult { + self.0.checked_add(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded) + } + + pub fn prev(self) -> SResult { + self.0.checked_sub(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded) + } +} + +impl From for FieldId { + fn from(value: u16) -> FieldId { + FieldId(value) + } +} + +impl Into for FieldId { + fn into(self) -> u16 { + self.0 + } +} + + + + // use std::collections::{BTreeMap, HashMap}; // use std::ops::BitOr; diff --git a/meilisearch-schema/src/schema.rs b/meilisearch-schema/src/schema.rs index a0c738cfa..b3b62fbea 100644 --- a/meilisearch-schema/src/schema.rs +++ b/meilisearch-schema/src/schema.rs @@ -1,10 +1,10 @@ use std::collections::{HashMap, HashSet}; -use crate::{FieldsMap, FieldId, SResult, SchemaAttr}; +use serde::{Serialize, Deserialize}; -pub type IndexedPos = SchemaAttr; +use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos}; -#[derive(Default)] +#[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct Schema { fields_map: FieldsMap, @@ -30,11 +30,21 @@ impl Schema { self.fields_map.get_name(self.identifier).unwrap().to_string() } - pub fn get_id>(&self, name: S) -> Option<&FieldId> { + pub fn set_identifier(&mut self, id: String) -> SResult<()> { + match self.get_id(id.clone()) { + Some(id) => { + self.identifier = id; + Ok(()) + }, + None => Err(Error::FieldNameNotFound(id)) + } + } + + pub fn get_id>(&self, name: S) -> Option { self.fields_map.get_id(name) } - pub fn get_name>(&self, id: I) -> Option<&String> { + pub fn get_name>(&self, id: I) -> Option { self.fields_map.get_name(id) } @@ -52,7 +62,7 @@ impl Schema { pub fn get_or_create + std::clone::Clone>(&mut self, name: S) -> SResult { match self.fields_map.get_id(name.clone()) { Some(id) => { - Ok(*id) + Ok(id) } None => { self.set_indexed(name.clone())?; @@ -61,6 +71,30 @@ impl Schema { } } + pub fn get_ranked(&self) -> HashSet { + self.ranked.clone() + } + + pub fn get_ranked_name(&self) -> HashSet { + self.ranked.iter().filter_map(|a| self.get_name(*a)).collect() + } + + pub fn get_displayed(&self) -> HashSet { + self.displayed.clone() + } + + pub fn get_displayed_name(&self) -> HashSet { + self.displayed.iter().filter_map(|a| self.get_name(*a)).collect() + } + + pub fn get_indexed(&self) -> Vec { + self.indexed.clone() + } + + pub fn get_indexed_name(&self) -> Vec { + self.indexed.iter().filter_map(|a| self.get_name(*a)).collect() + } + pub fn set_ranked>(&mut self, name: S) -> SResult { let id = self.fields_map.insert(name.into())?; self.ranked.insert(id); @@ -81,23 +115,42 @@ impl Schema { Ok((id, pos.into())) } - pub fn is_ranked>(&self, name: S) -> Option<&FieldId> { + pub fn remove_ranked>(&mut self, name: S) { + if let Some(id) = self.fields_map.get_id(name.into()) { + self.ranked.remove(&id); + } + } + + pub fn remove_displayed>(&mut self, name: S) { + if let Some(id) = self.fields_map.get_id(name.into()) { + self.displayed.remove(&id); + } + } + + pub fn remove_indexed>(&mut self, name: S) { + if let Some(id) = self.fields_map.get_id(name.into()) { + self.indexed_map.remove(&id); + self.indexed.retain(|x| *x != id); + } + } + + pub fn is_ranked>(&self, name: S) -> Option { match self.fields_map.get_id(name.into()) { - Some(id) => self.ranked.get(id), + Some(id) => self.ranked.get(&id).map(|s| *s), None => None, } } - pub fn is_displayed>(&self, name: S) -> Option<&FieldId> { + pub fn is_displayed>(&self, name: S) -> Option { match self.fields_map.get_id(name.into()) { - Some(id) => self.displayed.get(id), + Some(id) => self.displayed.get(&id).map(|s| *s), None => None, } } - pub fn is_indexed>(&self, name: S) -> Option<&IndexedPos> { + pub fn is_indexed>(&self, name: S) -> Option { match self.fields_map.get_id(name.into()) { - Some(id) => self.indexed_map.get(id), + Some(id) => self.indexed_map.get(&id).map(|s| *s), None => None, } } diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs index d37618eb9..54202edbc 100644 --- a/meilisearch-types/src/lib.rs +++ b/meilisearch-types/src/lib.rs @@ -28,6 +28,7 @@ pub struct DocIndex { /// The attribute in the document where the word was found /// along with the index in it. + /// Is an IndexedPos and not FieldId. Must be convert each time. pub attribute: u16, pub word_index: u16,