introduce a new schemaless way

This commit is contained in:
qdequele 2020-01-13 19:10:58 +01:00
parent bbe1845f66
commit 130fb74928
No known key found for this signature in database
GPG Key ID: B3F0A000EBF11745
22 changed files with 365 additions and 418 deletions

View File

@ -7,6 +7,5 @@
"overview", "overview",
"release_date", "release_date",
"poster" "poster"
], ]
"attributes_ranked": ["release_date"]
} }

View File

@ -13,7 +13,8 @@ use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilisearch_core::{Database, Highlight, ProcessedUpdateResult}; use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
use meilisearch_schema::SchemaAttr; use meilisearch_core::settings::Settings;
use meilisearch_schema::FieldId;
// #[cfg(target_os = "linux")] // #[cfg(target_os = "linux")]
#[global_allocator] #[global_allocator]
@ -121,7 +122,8 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
let settings = { let settings = {
let string = fs::read_to_string(&command.settings)?; let string = fs::read_to_string(&command.settings)?;
serde_json::from_str(&string).unwrap() let settings: Settings = serde_json::from_str(&string).unwrap();
settings.into()
}; };
let mut update_writer = db.update_write_txn().unwrap(); let mut update_writer = db.update_write_txn().unwrap();
@ -357,7 +359,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
}; };
let attr = schema let attr = schema
.attribute(&filter) .get_id(filter)
.expect("Could not find filtered attribute"); .expect("Could not find filtered attribute");
builder.with_filter(move |document_id| { builder.with_filter(move |document_id| {
@ -388,11 +390,11 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
for (name, text) in document.0 { for (name, text) in document.0 {
print!("{}: ", name); print!("{}: ", name);
let attr = schema.attribute(&name).unwrap(); let attr = schema.get_id(&name).unwrap();
let highlights = doc let highlights = doc
.highlights .highlights
.iter() .iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr) .filter(|m| FieldId::new(m.attribute) == attr)
.cloned(); .cloned();
let (text, highlights) = let (text, highlights) =
crop_text(&text, highlights, command.char_context); crop_text(&text, highlights, command.char_context);
@ -407,8 +409,8 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
let mut matching_attributes = HashSet::new(); let mut matching_attributes = HashSet::new();
for highlight in doc.highlights { for highlight in doc.highlights {
let attr = SchemaAttr::new(highlight.attribute); let attr = FieldId::new(highlight.attribute);
let name = schema.attribute_name(attr); let name = schema.get_name(attr);
matching_attributes.insert(name); matching_attributes.insert(name);
} }

View File

@ -1,6 +1,6 @@
use std::cmp::{Ordering, Reverse}; use std::cmp::{Ordering, Reverse};
use std::collections::hash_map::{HashMap, Entry}; use std::collections::hash_map::{HashMap, Entry};
use meilisearch_schema::SchemaAttr; use meilisearch_schema::IndexedPos;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::{RawDocument, MResult}; use crate::{RawDocument, MResult};
use crate::bucket_sort::BareMatch; use crate::bucket_sort::BareMatch;
@ -32,7 +32,7 @@ impl Criterion for Exact {
for bm in group { for bm in group {
for di in ctx.postings_lists[bm.postings_list].as_ref() { for di in ctx.postings_lists[bm.postings_list].as_ref() {
let attr = SchemaAttr(di.attribute); let attr = IndexedPos(di.attribute);
let count = match fields_counts.entry(attr) { let count = match fields_counts.entry(attr) {
Entry::Occupied(entry) => *entry.get(), Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => { Entry::Vacant(entry) => {

View File

@ -69,7 +69,7 @@ impl<'a> SortByAttr<'a> {
reversed: bool, reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError> { ) -> Result<SortByAttr<'a>, SortByAttrError> {
let field_id = match schema.get_id(attr_name) { let field_id = match schema.get_id(attr_name) {
Some(field_id) => *field_id, Some(field_id) => field_id,
None => return Err(SortByAttrError::AttributeNotFound), None => return Err(SortByAttrError::AttributeNotFound),
}; };

View File

@ -8,11 +8,12 @@ pub type MResult<T> = Result<T, Error>;
pub enum Error { pub enum Error {
Io(io::Error), Io(io::Error),
IndexAlreadyExists, IndexAlreadyExists,
SchemaDiffer, MissingSchemaIdentifier,
SchemaMissing, SchemaMissing,
WordIndexMissing, WordIndexMissing,
MissingDocumentId, MissingDocumentId,
MaxFieldsLimitExceeded, MaxFieldsLimitExceeded,
Schema(meilisearch_schema::Error),
Zlmdb(heed::Error), Zlmdb(heed::Error),
Fst(fst::Error), Fst(fst::Error),
SerdeJson(SerdeJsonError), SerdeJson(SerdeJsonError),
@ -28,6 +29,12 @@ impl From<io::Error> for Error {
} }
} }
impl From<meilisearch_schema::Error> for Error {
fn from(error: meilisearch_schema::Error) -> Error {
Error::Schema(error)
}
}
impl From<heed::Error> for Error { impl From<heed::Error> for Error {
fn from(error: heed::Error) -> Error { fn from(error: heed::Error) -> Error {
Error::Zlmdb(error) Error::Zlmdb(error)
@ -76,10 +83,12 @@ impl fmt::Display for Error {
match self { match self {
Io(e) => write!(f, "{}", e), Io(e) => write!(f, "{}", e),
IndexAlreadyExists => write!(f, "index already exists"), IndexAlreadyExists => write!(f, "index already exists"),
SchemaDiffer => write!(f, "schemas differ"), MissingSchemaIdentifier => write!(f, "schema cannot be build without identifier"),
SchemaMissing => write!(f, "this index does not have a schema"), SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"), WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"), MissingDocumentId => write!(f, "document id is missing"),
MaxFieldsLimitExceeded => write!(f, "maximum field in a document is exceeded"),
Schema(e) => write!(f, "schemas error; {}", e),
Zlmdb(e) => write!(f, "heed error; {}", e), Zlmdb(e) => write!(f, "heed error; {}", e),
Fst(e) => write!(f, "fst error; {}", e), Fst(e) => write!(f, "fst error; {}", e),
SerdeJson(e) => write!(f, "serde json error; {}", e), SerdeJson(e) => write!(f, "serde json error; {}", e),

View File

@ -136,7 +136,7 @@ mod tests {
use std::iter::FromIterator; use std::iter::FromIterator;
use fst::{IntoStreamer, Set}; use fst::{IntoStreamer, Set};
use meilisearch_schema::SchemaAttr; use meilisearch_schema::IndexedPos;
use sdset::SetBuf; use sdset::SetBuf;
use tempfile::TempDir; use tempfile::TempDir;
@ -295,14 +295,14 @@ mod tests {
for ((docid, attr, _), count) in fields_counts { for ((docid, attr, _), count) in fields_counts {
let prev = index let prev = index
.documents_fields_counts .documents_fields_counts
.document_field_count(&mut writer, docid, SchemaAttr(attr)) .document_field_count(&mut writer, docid, IndexedPos(attr))
.unwrap(); .unwrap();
let prev = prev.unwrap_or(0); let prev = prev.unwrap_or(0);
index index
.documents_fields_counts .documents_fields_counts
.put_document_field_count(&mut writer, docid, SchemaAttr(attr), prev + count) .put_document_field_count(&mut writer, docid, IndexedPos(attr), prev + count)
.unwrap(); .unwrap();
} }

View File

@ -180,16 +180,16 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O
mod tests { mod tests {
use super::*; use super::*;
use meilisearch_schema::SchemaAttr; use meilisearch_schema::IndexedPos;
#[test] #[test]
fn strange_apostrophe() { fn strange_apostrophe() {
let mut indexer = RawIndexer::new(fst::Set::default()); let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0); let docid = DocumentId(0);
let attr = SchemaAttr(0); let indexed_pos = IndexedPos(0);
let text = "Zut, laspirateur, jai oublié de léteindre !"; let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text); indexer.index_text(docid, indexed_pos, text);
let Indexed { let Indexed {
words_doc_indexes, .. words_doc_indexes, ..
@ -209,9 +209,9 @@ mod tests {
let mut indexer = RawIndexer::new(fst::Set::default()); let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0); let docid = DocumentId(0);
let attr = SchemaAttr(0); let indexed_pos = IndexedPos(0);
let text = vec!["Zut, laspirateur, jai oublié de léteindre !"]; let text = vec!["Zut, laspirateur, jai oublié de léteindre !"];
indexer.index_text_seq(docid, attr, text); indexer.index_text_seq(docid, indexed_pos, text);
let Indexed { let Indexed {
words_doc_indexes, .. words_doc_indexes, ..
@ -234,9 +234,9 @@ mod tests {
let mut indexer = RawIndexer::new(stop_words); let mut indexer = RawIndexer::new(stop_words);
let docid = DocumentId(0); let docid = DocumentId(0);
let attr = SchemaAttr(0); let indexed_pos = IndexedPos(0);
let text = "Zut, laspirateur, jai oublié de léteindre !"; let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text); indexer.index_text(docid, indexed_pos, text);
let Indexed { let Indexed {
words_doc_indexes, .. words_doc_indexes, ..
@ -258,9 +258,9 @@ mod tests {
let mut indexer = RawIndexer::new(fst::Set::default()); let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0); let docid = DocumentId(0);
let attr = SchemaAttr(0); let indexed_pos = IndexedPos(0);
let text = "🇯🇵"; let text = "🇯🇵";
indexer.index_text(docid, attr, text); indexer.index_text(docid, indexed_pos, text);
let Indexed { let Indexed {
words_doc_indexes, .. words_doc_indexes, ..

View File

@ -99,7 +99,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
let ioread = SerdeJsonIoRead::new(cursor); let ioread = SerdeJsonIoRead::new(cursor);
let value = Value(SerdeJsonDeserializer::new(ioread)); let value = Value(SerdeJsonDeserializer::new(ioread));
Some((*attribute_name, value)) Some((attribute_name, value))
} else { } else {
None None
} }

View File

@ -20,7 +20,7 @@ pub use self::convert_to_string::ConvertToString;
pub use self::deserializer::{Deserializer, DeserializerError}; pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string}; pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
pub use self::indexer::Indexer; pub use self::indexer::Indexer;
pub use self::serializer::{serialize_value, Serializer}; pub use self::serializer::{serialize_value, serialize_value_with_id, Serializer};
use std::{error::Error, fmt}; use std::{error::Error, fmt};

View File

@ -1,4 +1,4 @@
use meilisearch_schema::{Schema, FieldsMap}; use meilisearch_schema::{Schema, FieldId};
use serde::ser; use serde::ser;
use crate::database::MainT; use crate::database::MainT;
@ -10,12 +10,11 @@ use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
pub struct Serializer<'a, 'b> { pub struct Serializer<'a, 'b> {
pub txn: &'a mut heed::RwTxn<'b, MainT>, pub txn: &'a mut heed::RwTxn<'b, MainT>,
pub schema: &'a Schema, pub schema: &'a mut Schema,
pub document_store: DocumentsFields, pub document_store: DocumentsFields,
pub document_fields_counts: DocumentsFieldsCounts, pub document_fields_counts: DocumentsFieldsCounts,
pub indexer: &'a mut RawIndexer, pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap, pub ranked_map: &'a mut RankedMap,
pub fields_map: &'a mut FieldsMap,
pub document_id: DocumentId, pub document_id: DocumentId,
} }
@ -159,7 +158,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts, document_fields_counts: self.document_fields_counts,
indexer: self.indexer, indexer: self.indexer,
ranked_map: self.ranked_map, ranked_map: self.ranked_map,
fields_map: self.fields_map,
current_key_name: None, current_key_name: None,
}) })
} }
@ -177,7 +175,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts, document_fields_counts: self.document_fields_counts,
indexer: self.indexer, indexer: self.indexer,
ranked_map: self.ranked_map, ranked_map: self.ranked_map,
fields_map: self.fields_map,
}) })
} }
@ -196,13 +193,12 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
pub struct MapSerializer<'a, 'b> { pub struct MapSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b, MainT>, txn: &'a mut heed::RwTxn<'b, MainT>,
schema: &'a Schema, schema: &'a mut Schema,
document_id: DocumentId, document_id: DocumentId,
document_store: DocumentsFields, document_store: DocumentsFields,
document_fields_counts: DocumentsFieldsCounts, document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer, indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap, ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
current_key_name: Option<String>, current_key_name: Option<String>,
} }
@ -237,21 +233,17 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
V: ser::Serialize, V: ser::Serialize,
{ {
let key = key.serialize(ConvertToString)?; let key = key.serialize(ConvertToString)?;
match self.schema.attribute(&key) { serialize_value(
Some(attribute) => serialize_value(
self.txn, self.txn,
attribute, key,
self.schema.props(attribute), self.schema,
self.document_id, self.document_id,
self.document_store, self.document_store,
self.document_fields_counts, self.document_fields_counts,
self.indexer, self.indexer,
self.ranked_map, self.ranked_map,
self.fields_map,
value, value,
), )
None => Ok(()),
}
} }
fn end(self) -> Result<Self::Ok, Self::Error> { fn end(self) -> Result<Self::Ok, Self::Error> {
@ -261,13 +253,12 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
pub struct StructSerializer<'a, 'b> { pub struct StructSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b, MainT>, txn: &'a mut heed::RwTxn<'b, MainT>,
schema: &'a Schema, schema: &'a mut Schema,
document_id: DocumentId, document_id: DocumentId,
document_store: DocumentsFields, document_store: DocumentsFields,
document_fields_counts: DocumentsFieldsCounts, document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer, indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap, ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
} }
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
@ -282,19 +273,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
where where
T: ser::Serialize, T: ser::Serialize,
{ {
// let id = fields_map.insert(key)?;
// let attribute = match self.schema.attribute(id) {
// Some(attribute) => attribute,
// None => {
// },
// }
serialize_value( serialize_value(
self.txn, self.txn,
attribute, key.to_string(),
self.schema.props(attribute), self.schema,
self.document_id, self.document_id,
self.document_store, self.document_store,
self.document_fields_counts, self.document_fields_counts,
@ -311,7 +293,36 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
pub fn serialize_value<'a, T: ?Sized>( pub fn serialize_value<'a, T: ?Sized>(
txn: &mut heed::RwTxn<MainT>, txn: &mut heed::RwTxn<MainT>,
attribute: &'static str, attribute: String,
schema: &'a mut Schema,
document_id: DocumentId,
document_store: DocumentsFields,
documents_fields_counts: DocumentsFieldsCounts,
indexer: &mut RawIndexer,
ranked_map: &mut RankedMap,
value: &T,
) -> Result<(), SerializerError>
where
T: ser::Serialize,
{
let field_id = schema.get_or_create(attribute)?;
serialize_value_with_id(
txn,
field_id,
schema,
document_id,
document_store,
documents_fields_counts,
indexer,
ranked_map,
value
)
}
pub fn serialize_value_with_id<'a, T: ?Sized>(
txn: &mut heed::RwTxn<MainT>,
field_id: FieldId,
schema: &'a Schema, schema: &'a Schema,
document_id: DocumentId, document_id: DocumentId,
document_store: DocumentsFields, document_store: DocumentsFields,
@ -324,12 +335,11 @@ where
T: ser::Serialize, T: ser::Serialize,
{ {
let serialized = serde_json::to_vec(value)?; let serialized = serde_json::to_vec(value)?;
let field_id = schema.get_or_create(attribute)?;
document_store.put_document_field(txn, document_id, field_id, &serialized)?; document_store.put_document_field(txn, document_id, field_id, &serialized)?;
if let Some(indexed_pos) = schema.id_is_indexed(field_id) { if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
let indexer = Indexer { let indexer = Indexer {
field_id, pos: *indexed_pos,
indexer, indexer,
document_id, document_id,
}; };
@ -337,13 +347,13 @@ where
documents_fields_counts.put_document_field_count( documents_fields_counts.put_document_field_count(
txn, txn,
document_id, document_id,
field_id, *indexed_pos,
number_of_words as u16, number_of_words as u16,
)?; )?;
} }
} }
if let Some(field_id) = schema.id_is_ranked(field_id) { if schema.id_is_ranked(field_id) {
let number = value.serialize(ConvertToNumber)?; let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, field_id, number); ranked_map.insert(document_id, field_id, number);
} }

View File

@ -1,14 +1,14 @@
use heed::types::{ByteSlice, OwnedType}; use heed::types::{ByteSlice, OwnedType};
use crate::database::MainT; use crate::database::MainT;
use heed::Result as ZResult; use heed::Result as ZResult;
use meilisearch_schema::SchemaAttr; use meilisearch_schema::FieldId;
use super::DocumentAttrKey; use super::DocumentFieldStoredKey;
use crate::DocumentId; use crate::DocumentId;
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct DocumentsFields { pub struct DocumentsFields {
pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>, pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>,
} }
impl DocumentsFields { impl DocumentsFields {
@ -16,10 +16,10 @@ impl DocumentsFields {
self, self,
writer: &mut heed::RwTxn<MainT>, writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
attribute: SchemaAttr, attribute: FieldId,
value: &[u8], value: &[u8],
) -> ZResult<()> { ) -> ZResult<()> {
let key = DocumentAttrKey::new(document_id, attribute); let key = DocumentFieldStoredKey::new(document_id, attribute);
self.documents_fields.put(writer, &key, value) self.documents_fields.put(writer, &key, value)
} }
@ -28,8 +28,8 @@ impl DocumentsFields {
writer: &mut heed::RwTxn<MainT>, writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
) -> ZResult<usize> { ) -> ZResult<usize> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min()); let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max()); let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
self.documents_fields.delete_range(writer, &(start..=end)) self.documents_fields.delete_range(writer, &(start..=end))
} }
@ -41,9 +41,9 @@ impl DocumentsFields {
self, self,
reader: &'txn heed::RoTxn<MainT>, reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
attribute: SchemaAttr, attribute: FieldId,
) -> ZResult<Option<&'txn [u8]>> { ) -> ZResult<Option<&'txn [u8]>> {
let key = DocumentAttrKey::new(document_id, attribute); let key = DocumentFieldStoredKey::new(document_id, attribute);
self.documents_fields.get(reader, &key) self.documents_fields.get(reader, &key)
} }
@ -52,25 +52,25 @@ impl DocumentsFields {
reader: &'txn heed::RoTxn<MainT>, reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
) -> ZResult<DocumentFieldsIter<'txn>> { ) -> ZResult<DocumentFieldsIter<'txn>> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min()); let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max()); let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
let iter = self.documents_fields.range(reader, &(start..=end))?; let iter = self.documents_fields.range(reader, &(start..=end))?;
Ok(DocumentFieldsIter { iter }) Ok(DocumentFieldsIter { iter })
} }
} }
pub struct DocumentFieldsIter<'txn> { pub struct DocumentFieldsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>, iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>,
} }
impl<'txn> Iterator for DocumentFieldsIter<'txn> { impl<'txn> Iterator for DocumentFieldsIter<'txn> {
type Item = ZResult<(SchemaAttr, &'txn [u8])>; type Item = ZResult<(FieldId, &'txn [u8])>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() { match self.iter.next() {
Some(Ok((key, bytes))) => { Some(Ok((key, bytes))) => {
let attr = SchemaAttr(key.attr.get()); let field_id = FieldId(key.field_id.get());
Some(Ok((attr, bytes))) Some(Ok((field_id, bytes)))
} }
Some(Err(e)) => Some(Err(e)), Some(Err(e)) => Some(Err(e)),
None => None, None => None,

View File

@ -1,13 +1,13 @@
use super::DocumentAttrKey; use super::DocumentFieldIndexedKey;
use crate::database::MainT; use crate::database::MainT;
use crate::DocumentId; use crate::DocumentId;
use heed::types::OwnedType; use heed::types::OwnedType;
use heed::Result as ZResult; use heed::Result as ZResult;
use meilisearch_schema::FieldId; use meilisearch_schema::IndexedPos;
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts { pub struct DocumentsFieldsCounts {
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u16>>, pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
} }
impl DocumentsFieldsCounts { impl DocumentsFieldsCounts {
@ -15,10 +15,10 @@ impl DocumentsFieldsCounts {
self, self,
writer: &mut heed::RwTxn<MainT>, writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
attribute: FieldId, attribute: IndexedPos,
value: u16, value: u16,
) -> ZResult<()> { ) -> ZResult<()> {
let key = DocumentAttrKey::new(document_id, attribute); let key = DocumentFieldIndexedKey::new(document_id, attribute);
self.documents_fields_counts.put(writer, &key, &value) self.documents_fields_counts.put(writer, &key, &value)
} }
@ -27,10 +27,9 @@ impl DocumentsFieldsCounts {
writer: &mut heed::RwTxn<MainT>, writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
) -> ZResult<usize> { ) -> ZResult<usize> {
let start = DocumentAttrKey::new(document_id, FieldId::min()); let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
let end = DocumentAttrKey::new(document_id, FieldId::max()); let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
self.documents_fields_counts self.documents_fields_counts.delete_range(writer, &(start..=end))
.delete_range(writer, &(start..=end))
} }
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> { pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
@ -41,9 +40,9 @@ impl DocumentsFieldsCounts {
self, self,
reader: &heed::RoTxn<MainT>, reader: &heed::RoTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
attribute: FieldId, attribute: IndexedPos,
) -> ZResult<Option<u16>> { ) -> ZResult<Option<u16>> {
let key = DocumentAttrKey::new(document_id, attribute); let key = DocumentFieldIndexedKey::new(document_id, attribute);
match self.documents_fields_counts.get(reader, &key)? { match self.documents_fields_counts.get(reader, &key)? {
Some(count) => Ok(Some(count)), Some(count) => Ok(Some(count)),
None => Ok(None), None => Ok(None),
@ -55,8 +54,8 @@ impl DocumentsFieldsCounts {
reader: &'txn heed::RoTxn<MainT>, reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
) -> ZResult<DocumentFieldsCountsIter<'txn>> { ) -> ZResult<DocumentFieldsCountsIter<'txn>> {
let start = DocumentAttrKey::new(document_id, FieldId::min()); let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
let end = DocumentAttrKey::new(document_id, FieldId::max()); let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
let iter = self.documents_fields_counts.range(reader, &(start..=end))?; let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
Ok(DocumentFieldsCountsIter { iter }) Ok(DocumentFieldsCountsIter { iter })
} }
@ -79,17 +78,17 @@ impl DocumentsFieldsCounts {
} }
pub struct DocumentFieldsCountsIter<'txn> { pub struct DocumentFieldsCountsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>, iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
} }
impl Iterator for DocumentFieldsCountsIter<'_> { impl Iterator for DocumentFieldsCountsIter<'_> {
type Item = ZResult<(FieldId, u16)>; type Item = ZResult<(IndexedPos, u16)>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() { match self.iter.next() {
Some(Ok((key, count))) => { Some(Ok((key, count))) => {
let attr = FieldId(key.attr.get()); let indexed_pos = IndexedPos(key.indexed_pos.get());
Some(Ok((attr, count))) Some(Ok((indexed_pos, count)))
} }
Some(Err(e)) => Some(Err(e)), Some(Err(e)) => Some(Err(e)),
None => None, None => None,
@ -99,7 +98,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> {
pub struct DocumentsIdsIter<'txn> { pub struct DocumentsIdsIter<'txn> {
last_seen_id: Option<DocumentId>, last_seen_id: Option<DocumentId>,
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>, iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
} }
impl Iterator for DocumentsIdsIter<'_> { impl Iterator for DocumentsIdsIter<'_> {
@ -123,18 +122,18 @@ impl Iterator for DocumentsIdsIter<'_> {
} }
pub struct AllDocumentsFieldsCountsIter<'txn> { pub struct AllDocumentsFieldsCountsIter<'txn> {
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>, iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
} }
impl Iterator for AllDocumentsFieldsCountsIter<'_> { impl Iterator for AllDocumentsFieldsCountsIter<'_> {
type Item = ZResult<(DocumentId, FieldId, u16)>; type Item = ZResult<(DocumentId, IndexedPos, u16)>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() { match self.iter.next() {
Some(Ok((key, count))) => { Some(Ok((key, count))) => {
let docid = DocumentId(key.docid.get()); let docid = DocumentId(key.docid.get());
let attr = FieldId(key.attr.get()); let indexed_pos = IndexedPos(key.indexed_pos.get());
Some(Ok((docid, attr, count))) Some(Ok((docid, indexed_pos, count)))
} }
Some(Err(e)) => Some(Err(e)), Some(Err(e)) => Some(Err(e)),
None => None, None => None,

View File

@ -1,12 +1,13 @@
use crate::fields_map::FieldsMap; use std::sync::Arc;
use crate::database::MainT; use std::collections::{HashMap, BTreeMap, BTreeSet};
use crate::RankedMap;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str}; use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
use heed::Result as ZResult; use heed::Result as ZResult;
use meilisearch_schema::Schema; use meilisearch_schema::Schema;
use std::collections::{HashMap, BTreeMap, BTreeSet};
use std::sync::Arc; use crate::database::MainT;
use crate::RankedMap;
const CREATED_AT_KEY: &str = "created-at"; const CREATED_AT_KEY: &str = "created-at";
const RANKING_RULES_KEY: &str = "ranking-rules-key"; const RANKING_RULES_KEY: &str = "ranking-rules-key";
@ -18,7 +19,6 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
const NAME_KEY: &str = "name"; const NAME_KEY: &str = "name";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map"; const RANKED_MAP_KEY: &str = "ranked-map";
const FIELDS_MAP_KEY: &str = "fields-map";
const SCHEMA_KEY: &str = "schema"; const SCHEMA_KEY: &str = "schema";
const UPDATED_AT_KEY: &str = "updated-at"; const UPDATED_AT_KEY: &str = "updated-at";
const WORDS_KEY: &str = "words"; const WORDS_KEY: &str = "words";
@ -114,16 +114,6 @@ impl Main {
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY) .get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
} }
pub fn put_fields_map(self, writer: &mut heed::RwTxn<MainT>, fields_map: &FieldsMap) -> ZResult<()> {
self.main
.put::<_, Str, SerdeBincode<FieldsMap>>(writer, FIELDS_MAP_KEY, &fields_map)
}
pub fn fields_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FieldsMap>> {
self.main
.get::<_, Str, SerdeBincode<FieldsMap>>(reader, FIELDS_MAP_KEY)
}
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> { pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes(); let bytes = fst.as_fst().as_bytes();
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes) self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)

View File

@ -43,18 +43,50 @@ use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MR
type BEU64 = zerocopy::U64<byteorder::BigEndian>; type BEU64 = zerocopy::U64<byteorder::BigEndian>;
type BEU16 = zerocopy::U16<byteorder::BigEndian>; type BEU16 = zerocopy::U16<byteorder::BigEndian>;
// #[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
// #[repr(C)]
// pub struct DocumentAttrKey {
// docid: BEU64,
// indexed_pos: BEU16,
// }
// impl DocumentAttrKey {
// fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentAttrKey {
// DocumentAttrKey {
// docid: BEU64::new(docid.0),
// indexed_pos: BEU16::new(indexed_pos.0),
// }
// }
// }
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)] #[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)] #[repr(C)]
pub struct DocumentAttrKey { pub struct DocumentFieldIndexedKey {
docid: BEU64, docid: BEU64,
attr: BEU16, indexed_pos: BEU16,
} }
impl DocumentAttrKey { impl DocumentFieldIndexedKey {
fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey { fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
DocumentAttrKey { DocumentFieldIndexedKey {
docid: BEU64::new(docid.0), docid: BEU64::new(docid.0),
attr: BEU16::new(attr.0), indexed_pos: BEU16::new(indexed_pos.0),
}
}
}
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentFieldStoredKey {
docid: BEU64,
field_id: BEU16,
}
impl DocumentFieldStoredKey {
fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey {
DocumentFieldStoredKey {
docid: BEU64::new(docid.0),
field_id: BEU16::new(field_id.0),
} }
} }
} }
@ -228,7 +260,7 @@ impl Index {
&self, &self,
reader: &heed::RoTxn<MainT>, reader: &heed::RoTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
attribute: SchemaAttr, attribute: FieldId,
) -> MResult<Option<T>> { ) -> MResult<Option<T>> {
let bytes = self let bytes = self
.documents_fields .documents_fields

View File

@ -1,14 +1,13 @@
use std::collections::{HashMap, BTreeSet}; use std::collections::HashMap;
use fst::{set::OpBuilder, SetBuilder}; use fst::{set::OpBuilder, SetBuilder};
use sdset::{duo::Union, SetOperation}; use sdset::{duo::Union, SetOperation};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use meilisearch_schema::{Schema, DISPLAYED, INDEXED};
use crate::database::{MainT, UpdateT}; use crate::database::{MainT, UpdateT};
use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::raw_indexer::RawIndexer; use crate::raw_indexer::RawIndexer;
use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer}; use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer};
use crate::store; use crate::store;
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
use crate::{Error, MResult, RankedMap}; use crate::{Error, MResult, RankedMap};
@ -115,16 +114,11 @@ pub fn apply_documents_addition<'a, 'b>(
None => return Err(Error::SchemaMissing), None => return Err(Error::SchemaMissing),
}; };
if let Some(new_schema) = lazy_new_schema(&schema, &addition) { let identifier = schema.identifier();
main_store.put_schema(writer, &new_schema)?;
schema = new_schema;
}
let identifier = schema.identifier_name();
// 1. store documents ids for future deletion // 1. store documents ids for future deletion
for document in addition { for document in addition {
let document_id = match extract_document_id(identifier, &document)? { let document_id = match extract_document_id(&identifier, &document)? {
Some(id) => id, Some(id) => id,
None => return Err(Error::MissingDocumentId), None => return Err(Error::MissingDocumentId),
}; };
@ -147,8 +141,6 @@ pub fn apply_documents_addition<'a, 'b>(
None => fst::Set::default(), None => fst::Set::default(),
}; };
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 3. index the documents fields in the stores // 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words); let mut indexer = RawIndexer::new(stop_words);
@ -160,7 +152,6 @@ pub fn apply_documents_addition<'a, 'b>(
document_fields_counts: index.documents_fields_counts, document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer, indexer: &mut indexer,
ranked_map: &mut ranked_map, ranked_map: &mut ranked_map,
fields_map: &mut fields_map,
document_id, document_id,
}; };
@ -192,16 +183,11 @@ pub fn apply_documents_partial_addition<'a, 'b>(
None => return Err(Error::SchemaMissing), None => return Err(Error::SchemaMissing),
}; };
if let Some(new_schema) = lazy_new_schema(&schema, &addition) { let identifier = schema.identifier();
main_store.put_schema(writer, &new_schema)?;
schema = new_schema;
}
let identifier = schema.identifier_name();
// 1. store documents ids for future deletion // 1. store documents ids for future deletion
for mut document in addition { for mut document in addition {
let document_id = match extract_document_id(identifier, &document)? { let document_id = match extract_document_id(&identifier, &document)? {
Some(id) => id, Some(id) => id,
None => return Err(Error::MissingDocumentId), None => return Err(Error::MissingDocumentId),
}; };
@ -241,8 +227,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
None => fst::Set::default(), None => fst::Set::default(),
}; };
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 3. index the documents fields in the stores // 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words); let mut indexer = RawIndexer::new(stop_words);
@ -254,7 +238,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
document_fields_counts: index.documents_fields_counts, document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer, indexer: &mut indexer,
ranked_map: &mut ranked_map, ranked_map: &mut ranked_map,
fields_map: &mut fields_map,
document_id, document_id,
}; };
@ -281,7 +264,6 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
}; };
let mut ranked_map = RankedMap::default(); let mut ranked_map = RankedMap::default();
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 1. retrieve all documents ids // 1. retrieve all documents ids
let mut documents_ids_to_reindex = Vec::new(); let mut documents_ids_to_reindex = Vec::new();
@ -312,21 +294,20 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
for result in index.documents_fields.document_fields(writer, *document_id)? { for result in index.documents_fields.document_fields(writer, *document_id)? {
let (attr, bytes) = result?; let (attr, bytes) = result?;
let value: serde_json::Value = serde_json::from_slice(bytes)?; let value: serde_json::Value = serde_json::from_slice(bytes)?;
ram_store.insert((document_id, attr), value); ram_store.insert((document_id, field_id), value);
} }
for ((docid, attr), value) in ram_store.drain() { for ((docid, field_id), value) in ram_store.drain() {
serialize_value( serialize_value_with_id(
writer, writer,
attr, field_id,
schema.props(attr), &schema,
*docid, *docid,
index.documents_fields, index.documents_fields,
index.documents_fields_counts, index.documents_fields_counts,
&mut indexer, &mut indexer,
&mut ranked_map, &mut ranked_map,
&mut fields_map, &value
&value,
)?; )?;
} }
} }
@ -401,30 +382,3 @@ pub fn write_documents_addition_index(
Ok(()) Ok(())
} }
/// Builds an extended schema when `documents` contain fields that `schema`
/// does not know about yet.
///
/// Every key of every document is looked up with `schema.attribute`; the
/// unknown ones are gathered in a `BTreeSet`, so each name is added once and
/// in sorted order. Each new attribute is created as `DISPLAYED | INDEXED`.
///
/// Returns `None` when no document introduces an unknown field, otherwise
/// `Some` with the schema rebuilt from `schema.to_builder()`.
pub fn lazy_new_schema(
    schema: &Schema,
    documents: &[HashMap<String, serde_json::Value>],
) -> Option<Schema> {
    // Unknown field names, deduplicated and ordered.
    let unknown_fields: BTreeSet<&String> = documents
        .iter()
        .flat_map(|document| document.keys())
        .filter(|name| schema.attribute(*name).is_none())
        .collect();

    if unknown_fields.is_empty() {
        return None;
    }

    let mut builder = schema.to_builder();
    for name in unknown_fields {
        builder.new_attribute(name, DISPLAYED | INDEXED);
    }

    Some(builder.build())
}

View File

@ -40,8 +40,8 @@ impl DocumentsDeletion {
where where
D: serde::Serialize, D: serde::Serialize,
{ {
let identifier = schema.identifier_name(); let identifier = schema.identifier();
let document_id = match extract_document_id(identifier, &document)? { let document_id = match extract_document_id(&identifier, &document)? {
Some(id) => id, Some(id) => id,
None => return Err(Error::MissingDocumentId), None => return Err(Error::MissingDocumentId),
}; };
@ -101,18 +101,7 @@ pub fn apply_documents_deletion(
}; };
// collect the ranked attributes according to the schema // collect the ranked attributes according to the schema
let ranked_attrs: Vec<_> = schema let ranked_attrs = schema.get_ranked();
.iter()
.filter_map(
|(_, attr, prop)| {
if prop.is_ranked() {
Some(attr)
} else {
None
}
},
)
.collect();
let mut words_document_ids = HashMap::new(); let mut words_document_ids = HashMap::new();
for id in idset { for id in idset {

View File

@ -1,16 +1,15 @@
use std::collections::{HashMap, BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use heed::Result as ZResult; use heed::Result as ZResult;
use fst::{set::OpBuilder, SetBuilder}; use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf; use sdset::SetBuf;
use meilisearch_schema::Schema;
use meilisearch_schema::{Schema, SchemaAttr, diff_transposition, generate_schema};
use crate::database::{MainT, UpdateT}; use crate::database::{MainT, UpdateT};
use crate::settings::{UpdateState, SettingsUpdate}; use crate::settings::{UpdateState, SettingsUpdate};
use crate::update::documents_addition::reindex_all_documents; use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update}; use crate::update::{next_update_id, Update};
use crate::{store, MResult}; use crate::{store, MResult, Error};
pub fn push_settings_update( pub fn push_settings_update(
writer: &mut heed::RwTxn<UpdateT>, writer: &mut heed::RwTxn<UpdateT>,
@ -35,7 +34,17 @@ pub fn apply_settings_update(
let mut must_reindex = false; let mut must_reindex = false;
let old_schema = index.main.schema(writer)?; let mut schema = match index.main.schema(writer)? {
Some(schema) => schema,
None => {
match settings.attribute_identifier.clone() {
UpdateState::Update(id) => Schema::with_identifier(id),
_ => return Err(Error::MissingSchemaIdentifier)
}
}
};
println!("settings: {:?}", settings);
match settings.ranking_rules { match settings.ranking_rules {
UpdateState::Update(v) => { UpdateState::Update(v) => {
@ -55,157 +64,69 @@ pub fn apply_settings_update(
}, },
_ => (), _ => (),
} }
let identifier = match settings.attribute_identifier.clone() {
UpdateState::Update(v) => v, if let UpdateState::Update(id) = settings.attribute_identifier {
_ => { schema.set_identifier(id)?;
old_schema.clone().unwrap().identifier_name().to_owned()
},
}; };
let attributes_searchable: Vec<String> = match settings.attributes_searchable.clone() {
UpdateState::Update(v) => v, match settings.attributes_searchable.clone() {
UpdateState::Clear => Vec::new(), UpdateState::Update(v) => schema.update_indexed(v)?,
UpdateState::Nothing => { UpdateState::Clear => {
match old_schema.clone() { let clear: Vec<String> = Vec::new();
Some(schema) => { schema.update_indexed(clear)?;
schema.into_iter()
.filter(|(_, props)| props.is_indexed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
}
}, },
UpdateState::Nothing => (),
UpdateState::Add(attrs) => { UpdateState::Add(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_indexed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs { for attr in attrs {
if !old_attrs.contains(&attr) { schema.set_indexed(attr)?;
old_attrs.push(attr);
} }
}
old_attrs
}, },
UpdateState::Delete(attrs) => { UpdateState::Delete(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_indexed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs { for attr in attrs {
old_attrs.retain(|x| *x == attr) schema.remove_indexed(attr);
} }
old_attrs
} }
}; };
let attributes_displayed: Vec<String> = match settings.attributes_displayed.clone() { match settings.attributes_displayed.clone() {
UpdateState::Update(v) => v, UpdateState::Update(v) => schema.update_displayed(v)?,
UpdateState::Clear => Vec::new(), UpdateState::Clear => {
UpdateState::Nothing => { let clear: Vec<String> = Vec::new();
match old_schema.clone() { schema.update_displayed(clear)?;
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_displayed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
}
}, },
UpdateState::Nothing => (),
UpdateState::Add(attrs) => { UpdateState::Add(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_displayed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs { for attr in attrs {
if !old_attrs.contains(&attr) { schema.set_displayed(attr)?;
old_attrs.push(attr);
} }
}
old_attrs
}, },
UpdateState::Delete(attrs) => { UpdateState::Delete(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_displayed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs { for attr in attrs {
old_attrs.retain(|x| *x == attr) schema.remove_displayed(attr);
} }
old_attrs
} }
}; };
let attributes_ranked: Vec<String> = match settings.attributes_ranked.clone() { match settings.attributes_ranked.clone() {
UpdateState::Update(v) => v, UpdateState::Update(v) => schema.update_ranked(v)?,
UpdateState::Clear => Vec::new(), UpdateState::Clear => {
UpdateState::Nothing => { let clear: Vec<String> = Vec::new();
match old_schema.clone() { schema.update_ranked(clear)?;
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_ranked())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
}
}, },
UpdateState::Nothing => (),
UpdateState::Add(attrs) => { UpdateState::Add(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_ranked())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs { for attr in attrs {
if !old_attrs.contains(&attr) { schema.set_ranked(attr)?;
old_attrs.push(attr);
} }
}
old_attrs
}, },
UpdateState::Delete(attrs) => { UpdateState::Delete(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_ranked())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs { for attr in attrs {
old_attrs.retain(|x| *x == attr) schema.remove_ranked(attr);
} }
old_attrs
} }
}; };
let new_schema = generate_schema(identifier, attributes_searchable, attributes_displayed, attributes_ranked); index.main.put_schema(writer, &schema)?;
index.main.put_schema(writer, &new_schema)?; println!("schema: {:?}", schema);
match settings.stop_words { match settings.stop_words {
UpdateState::Update(stop_words) => { UpdateState::Update(stop_words) => {
@ -233,16 +154,6 @@ pub fn apply_settings_update(
let postings_lists_store = index.postings_lists; let postings_lists_store = index.postings_lists;
let docs_words_store = index.docs_words; let docs_words_store = index.docs_words;
if settings.attribute_identifier.is_changed() ||
settings.attributes_ranked.is_changed() ||
settings.attributes_searchable.is_changed() ||
settings.attributes_displayed.is_changed()
{
if let Some(old_schema) = old_schema {
rewrite_all_documents(writer, index, &old_schema, &new_schema)?;
must_reindex = true;
}
}
if must_reindex { if must_reindex {
reindex_all_documents( reindex_all_documents(
writer, writer,
@ -438,46 +349,3 @@ pub fn apply_synonyms_update(
Ok(()) Ok(())
} }
/// Rewrites every stored document so that its fields are keyed by the
/// attributes of `new_schema` instead of those of `old_schema`.
///
/// For each document id listed by `documents_fields_counts`, the stored
/// fields are read, remapped through
/// `diff_transposition(old_schema, new_schema)`, deleted, and then written
/// back under their new attribute. A field whose old attribute maps to `None`
/// in the transposition is not written back.
///
/// # Errors
///
/// Returns the first error raised while listing ids or while reading,
/// deleting or writing fields. A field that fails to be read aborts the
/// rewrite before the document is deleted, so a read error can no longer
/// turn into a silently truncated document.
pub fn rewrite_all_documents(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    old_schema: &Schema,
    new_schema: &Schema,
) -> MResult<()> {
    let mut documents_ids_to_reindex = Vec::new();

    // Retrieve all documents present on the database
    for result in index.documents_fields_counts.documents_ids(writer)? {
        let document_id = result?;
        documents_ids_to_reindex.push(document_id);
    }

    let transposition = diff_transposition(old_schema, new_schema);

    // Rewrite all documents one by one
    for id in documents_ids_to_reindex {
        let mut document: HashMap<SchemaAttr, Vec<u8>> = HashMap::new();

        // Retrieve the old document
        for item in index.documents_fields.document_fields(writer, id)? {
            // Propagate read failures: skipping the entry here would drop the
            // field for good once the document is deleted below.
            let (attr, bytes) = item?;
            if let Some(pos) = transposition[attr.0 as usize] {
                // Save the current document with the new SchemaAttr
                document.insert(SchemaAttr::new(pos), bytes.to_vec());
            }
        }

        // Remove the current document
        index.documents_fields.del_all_document_fields(writer, id)?;

        // Rewrite the new document
        // TODO: use cursor to not do memory jump at each call
        for (key, value) in document {
            index.documents_fields.put_document_field(writer, id, key, &value)?;
        }
    }

    Ok(())
}

View File

@ -5,6 +5,7 @@ pub type SResult<T> = Result<T, Error>;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
FieldNameNotFound(String),
MaxFieldsLimitExceeded, MaxFieldsLimitExceeded,
} }
@ -12,6 +13,7 @@ impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*; use self::Error::*;
match self { match self {
FieldNameNotFound(field) => write!(f, "The field {} doesn't exist", field),
MaxFieldsLimitExceeded => write!(f, "The maximum of possible reatributed field id has been reached"), MaxFieldsLimitExceeded => write!(f, "The maximum of possible reatributed field id has been reached"),
} }
} }

View File

@ -3,9 +3,8 @@ use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{SResult, SchemaAttr}; use crate::{SResult, FieldId};
pub type FieldId = SchemaAttr;
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FieldsMap { pub struct FieldsMap {
@ -43,13 +42,13 @@ impl FieldsMap {
self.name_map.remove(&name); self.name_map.remove(&name);
} }
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> { pub fn get_id<S: Into<String>>(&self, name: S) -> Option<FieldId> {
let name = name.into(); let name = name.into();
self.name_map.get(&name) self.name_map.get(&name).map(|s| *s)
} }
pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> { pub fn get_name<I: Into<FieldId>>(&self, id: I) -> Option<String> {
self.id_map.get(&id.into()) self.id_map.get(&id.into()).map(|s| s.to_string())
} }
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> { pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> {
@ -74,14 +73,14 @@ mod tests {
assert_eq!(fields_map.insert("id").unwrap(), 0.into()); assert_eq!(fields_map.insert("id").unwrap(), 0.into());
assert_eq!(fields_map.insert("title").unwrap(), 1.into()); assert_eq!(fields_map.insert("title").unwrap(), 1.into());
assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into()); assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into());
assert_eq!(fields_map.get_id("id"), Some(&0.into())); assert_eq!(fields_map.get_id("id"), Some(0.into()));
assert_eq!(fields_map.get_id("title"), Some(&1.into())); assert_eq!(fields_map.get_id("title"), Some(1.into()));
assert_eq!(fields_map.get_id("descritpion"), Some(&2.into())); assert_eq!(fields_map.get_id("descritpion"), Some(2.into()));
assert_eq!(fields_map.get_id("date"), None); assert_eq!(fields_map.get_id("date"), None);
assert_eq!(fields_map.len(), 3); assert_eq!(fields_map.len(), 3);
assert_eq!(fields_map.get_name(0), Some(&"id".to_owned())); assert_eq!(fields_map.get_name(0), Some("id".to_owned()));
assert_eq!(fields_map.get_name(1), Some(&"title".to_owned())); assert_eq!(fields_map.get_name(1), Some("title".to_owned()));
assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned())); assert_eq!(fields_map.get_name(2), Some("descritpion".to_owned()));
assert_eq!(fields_map.get_name(4), None); assert_eq!(fields_map.get_name(4), None);
fields_map.remove("title"); fields_map.remove("title");
assert_eq!(fields_map.get_id("title"), None); assert_eq!(fields_map.get_id("title"), None);

View File

@ -3,48 +3,88 @@ mod fields_map;
mod schema; mod schema;
pub use error::{Error, SResult}; pub use error::{Error, SResult};
pub use fields_map::{FieldsMap, FieldId}; pub use fields_map::FieldsMap;
pub use schema::{Schema, IndexedPos}; pub use schema::Schema;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)] #[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub u16); pub struct IndexedPos(pub u16);
impl SchemaAttr { impl IndexedPos {
pub const fn new(value: u16) -> SchemaAttr { pub const fn new(value: u16) -> IndexedPos {
SchemaAttr(value) IndexedPos(value)
} }
pub const fn min() -> SchemaAttr { pub const fn min() -> IndexedPos {
SchemaAttr(u16::min_value()) IndexedPos(u16::min_value())
} }
pub const fn max() -> SchemaAttr { pub const fn max() -> IndexedPos {
SchemaAttr(u16::max_value()) IndexedPos(u16::max_value())
} }
pub fn next(self) -> SResult<SchemaAttr> { pub fn next(self) -> SResult<IndexedPos> {
self.0.checked_add(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded) self.0.checked_add(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
} }
pub fn prev(self) -> SResult<SchemaAttr> { pub fn prev(self) -> SResult<IndexedPos> {
self.0.checked_sub(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded) self.0.checked_sub(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
} }
} }
impl From<u16> for SchemaAttr { impl From<u16> for IndexedPos {
fn from(value: u16) -> SchemaAttr { fn from(value: u16) -> IndexedPos {
SchemaAttr(value) IndexedPos(value)
} }
} }
impl Into<u16> for SchemaAttr { impl Into<u16> for IndexedPos {
fn into(self) -> u16 { fn into(self) -> u16 {
self.0 self.0
} }
} }
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct FieldId(pub u16);
impl FieldId {
pub const fn new(value: u16) -> FieldId {
FieldId(value)
}
pub const fn min() -> FieldId {
FieldId(u16::min_value())
}
pub const fn max() -> FieldId {
FieldId(u16::max_value())
}
pub fn next(self) -> SResult<FieldId> {
self.0.checked_add(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
}
pub fn prev(self) -> SResult<FieldId> {
self.0.checked_sub(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
}
}
impl From<u16> for FieldId {
fn from(value: u16) -> FieldId {
FieldId(value)
}
}
impl Into<u16> for FieldId {
fn into(self) -> u16 {
self.0
}
}
// use std::collections::{BTreeMap, HashMap}; // use std::collections::{BTreeMap, HashMap};
// use std::ops::BitOr; // use std::ops::BitOr;

View File

@ -1,10 +1,10 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use crate::{FieldsMap, FieldId, SResult, SchemaAttr}; use serde::{Serialize, Deserialize};
pub type IndexedPos = SchemaAttr; use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos};
#[derive(Default)] #[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Schema { pub struct Schema {
fields_map: FieldsMap, fields_map: FieldsMap,
@ -30,11 +30,21 @@ impl Schema {
self.fields_map.get_name(self.identifier).unwrap().to_string() self.fields_map.get_name(self.identifier).unwrap().to_string()
} }
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> { pub fn set_identifier(&mut self, id: String) -> SResult<()> {
match self.get_id(id.clone()) {
Some(id) => {
self.identifier = id;
Ok(())
},
None => Err(Error::FieldNameNotFound(id))
}
}
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<FieldId> {
self.fields_map.get_id(name) self.fields_map.get_id(name)
} }
pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> { pub fn get_name<I: Into<FieldId>>(&self, id: I) -> Option<String> {
self.fields_map.get_name(id) self.fields_map.get_name(id)
} }
@ -52,7 +62,7 @@ impl Schema {
pub fn get_or_create<S: Into<String> + std::clone::Clone>(&mut self, name: S) -> SResult<FieldId> { pub fn get_or_create<S: Into<String> + std::clone::Clone>(&mut self, name: S) -> SResult<FieldId> {
match self.fields_map.get_id(name.clone()) { match self.fields_map.get_id(name.clone()) {
Some(id) => { Some(id) => {
Ok(*id) Ok(id)
} }
None => { None => {
self.set_indexed(name.clone())?; self.set_indexed(name.clone())?;
@ -61,6 +71,30 @@ impl Schema {
} }
} }
/// Returns a clone of the set of ranked field ids.
pub fn get_ranked(&self) -> HashSet<FieldId> {
self.ranked.clone()
}
/// Returns the names of the ranked fields.
///
/// Ranked ids for which `get_name` yields `None` are left out.
pub fn get_ranked_name(&self) -> HashSet<String> {
    self.ranked
        .iter()
        .copied()
        .filter_map(|field_id| self.get_name(field_id))
        .collect()
}
/// Returns a clone of the set of displayed field ids.
pub fn get_displayed(&self) -> HashSet<FieldId> {
self.displayed.clone()
}
/// Returns the names of the displayed fields.
///
/// Displayed ids for which `get_name` yields `None` are left out.
pub fn get_displayed_name(&self) -> HashSet<String> {
    self.displayed
        .iter()
        .copied()
        .filter_map(|field_id| self.get_name(field_id))
        .collect()
}
/// Returns a clone of the list of indexed field ids, in stored order.
pub fn get_indexed(&self) -> Vec<FieldId> {
self.indexed.clone()
}
/// Returns the names of the indexed fields, in the order of `indexed`.
///
/// Indexed ids for which `get_name` yields `None` are left out.
pub fn get_indexed_name(&self) -> Vec<String> {
    self.indexed
        .iter()
        .copied()
        .filter_map(|field_id| self.get_name(field_id))
        .collect()
}
pub fn set_ranked<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> { pub fn set_ranked<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
let id = self.fields_map.insert(name.into())?; let id = self.fields_map.insert(name.into())?;
self.ranked.insert(id); self.ranked.insert(id);
@ -81,23 +115,42 @@ impl Schema {
Ok((id, pos.into())) Ok((id, pos.into()))
} }
pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<&FieldId> { pub fn remove_ranked<S: Into<String>>(&mut self, name: S) {
if let Some(id) = self.fields_map.get_id(name.into()) {
self.ranked.remove(&id);
}
}
/// Stops displaying the field called `name`.
///
/// Does nothing when `name` is not present in the fields map.
pub fn remove_displayed<S: Into<String>>(&mut self, name: S) {
    let field_id = match self.fields_map.get_id(name.into()) {
        Some(field_id) => field_id,
        None => return,
    };
    self.displayed.remove(&field_id);
}
/// Stops indexing the field called `name`.
///
/// The field is dropped from both `indexed_map` and the `indexed` list.
/// Does nothing when `name` is not present in the fields map.
///
/// NOTE(review): the positions kept in `indexed_map` for the remaining
/// fields are left untouched — confirm that this is what `set_indexed`
/// expects when a field is added afterwards.
pub fn remove_indexed<S: Into<String>>(&mut self, name: S) {
    let field_id = match self.fields_map.get_id(name.into()) {
        Some(field_id) => field_id,
        None => return,
    };
    self.indexed_map.remove(&field_id);
    self.indexed.retain(|indexed_id| *indexed_id != field_id);
}
pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<FieldId> {
match self.fields_map.get_id(name.into()) { match self.fields_map.get_id(name.into()) {
Some(id) => self.ranked.get(id), Some(id) => self.ranked.get(&id).map(|s| *s),
None => None, None => None,
} }
} }
pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<&FieldId> { pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<FieldId> {
match self.fields_map.get_id(name.into()) { match self.fields_map.get_id(name.into()) {
Some(id) => self.displayed.get(id), Some(id) => self.displayed.get(&id).map(|s| *s),
None => None, None => None,
} }
} }
pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<&IndexedPos> { pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<IndexedPos> {
match self.fields_map.get_id(name.into()) { match self.fields_map.get_id(name.into()) {
Some(id) => self.indexed_map.get(id), Some(id) => self.indexed_map.get(&id).map(|s| *s),
None => None, None => None,
} }
} }

View File

@ -28,6 +28,7 @@ pub struct DocIndex {
/// The attribute in the document where the word was found /// The attribute in the document where the word was found
/// along with the index in it. /// along with the index in it.
/// This is an IndexedPos, not a FieldId; it must be converted each time.
pub attribute: u16, pub attribute: u16,
pub word_index: u16, pub word_index: u16,