meilisearch/meilisearch (https://github.com/meilisearch/meilisearch.git)
commit 130fb74928 (parent bbe1845f66)

    introduce a new schemaless way
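For orientation before the hunks: the commit replaces the single positional SchemaAttr numbering with two distinct notions, a stable FieldId for stored fields and an IndexedPos for a field's position among the indexed ones, and lets the schema grow lazily as unknown attributes are seen. A rough usage sketch, with signatures inferred from the hunks below (not code from the commit):

    use meilisearch_schema::{Schema, FieldId, IndexedPos};

    fn sketch() -> Result<(), meilisearch_schema::Error> {
        // A schema now starts from just an identifier and grows lazily.
        let mut schema = Schema::with_identifier("id".to_string());

        // Unknown attributes are registered on first sight instead of being
        // rejected against a fixed, pre-declared schema.
        let field_id: FieldId = schema.get_or_create("title".to_string())?;

        // Stored-field ids and indexed positions are separate numberings.
        if let Some(pos) = schema.id_is_indexed(field_id) {
            let _pos: IndexedPos = *pos;
        }
        Ok(())
    }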
@@ -7,6 +7,5 @@
     "overview",
     "release_date",
     "poster"
-  ],
-  "attributes_ranked": ["release_date"]
+  ]
 }
@@ -13,7 +13,8 @@ use structopt::StructOpt;
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};

 use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
-use meilisearch_schema::SchemaAttr;
+use meilisearch_core::settings::Settings;
+use meilisearch_schema::FieldId;

 // #[cfg(target_os = "linux")]
 #[global_allocator]
@@ -121,7 +122,8 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy

     let settings = {
         let string = fs::read_to_string(&command.settings)?;
-        serde_json::from_str(&string).unwrap()
+        let settings: Settings = serde_json::from_str(&string).unwrap();
+        settings.into()
     };

     let mut update_writer = db.update_write_txn().unwrap();
@@ -357,7 +359,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
            };

            let attr = schema
-                .attribute(&filter)
+                .get_id(filter)
                .expect("Could not find filtered attribute");

            builder.with_filter(move |document_id| {
@@ -388,11 +390,11 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
        for (name, text) in document.0 {
            print!("{}: ", name);

-            let attr = schema.attribute(&name).unwrap();
+            let attr = schema.get_id(&name).unwrap();
            let highlights = doc
                .highlights
                .iter()
-                .filter(|m| SchemaAttr::new(m.attribute) == attr)
+                .filter(|m| FieldId::new(m.attribute) == attr)
                .cloned();
            let (text, highlights) =
                crop_text(&text, highlights, command.char_context);
@@ -407,8 +409,8 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<

        let mut matching_attributes = HashSet::new();
        for highlight in doc.highlights {
-            let attr = SchemaAttr::new(highlight.attribute);
-            let name = schema.attribute_name(attr);
+            let attr = FieldId::new(highlight.attribute);
+            let name = schema.get_name(attr);
            matching_attributes.insert(name);
        }

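With positional attributes gone, the example binary resolves attributes through the schema's name/id map in both directions. A minimal sketch of the assumed lookup API (hypothetical usage, not commit code; it presumes a `schema: Schema` in scope):

    fn lookup(schema: &meilisearch_schema::Schema) {
        // get_id maps an attribute name to its FieldId; get_name inverts it.
        let attr = schema.get_id("release_date").expect("unknown attribute");
        let name = schema.get_name(attr).expect("unknown field id");
        assert_eq!(name, "release_date");
    }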
@@ -1,6 +1,6 @@
 use std::cmp::{Ordering, Reverse};
 use std::collections::hash_map::{HashMap, Entry};
-use meilisearch_schema::SchemaAttr;
+use meilisearch_schema::IndexedPos;
 use slice_group_by::GroupBy;
 use crate::{RawDocument, MResult};
 use crate::bucket_sort::BareMatch;
@@ -32,7 +32,7 @@ impl Criterion for Exact {
         for bm in group {
             for di in ctx.postings_lists[bm.postings_list].as_ref() {

-                let attr = SchemaAttr(di.attribute);
+                let attr = IndexedPos(di.attribute);
                 let count = match fields_counts.entry(attr) {
                     Entry::Occupied(entry) => *entry.get(),
                     Entry::Vacant(entry) => {
@@ -69,7 +69,7 @@ impl<'a> SortByAttr<'a> {
         reversed: bool,
     ) -> Result<SortByAttr<'a>, SortByAttrError> {
         let field_id = match schema.get_id(attr_name) {
-            Some(field_id) => *field_id,
+            Some(field_id) => field_id,
             None => return Err(SortByAttrError::AttributeNotFound),
         };

@@ -8,11 +8,12 @@ pub type MResult<T> = Result<T, Error>;
 pub enum Error {
     Io(io::Error),
     IndexAlreadyExists,
-    SchemaDiffer,
+    MissingSchemaIdentifier,
     SchemaMissing,
     WordIndexMissing,
     MissingDocumentId,
     MaxFieldsLimitExceeded,
+    Schema(meilisearch_schema::Error),
     Zlmdb(heed::Error),
     Fst(fst::Error),
     SerdeJson(SerdeJsonError),
@@ -28,6 +29,12 @@ impl From<io::Error> for Error {
     }
 }

+impl From<meilisearch_schema::Error> for Error {
+    fn from(error: meilisearch_schema::Error) -> Error {
+        Error::Schema(error)
+    }
+}
+
 impl From<heed::Error> for Error {
     fn from(error: heed::Error) -> Error {
         Error::Zlmdb(error)
@@ -76,10 +83,12 @@ impl fmt::Display for Error {
         match self {
             Io(e) => write!(f, "{}", e),
             IndexAlreadyExists => write!(f, "index already exists"),
-            SchemaDiffer => write!(f, "schemas differ"),
+            MissingSchemaIdentifier => write!(f, "schema cannot be built without an identifier"),
             SchemaMissing => write!(f, "this index does not have a schema"),
             WordIndexMissing => write!(f, "this index does not have a word index"),
             MissingDocumentId => write!(f, "document id is missing"),
+            MaxFieldsLimitExceeded => write!(f, "maximum number of fields in a document exceeded"),
+            Schema(e) => write!(f, "schema error; {}", e),
             Zlmdb(e) => write!(f, "heed error; {}", e),
             Fst(e) => write!(f, "fst error; {}", e),
             SerdeJson(e) => write!(f, "serde json error; {}", e),
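The new Schema variant plus the From impl above let schema failures flow through the crate's usual MResult plumbing with the ? operator. A minimal sketch (hypothetical helper, not commit code; it assumes get_or_create returns a meilisearch_schema error on failure):

    fn create_field(schema: &mut meilisearch_schema::Schema, name: &str) -> MResult<meilisearch_schema::FieldId> {
        // A meilisearch_schema::Error raised here is converted into
        // Error::Schema(..) by the From impl added in the hunk above.
        let field_id = schema.get_or_create(name.to_string())?;
        Ok(field_id)
    }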
@@ -136,7 +136,7 @@ mod tests {
     use std::iter::FromIterator;

     use fst::{IntoStreamer, Set};
-    use meilisearch_schema::SchemaAttr;
+    use meilisearch_schema::IndexedPos;
     use sdset::SetBuf;
     use tempfile::TempDir;

@@ -295,14 +295,14 @@ mod tests {
         for ((docid, attr, _), count) in fields_counts {
             let prev = index
                 .documents_fields_counts
-                .document_field_count(&mut writer, docid, SchemaAttr(attr))
+                .document_field_count(&mut writer, docid, IndexedPos(attr))
                 .unwrap();

             let prev = prev.unwrap_or(0);

             index
                 .documents_fields_counts
-                .put_document_field_count(&mut writer, docid, SchemaAttr(attr), prev + count)
+                .put_document_field_count(&mut writer, docid, IndexedPos(attr), prev + count)
                 .unwrap();
         }

@@ -180,16 +180,16 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O
 mod tests {

     use super::*;
-    use meilisearch_schema::SchemaAttr;
+    use meilisearch_schema::IndexedPos;

     #[test]
     fn strange_apostrophe() {
         let mut indexer = RawIndexer::new(fst::Set::default());

         let docid = DocumentId(0);
-        let attr = SchemaAttr(0);
+        let indexed_pos = IndexedPos(0);
         let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
-        indexer.index_text(docid, attr, text);
+        indexer.index_text(docid, indexed_pos, text);

         let Indexed {
             words_doc_indexes, ..
@@ -209,9 +209,9 @@ mod tests {
         let mut indexer = RawIndexer::new(fst::Set::default());

         let docid = DocumentId(0);
-        let attr = SchemaAttr(0);
+        let indexed_pos = IndexedPos(0);
         let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"];
-        indexer.index_text_seq(docid, attr, text);
+        indexer.index_text_seq(docid, indexed_pos, text);

         let Indexed {
             words_doc_indexes, ..
@@ -234,9 +234,9 @@ mod tests {
         let mut indexer = RawIndexer::new(stop_words);

         let docid = DocumentId(0);
-        let attr = SchemaAttr(0);
+        let indexed_pos = IndexedPos(0);
         let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
-        indexer.index_text(docid, attr, text);
+        indexer.index_text(docid, indexed_pos, text);

         let Indexed {
             words_doc_indexes, ..
@@ -258,9 +258,9 @@ mod tests {
         let mut indexer = RawIndexer::new(fst::Set::default());

         let docid = DocumentId(0);
-        let attr = SchemaAttr(0);
+        let indexed_pos = IndexedPos(0);
         let text = "🇯🇵";
-        indexer.index_text(docid, attr, text);
+        indexer.index_text(docid, indexed_pos, text);

         let Indexed {
             words_doc_indexes, ..
@@ -99,7 +99,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
                 let ioread = SerdeJsonIoRead::new(cursor);
                 let value = Value(SerdeJsonDeserializer::new(ioread));

-                Some((*attribute_name, value))
+                Some((attribute_name, value))
             } else {
                 None
             }
@@ -20,7 +20,7 @@ pub use self::convert_to_string::ConvertToString;
 pub use self::deserializer::{Deserializer, DeserializerError};
 pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
 pub use self::indexer::Indexer;
-pub use self::serializer::{serialize_value, Serializer};
+pub use self::serializer::{serialize_value, serialize_value_with_id, Serializer};

 use std::{error::Error, fmt};

@@ -1,4 +1,4 @@
-use meilisearch_schema::{Schema, FieldsMap};
+use meilisearch_schema::{Schema, FieldId};
 use serde::ser;

 use crate::database::MainT;
@@ -10,12 +10,11 @@ use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};

 pub struct Serializer<'a, 'b> {
     pub txn: &'a mut heed::RwTxn<'b, MainT>,
-    pub schema: &'a Schema,
+    pub schema: &'a mut Schema,
     pub document_store: DocumentsFields,
     pub document_fields_counts: DocumentsFieldsCounts,
     pub indexer: &'a mut RawIndexer,
     pub ranked_map: &'a mut RankedMap,
-    pub fields_map: &'a mut FieldsMap,
     pub document_id: DocumentId,
 }

@@ -159,7 +158,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
             document_fields_counts: self.document_fields_counts,
             indexer: self.indexer,
             ranked_map: self.ranked_map,
-            fields_map: self.fields_map,
             current_key_name: None,
         })
     }
@@ -177,7 +175,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
             document_fields_counts: self.document_fields_counts,
             indexer: self.indexer,
             ranked_map: self.ranked_map,
-            fields_map: self.fields_map,
         })
     }

@@ -196,13 +193,12 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {

 pub struct MapSerializer<'a, 'b> {
     txn: &'a mut heed::RwTxn<'b, MainT>,
-    schema: &'a Schema,
+    schema: &'a mut Schema,
     document_id: DocumentId,
     document_store: DocumentsFields,
     document_fields_counts: DocumentsFieldsCounts,
     indexer: &'a mut RawIndexer,
     ranked_map: &'a mut RankedMap,
-    fields_map: &'a mut FieldsMap,
     current_key_name: Option<String>,
 }

@@ -237,21 +233,17 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
         V: ser::Serialize,
     {
         let key = key.serialize(ConvertToString)?;
-        match self.schema.attribute(&key) {
-            Some(attribute) => serialize_value(
+        serialize_value(
             self.txn,
-            attribute,
-            self.schema.props(attribute),
+            key,
+            self.schema,
             self.document_id,
             self.document_store,
             self.document_fields_counts,
             self.indexer,
             self.ranked_map,
-            self.fields_map,
             value,
-            ),
-            None => Ok(()),
-        }
+        )
     }

     fn end(self) -> Result<Self::Ok, Self::Error> {
@@ -261,13 +253,12 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {

 pub struct StructSerializer<'a, 'b> {
     txn: &'a mut heed::RwTxn<'b, MainT>,
-    schema: &'a Schema,
+    schema: &'a mut Schema,
     document_id: DocumentId,
     document_store: DocumentsFields,
     document_fields_counts: DocumentsFieldsCounts,
     indexer: &'a mut RawIndexer,
     ranked_map: &'a mut RankedMap,
-    fields_map: &'a mut FieldsMap,
 }

 impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
@@ -282,19 +273,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
     where
         T: ser::Serialize,
     {
-        // let id = fields_map.insert(key)?;
-
-        // let attribute = match self.schema.attribute(id) {
-        //     Some(attribute) => attribute,
-        //     None => {
-
-        //     },
-        // }
-
         serialize_value(
             self.txn,
-            attribute,
-            self.schema.props(attribute),
+            key.to_string(),
+            self.schema,
             self.document_id,
             self.document_store,
             self.document_fields_counts,
@@ -311,7 +293,36 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {

 pub fn serialize_value<'a, T: ?Sized>(
     txn: &mut heed::RwTxn<MainT>,
-    attribute: &'static str,
+    attribute: String,
+    schema: &'a mut Schema,
+    document_id: DocumentId,
+    document_store: DocumentsFields,
+    documents_fields_counts: DocumentsFieldsCounts,
+    indexer: &mut RawIndexer,
+    ranked_map: &mut RankedMap,
+    value: &T,
+) -> Result<(), SerializerError>
+where
+    T: ser::Serialize,
+{
+    let field_id = schema.get_or_create(attribute)?;
+
+    serialize_value_with_id(
+        txn,
+        field_id,
+        schema,
+        document_id,
+        document_store,
+        documents_fields_counts,
+        indexer,
+        ranked_map,
+        value
+    )
+}
+
+pub fn serialize_value_with_id<'a, T: ?Sized>(
+    txn: &mut heed::RwTxn<MainT>,
+    field_id: FieldId,
     schema: &'a Schema,
     document_id: DocumentId,
     document_store: DocumentsFields,
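The split above gives two entry points: serialize_value resolves (or lazily creates) the FieldId from an attribute name and then delegates, while serialize_value_with_id serves callers that already hold a FieldId, such as reindexing. A sketch of the intended call shapes (hypothetical wrapper, not commit code; parameter types follow the new signatures and assume the crate's types are in scope):

    fn index_field_sketch<T: serde::Serialize + ?Sized>(
        txn: &mut heed::RwTxn<MainT>,
        schema: &mut Schema,
        field_id: FieldId,
        document_id: DocumentId,
        document_store: DocumentsFields,
        documents_fields_counts: DocumentsFieldsCounts,
        indexer: &mut RawIndexer,
        ranked_map: &mut RankedMap,
        value: &T,
    ) -> Result<(), SerializerError> {
        // Name-based entry point: may create the field, so it needs &mut Schema.
        serialize_value(txn, "title".to_string(), schema, document_id,
            document_store, documents_fields_counts, indexer, ranked_map, value)?;

        // Id-based entry point: the FieldId is already known, the schema is only read.
        serialize_value_with_id(txn, field_id, schema, document_id,
            document_store, documents_fields_counts, indexer, ranked_map, value)
    }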
@ -324,12 +335,11 @@ where
|
|||||||
T: ser::Serialize,
|
T: ser::Serialize,
|
||||||
{
|
{
|
||||||
let serialized = serde_json::to_vec(value)?;
|
let serialized = serde_json::to_vec(value)?;
|
||||||
let field_id = schema.get_or_create(attribute)?;
|
|
||||||
document_store.put_document_field(txn, document_id, field_id, &serialized)?;
|
document_store.put_document_field(txn, document_id, field_id, &serialized)?;
|
||||||
|
|
||||||
if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
|
if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
|
||||||
let indexer = Indexer {
|
let indexer = Indexer {
|
||||||
field_id,
|
pos: *indexed_pos,
|
||||||
indexer,
|
indexer,
|
||||||
document_id,
|
document_id,
|
||||||
};
|
};
|
||||||
@ -337,13 +347,13 @@ where
|
|||||||
documents_fields_counts.put_document_field_count(
|
documents_fields_counts.put_document_field_count(
|
||||||
txn,
|
txn,
|
||||||
document_id,
|
document_id,
|
||||||
field_id,
|
*indexed_pos,
|
||||||
number_of_words as u16,
|
number_of_words as u16,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(field_id) = schema.id_is_ranked(field_id) {
|
if schema.id_is_ranked(field_id) {
|
||||||
let number = value.serialize(ConvertToNumber)?;
|
let number = value.serialize(ConvertToNumber)?;
|
||||||
ranked_map.insert(document_id, field_id, number);
|
ranked_map.insert(document_id, field_id, number);
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
use heed::types::{ByteSlice, OwnedType};
|
use heed::types::{ByteSlice, OwnedType};
|
||||||
use crate::database::MainT;
|
use crate::database::MainT;
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use meilisearch_schema::SchemaAttr;
|
use meilisearch_schema::FieldId;
|
||||||
|
|
||||||
use super::DocumentAttrKey;
|
use super::DocumentFieldStoredKey;
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct DocumentsFields {
|
pub struct DocumentsFields {
|
||||||
pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
|
pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentsFields {
|
impl DocumentsFields {
|
||||||
@ -16,10 +16,10 @@ impl DocumentsFields {
|
|||||||
self,
|
self,
|
||||||
writer: &mut heed::RwTxn<MainT>,
|
writer: &mut heed::RwTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
attribute: SchemaAttr,
|
attribute: FieldId,
|
||||||
value: &[u8],
|
value: &[u8],
|
||||||
) -> ZResult<()> {
|
) -> ZResult<()> {
|
||||||
let key = DocumentAttrKey::new(document_id, attribute);
|
let key = DocumentFieldStoredKey::new(document_id, attribute);
|
||||||
self.documents_fields.put(writer, &key, value)
|
self.documents_fields.put(writer, &key, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,8 +28,8 @@ impl DocumentsFields {
|
|||||||
writer: &mut heed::RwTxn<MainT>,
|
writer: &mut heed::RwTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
) -> ZResult<usize> {
|
) -> ZResult<usize> {
|
||||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
|
||||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
|
||||||
self.documents_fields.delete_range(writer, &(start..=end))
|
self.documents_fields.delete_range(writer, &(start..=end))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,9 +41,9 @@ impl DocumentsFields {
|
|||||||
self,
|
self,
|
||||||
reader: &'txn heed::RoTxn<MainT>,
|
reader: &'txn heed::RoTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
attribute: SchemaAttr,
|
attribute: FieldId,
|
||||||
) -> ZResult<Option<&'txn [u8]>> {
|
) -> ZResult<Option<&'txn [u8]>> {
|
||||||
let key = DocumentAttrKey::new(document_id, attribute);
|
let key = DocumentFieldStoredKey::new(document_id, attribute);
|
||||||
self.documents_fields.get(reader, &key)
|
self.documents_fields.get(reader, &key)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,25 +52,25 @@ impl DocumentsFields {
|
|||||||
reader: &'txn heed::RoTxn<MainT>,
|
reader: &'txn heed::RoTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
) -> ZResult<DocumentFieldsIter<'txn>> {
|
) -> ZResult<DocumentFieldsIter<'txn>> {
|
||||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
|
||||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
|
||||||
let iter = self.documents_fields.range(reader, &(start..=end))?;
|
let iter = self.documents_fields.range(reader, &(start..=end))?;
|
||||||
Ok(DocumentFieldsIter { iter })
|
Ok(DocumentFieldsIter { iter })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentFieldsIter<'txn> {
|
pub struct DocumentFieldsIter<'txn> {
|
||||||
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
|
iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
|
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
|
||||||
type Item = ZResult<(SchemaAttr, &'txn [u8])>;
|
type Item = ZResult<(FieldId, &'txn [u8])>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
match self.iter.next() {
|
match self.iter.next() {
|
||||||
Some(Ok((key, bytes))) => {
|
Some(Ok((key, bytes))) => {
|
||||||
let attr = SchemaAttr(key.attr.get());
|
let field_id = FieldId(key.field_id.get());
|
||||||
Some(Ok((attr, bytes)))
|
Some(Ok((field_id, bytes)))
|
||||||
}
|
}
|
||||||
Some(Err(e)) => Some(Err(e)),
|
Some(Err(e)) => Some(Err(e)),
|
||||||
None => None,
|
None => None,
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
use super::DocumentAttrKey;
|
use super::DocumentFieldIndexedKey;
|
||||||
use crate::database::MainT;
|
use crate::database::MainT;
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
use heed::types::OwnedType;
|
use heed::types::OwnedType;
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use meilisearch_schema::FieldId;
|
use meilisearch_schema::IndexedPos;
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct DocumentsFieldsCounts {
|
pub struct DocumentsFieldsCounts {
|
||||||
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentsFieldsCounts {
|
impl DocumentsFieldsCounts {
|
||||||
@ -15,10 +15,10 @@ impl DocumentsFieldsCounts {
|
|||||||
self,
|
self,
|
||||||
writer: &mut heed::RwTxn<MainT>,
|
writer: &mut heed::RwTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
attribute: FieldId,
|
attribute: IndexedPos,
|
||||||
value: u16,
|
value: u16,
|
||||||
) -> ZResult<()> {
|
) -> ZResult<()> {
|
||||||
let key = DocumentAttrKey::new(document_id, attribute);
|
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||||
self.documents_fields_counts.put(writer, &key, &value)
|
self.documents_fields_counts.put(writer, &key, &value)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -27,10 +27,9 @@ impl DocumentsFieldsCounts {
|
|||||||
writer: &mut heed::RwTxn<MainT>,
|
writer: &mut heed::RwTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
) -> ZResult<usize> {
|
) -> ZResult<usize> {
|
||||||
let start = DocumentAttrKey::new(document_id, FieldId::min());
|
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||||
let end = DocumentAttrKey::new(document_id, FieldId::max());
|
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||||
self.documents_fields_counts
|
self.documents_fields_counts.delete_range(writer, &(start..=end))
|
||||||
.delete_range(writer, &(start..=end))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||||
@ -41,9 +40,9 @@ impl DocumentsFieldsCounts {
|
|||||||
self,
|
self,
|
||||||
reader: &heed::RoTxn<MainT>,
|
reader: &heed::RoTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
attribute: FieldId,
|
attribute: IndexedPos,
|
||||||
) -> ZResult<Option<u16>> {
|
) -> ZResult<Option<u16>> {
|
||||||
let key = DocumentAttrKey::new(document_id, attribute);
|
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||||
match self.documents_fields_counts.get(reader, &key)? {
|
match self.documents_fields_counts.get(reader, &key)? {
|
||||||
Some(count) => Ok(Some(count)),
|
Some(count) => Ok(Some(count)),
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
@ -55,8 +54,8 @@ impl DocumentsFieldsCounts {
|
|||||||
reader: &'txn heed::RoTxn<MainT>,
|
reader: &'txn heed::RoTxn<MainT>,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
|
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
|
||||||
let start = DocumentAttrKey::new(document_id, FieldId::min());
|
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||||
let end = DocumentAttrKey::new(document_id, FieldId::max());
|
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||||
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
|
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
|
||||||
Ok(DocumentFieldsCountsIter { iter })
|
Ok(DocumentFieldsCountsIter { iter })
|
||||||
}
|
}
|
||||||
@ -79,17 +78,17 @@ impl DocumentsFieldsCounts {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentFieldsCountsIter<'txn> {
|
pub struct DocumentFieldsCountsIter<'txn> {
|
||||||
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for DocumentFieldsCountsIter<'_> {
|
impl Iterator for DocumentFieldsCountsIter<'_> {
|
||||||
type Item = ZResult<(FieldId, u16)>;
|
type Item = ZResult<(IndexedPos, u16)>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
match self.iter.next() {
|
match self.iter.next() {
|
||||||
Some(Ok((key, count))) => {
|
Some(Ok((key, count))) => {
|
||||||
let attr = FieldId(key.attr.get());
|
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||||
Some(Ok((attr, count)))
|
Some(Ok((indexed_pos, count)))
|
||||||
}
|
}
|
||||||
Some(Err(e)) => Some(Err(e)),
|
Some(Err(e)) => Some(Err(e)),
|
||||||
None => None,
|
None => None,
|
||||||
@ -99,7 +98,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> {
|
|||||||
|
|
||||||
pub struct DocumentsIdsIter<'txn> {
|
pub struct DocumentsIdsIter<'txn> {
|
||||||
last_seen_id: Option<DocumentId>,
|
last_seen_id: Option<DocumentId>,
|
||||||
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for DocumentsIdsIter<'_> {
|
impl Iterator for DocumentsIdsIter<'_> {
|
||||||
@ -123,18 +122,18 @@ impl Iterator for DocumentsIdsIter<'_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct AllDocumentsFieldsCountsIter<'txn> {
|
pub struct AllDocumentsFieldsCountsIter<'txn> {
|
||||||
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
|
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
|
||||||
type Item = ZResult<(DocumentId, FieldId, u16)>;
|
type Item = ZResult<(DocumentId, IndexedPos, u16)>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
match self.iter.next() {
|
match self.iter.next() {
|
||||||
Some(Ok((key, count))) => {
|
Some(Ok((key, count))) => {
|
||||||
let docid = DocumentId(key.docid.get());
|
let docid = DocumentId(key.docid.get());
|
||||||
let attr = FieldId(key.attr.get());
|
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||||
Some(Ok((docid, attr, count)))
|
Some(Ok((docid, indexed_pos, count)))
|
||||||
}
|
}
|
||||||
Some(Err(e)) => Some(Err(e)),
|
Some(Err(e)) => Some(Err(e)),
|
||||||
None => None,
|
None => None,
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
use crate::fields_map::FieldsMap;
|
use std::sync::Arc;
|
||||||
use crate::database::MainT;
|
use std::collections::{HashMap, BTreeMap, BTreeSet};
|
||||||
use crate::RankedMap;
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
|
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use meilisearch_schema::Schema;
|
use meilisearch_schema::Schema;
|
||||||
use std::collections::{HashMap, BTreeMap, BTreeSet};
|
|
||||||
use std::sync::Arc;
|
use crate::database::MainT;
|
||||||
|
use crate::RankedMap;
|
||||||
|
|
||||||
const CREATED_AT_KEY: &str = "created-at";
|
const CREATED_AT_KEY: &str = "created-at";
|
||||||
const RANKING_RULES_KEY: &str = "ranking-rules-key";
|
const RANKING_RULES_KEY: &str = "ranking-rules-key";
|
||||||
@ -18,7 +19,6 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
|
|||||||
const NAME_KEY: &str = "name";
|
const NAME_KEY: &str = "name";
|
||||||
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
||||||
const RANKED_MAP_KEY: &str = "ranked-map";
|
const RANKED_MAP_KEY: &str = "ranked-map";
|
||||||
const FIELDS_MAP_KEY: &str = "fields-map";
|
|
||||||
const SCHEMA_KEY: &str = "schema";
|
const SCHEMA_KEY: &str = "schema";
|
||||||
const UPDATED_AT_KEY: &str = "updated-at";
|
const UPDATED_AT_KEY: &str = "updated-at";
|
||||||
const WORDS_KEY: &str = "words";
|
const WORDS_KEY: &str = "words";
|
||||||
@ -114,16 +114,6 @@ impl Main {
|
|||||||
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
|
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn put_fields_map(self, writer: &mut heed::RwTxn<MainT>, fields_map: &FieldsMap) -> ZResult<()> {
|
|
||||||
self.main
|
|
||||||
.put::<_, Str, SerdeBincode<FieldsMap>>(writer, FIELDS_MAP_KEY, &fields_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn fields_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FieldsMap>> {
|
|
||||||
self.main
|
|
||||||
.get::<_, Str, SerdeBincode<FieldsMap>>(reader, FIELDS_MAP_KEY)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
|
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
|
||||||
let bytes = fst.as_fst().as_bytes();
|
let bytes = fst.as_fst().as_bytes();
|
||||||
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
|
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
|
||||||
|
@ -43,18 +43,50 @@ use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MR
|
|||||||
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
||||||
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
||||||
|
|
||||||
|
// #[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||||
|
// #[repr(C)]
|
||||||
|
// pub struct DocumentAttrKey {
|
||||||
|
// docid: BEU64,
|
||||||
|
// indexed_pos: BEU16,
|
||||||
|
// }
|
||||||
|
|
||||||
|
// impl DocumentAttrKey {
|
||||||
|
// fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentAttrKey {
|
||||||
|
// DocumentAttrKey {
|
||||||
|
// docid: BEU64::new(docid.0),
|
||||||
|
// indexed_pos: BEU16::new(indexed_pos.0),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
pub struct DocumentAttrKey {
|
pub struct DocumentFieldIndexedKey {
|
||||||
docid: BEU64,
|
docid: BEU64,
|
||||||
attr: BEU16,
|
indexed_pos: BEU16,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentAttrKey {
|
impl DocumentFieldIndexedKey {
|
||||||
fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
|
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
|
||||||
DocumentAttrKey {
|
DocumentFieldIndexedKey {
|
||||||
docid: BEU64::new(docid.0),
|
docid: BEU64::new(docid.0),
|
||||||
attr: BEU16::new(attr.0),
|
indexed_pos: BEU16::new(indexed_pos.0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct DocumentFieldStoredKey {
|
||||||
|
docid: BEU64,
|
||||||
|
field_id: BEU16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DocumentFieldStoredKey {
|
||||||
|
fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey {
|
||||||
|
DocumentFieldStoredKey {
|
||||||
|
docid: BEU64::new(docid.0),
|
||||||
|
field_id: BEU16::new(field_id.0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
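Both key types encode the document id and the field number big-endian (BEU64/BEU16). That choice is what makes the min()..=max() range scans and delete_range calls above correct: LMDB compares keys as raw bytes, and big-endian byte order matches numeric order. A self-contained sketch of the ordering argument (plain arrays instead of zerocopy; not commit code):

    fn be_key(docid: u64, field: u16) -> [u8; 10] {
        // Same layout idea as DocumentFieldStoredKey: 8 big-endian bytes of
        // docid, then 2 big-endian bytes of the field number.
        let mut key = [0u8; 10];
        key[..8].copy_from_slice(&docid.to_be_bytes());
        key[8..].copy_from_slice(&field.to_be_bytes());
        key
    }

    fn main() {
        // Within one document, keys sort by field number...
        assert!(be_key(7, 2) < be_key(7, 3));
        // ...and a (docid, min..=max) range can never leak into the next document.
        assert!(be_key(7, u16::MAX) < be_key(8, 0));
        println!("big-endian keys sort like the numbers they encode");
    }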
@@ -228,7 +260,7 @@ impl Index {
         &self,
         reader: &heed::RoTxn<MainT>,
         document_id: DocumentId,
-        attribute: SchemaAttr,
+        attribute: FieldId,
     ) -> MResult<Option<T>> {
         let bytes = self
             .documents_fields
@@ -1,14 +1,13 @@
-use std::collections::{HashMap, BTreeSet};
+use std::collections::HashMap;

 use fst::{set::OpBuilder, SetBuilder};
 use sdset::{duo::Union, SetOperation};
 use serde::{Deserialize, Serialize};
-use meilisearch_schema::{Schema, DISPLAYED, INDEXED};

 use crate::database::{MainT, UpdateT};
 use crate::database::{UpdateEvent, UpdateEventsEmitter};
 use crate::raw_indexer::RawIndexer;
-use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer};
+use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer};
 use crate::store;
 use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
 use crate::{Error, MResult, RankedMap};
@@ -115,16 +114,11 @@ pub fn apply_documents_addition<'a, 'b>(
         None => return Err(Error::SchemaMissing),
     };

-    if let Some(new_schema) = lazy_new_schema(&schema, &addition) {
-        main_store.put_schema(writer, &new_schema)?;
-        schema = new_schema;
-    }
-
-    let identifier = schema.identifier_name();
+    let identifier = schema.identifier();

     // 1. store documents ids for future deletion
     for document in addition {
-        let document_id = match extract_document_id(identifier, &document)? {
+        let document_id = match extract_document_id(&identifier, &document)? {
             Some(id) => id,
             None => return Err(Error::MissingDocumentId),
         };
@@ -147,8 +141,6 @@ pub fn apply_documents_addition<'a, 'b>(
         None => fst::Set::default(),
     };

-    let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
-
     // 3. index the documents fields in the stores
     let mut indexer = RawIndexer::new(stop_words);

@@ -160,7 +152,6 @@ pub fn apply_documents_addition<'a, 'b>(
             document_fields_counts: index.documents_fields_counts,
             indexer: &mut indexer,
             ranked_map: &mut ranked_map,
-            fields_map: &mut fields_map,
             document_id,
         };

@@ -192,16 +183,11 @@ pub fn apply_documents_partial_addition<'a, 'b>(
         None => return Err(Error::SchemaMissing),
     };

-    if let Some(new_schema) = lazy_new_schema(&schema, &addition) {
-        main_store.put_schema(writer, &new_schema)?;
-        schema = new_schema;
-    }
-
-    let identifier = schema.identifier_name();
+    let identifier = schema.identifier();

     // 1. store documents ids for future deletion
     for mut document in addition {
-        let document_id = match extract_document_id(identifier, &document)? {
+        let document_id = match extract_document_id(&identifier, &document)? {
             Some(id) => id,
             None => return Err(Error::MissingDocumentId),
         };
@@ -241,8 +227,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
         None => fst::Set::default(),
     };

-    let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
-
     // 3. index the documents fields in the stores
     let mut indexer = RawIndexer::new(stop_words);

@@ -254,7 +238,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
             document_fields_counts: index.documents_fields_counts,
             indexer: &mut indexer,
             ranked_map: &mut ranked_map,
-            fields_map: &mut fields_map,
             document_id,
         };

@@ -281,7 +264,6 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
     };

     let mut ranked_map = RankedMap::default();
-    let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();

     // 1. retrieve all documents ids
     let mut documents_ids_to_reindex = Vec::new();
@@ -312,21 +294,20 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
         for result in index.documents_fields.document_fields(writer, *document_id)? {
             let (attr, bytes) = result?;
             let value: serde_json::Value = serde_json::from_slice(bytes)?;
-            ram_store.insert((document_id, attr), value);
+            ram_store.insert((document_id, field_id), value);
         }

-        for ((docid, attr), value) in ram_store.drain() {
-            serialize_value(
+        for ((docid, field_id), value) in ram_store.drain() {
+            serialize_value_with_id(
                 writer,
-                attr,
-                schema.props(attr),
+                field_id,
+                &schema,
                 *docid,
                 index.documents_fields,
                 index.documents_fields_counts,
                 &mut indexer,
                 &mut ranked_map,
-                &mut fields_map,
-                &value,
+                &value
             )?;
         }
     }
@@ -401,30 +382,3 @@ pub fn write_documents_addition_index(

     Ok(())
 }
-
-pub fn lazy_new_schema(
-    schema: &Schema,
-    documents: &[HashMap<String, serde_json::Value>],
-) -> Option<Schema> {
-    let mut attributes_to_add = BTreeSet::new();
-
-    for document in documents {
-        for (key, _) in document {
-            if schema.attribute(key).is_none() {
-                attributes_to_add.insert(key);
-            }
-        }
-    }
-
-    if attributes_to_add.is_empty() {
-        return None
-    }
-
-    let mut schema_builder = schema.to_builder();
-    for attribute in attributes_to_add {
-        schema_builder.new_attribute(attribute, DISPLAYED | INDEXED);
-    }
-    let schema = schema_builder.build();
-
-    Some(schema)
-}
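lazy_new_schema disappears because the up-front scan for unknown attributes is no longer needed: the Serializer now holds &mut Schema and registers fields the moment it meets them, through schema.get_or_create. A small sketch of the property this relies on (hypothetical, assuming get_or_create behaves as its uses above suggest):

    fn get_or_create_is_idempotent(schema: &mut meilisearch_schema::Schema) {
        // The first call allocates a fresh FieldId for an unseen attribute...
        let first = schema.get_or_create("tags".to_string()).unwrap();
        // ...and every later call with the same name returns that same id,
        // so indexing a document twice cannot duplicate fields.
        let second = schema.get_or_create("tags".to_string()).unwrap();
        assert_eq!(first, second);
    }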
@@ -40,8 +40,8 @@ impl DocumentsDeletion {
     where
         D: serde::Serialize,
     {
-        let identifier = schema.identifier_name();
-        let document_id = match extract_document_id(identifier, &document)? {
+        let identifier = schema.identifier();
+        let document_id = match extract_document_id(&identifier, &document)? {
             Some(id) => id,
             None => return Err(Error::MissingDocumentId),
         };
@@ -101,18 +101,7 @@ pub fn apply_documents_deletion(
     };

     // collect the ranked attributes according to the schema
-    let ranked_attrs: Vec<_> = schema
-        .iter()
-        .filter_map(
-            |(_, attr, prop)| {
-                if prop.is_ranked() {
-                    Some(attr)
-                } else {
-                    None
-                }
-            },
-        )
-        .collect();
+    let ranked_attrs = schema.get_ranked();

     let mut words_document_ids = HashMap::new();
     for id in idset {
@@ -1,16 +1,15 @@
-use std::collections::{HashMap, BTreeMap, BTreeSet};
+use std::collections::{BTreeMap, BTreeSet};

 use heed::Result as ZResult;
 use fst::{set::OpBuilder, SetBuilder};
 use sdset::SetBuf;
-use meilisearch_schema::{Schema, SchemaAttr, diff_transposition, generate_schema};
+use meilisearch_schema::Schema;

 use crate::database::{MainT, UpdateT};
 use crate::settings::{UpdateState, SettingsUpdate};
 use crate::update::documents_addition::reindex_all_documents;
 use crate::update::{next_update_id, Update};
-use crate::{store, MResult};
+use crate::{store, MResult, Error};

 pub fn push_settings_update(
     writer: &mut heed::RwTxn<UpdateT>,
@@ -35,7 +34,17 @@ pub fn apply_settings_update(

     let mut must_reindex = false;

-    let old_schema = index.main.schema(writer)?;
+    let mut schema = match index.main.schema(writer)? {
+        Some(schema) => schema,
+        None => {
+            match settings.attribute_identifier.clone() {
+                UpdateState::Update(id) => Schema::with_identifier(id),
+                _ => return Err(Error::MissingSchemaIdentifier)
+            }
+        }
+    };
+
+    println!("settings: {:?}", settings);

     match settings.ranking_rules {
         UpdateState::Update(v) => {
@@ -55,157 +64,69 @@ pub fn apply_settings_update(
         },
         _ => (),
     }
-    let identifier = match settings.attribute_identifier.clone() {
-        UpdateState::Update(v) => v,
-        _ => {
-            old_schema.clone().unwrap().identifier_name().to_owned()
-        },
-    };
+    if let UpdateState::Update(id) = settings.attribute_identifier {
+        schema.set_identifier(id)?;
+    };
-    let attributes_searchable: Vec<String> = match settings.attributes_searchable.clone() {
-        UpdateState::Update(v) => v,
-        UpdateState::Clear => Vec::new(),
-        UpdateState::Nothing => {
-            match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_indexed())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            }
-        },
+    match settings.attributes_searchable.clone() {
+        UpdateState::Update(v) => schema.update_indexed(v)?,
+        UpdateState::Clear => {
+            let clear: Vec<String> = Vec::new();
+            schema.update_indexed(clear)?;
+        },
+        UpdateState::Nothing => (),
         UpdateState::Add(attrs) => {
-            let mut old_attrs = match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_indexed())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            };
             for attr in attrs {
-                if !old_attrs.contains(&attr) {
-                    old_attrs.push(attr);
-                }
+                schema.set_indexed(attr)?;
             }
-            old_attrs
         },
         UpdateState::Delete(attrs) => {
-            let mut old_attrs = match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_indexed())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            };
             for attr in attrs {
-                old_attrs.retain(|x| *x == attr)
+                schema.remove_indexed(attr);
             }
-            old_attrs
         }
     };
-    let attributes_displayed: Vec<String> = match settings.attributes_displayed.clone() {
-        UpdateState::Update(v) => v,
-        UpdateState::Clear => Vec::new(),
-        UpdateState::Nothing => {
-            match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_displayed())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            }
-        },
+    match settings.attributes_displayed.clone() {
+        UpdateState::Update(v) => schema.update_displayed(v)?,
+        UpdateState::Clear => {
+            let clear: Vec<String> = Vec::new();
+            schema.update_displayed(clear)?;
+        },
+        UpdateState::Nothing => (),
         UpdateState::Add(attrs) => {
-            let mut old_attrs = match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_displayed())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            };
             for attr in attrs {
-                if !old_attrs.contains(&attr) {
-                    old_attrs.push(attr);
-                }
+                schema.set_displayed(attr)?;
             }
-            old_attrs
         },
         UpdateState::Delete(attrs) => {
-            let mut old_attrs = match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_displayed())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            };
             for attr in attrs {
-                old_attrs.retain(|x| *x == attr)
+                schema.remove_displayed(attr);
             }
-            old_attrs
         }
     };
-    let attributes_ranked: Vec<String> = match settings.attributes_ranked.clone() {
-        UpdateState::Update(v) => v,
-        UpdateState::Clear => Vec::new(),
-        UpdateState::Nothing => {
-            match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_ranked())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            }
-        },
+    match settings.attributes_ranked.clone() {
+        UpdateState::Update(v) => schema.update_ranked(v)?,
+        UpdateState::Clear => {
+            let clear: Vec<String> = Vec::new();
+            schema.update_ranked(clear)?;
+        },
+        UpdateState::Nothing => (),
        UpdateState::Add(attrs) => {
-            let mut old_attrs = match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_ranked())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            };
             for attr in attrs {
-                if !old_attrs.contains(&attr) {
-                    old_attrs.push(attr);
-                }
+                schema.set_ranked(attr)?;
             }
-            old_attrs
         },
         UpdateState::Delete(attrs) => {
-            let mut old_attrs = match old_schema.clone() {
-                Some(schema) => {
-                    schema.into_iter()
-                        .filter(|(_, props)| props.is_ranked())
-                        .map(|(name, _)| name)
-                        .collect()
-                },
-                None => Vec::new(),
-            };
             for attr in attrs {
-                old_attrs.retain(|x| *x == attr)
+                schema.remove_ranked(attr);
             }
-            old_attrs
         }
     };

-    let new_schema = generate_schema(identifier, attributes_searchable, attributes_displayed, attributes_ranked);
+    index.main.put_schema(writer, &schema)?;

-    index.main.put_schema(writer, &new_schema)?;
+    println!("schema: {:?}", schema);

     match settings.stop_words {
         UpdateState::Update(stop_words) => {
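Each settings field above is driven by the same UpdateState dispatch. The enum itself is not shown in this diff; a plausible shape, reconstructed from its uses (the real definition lives in crate::settings and may differ):

    pub enum UpdateState<T> {
        Update(T),  // replace the value (or the whole attribute list)
        Add(T),     // add entries to the existing list
        Delete(T),  // remove entries from the existing list
        Clear,      // reset to empty
        Nothing,    // leave the setting untouched
    }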
@ -233,16 +154,6 @@ pub fn apply_settings_update(
|
|||||||
let postings_lists_store = index.postings_lists;
|
let postings_lists_store = index.postings_lists;
|
||||||
let docs_words_store = index.docs_words;
|
let docs_words_store = index.docs_words;
|
||||||
|
|
||||||
if settings.attribute_identifier.is_changed() ||
|
|
||||||
settings.attributes_ranked.is_changed() ||
|
|
||||||
settings.attributes_searchable.is_changed() ||
|
|
||||||
settings.attributes_displayed.is_changed()
|
|
||||||
{
|
|
||||||
if let Some(old_schema) = old_schema {
|
|
||||||
rewrite_all_documents(writer, index, &old_schema, &new_schema)?;
|
|
||||||
must_reindex = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if must_reindex {
|
if must_reindex {
|
||||||
reindex_all_documents(
|
reindex_all_documents(
|
||||||
writer,
|
writer,
|
||||||
@@ -438,46 +349,3 @@ pub fn apply_synonyms_update(

     Ok(())
 }
-
-pub fn rewrite_all_documents(
-    writer: &mut heed::RwTxn<MainT>,
-    index: &store::Index,
-    old_schema: &Schema,
-    new_schema: &Schema,
-) -> MResult<()> {
-
-    let mut documents_ids_to_reindex = Vec::new();
-
-    // Retrieve all documents present on the database
-    for result in index.documents_fields_counts.documents_ids(writer)? {
-        let document_id = result?;
-        documents_ids_to_reindex.push(document_id);
-    }
-
-    let transpotition = diff_transposition(old_schema, new_schema);
-
-    // Rewrite all documents one by one
-    for id in documents_ids_to_reindex {
-        let mut document: HashMap<SchemaAttr, Vec<u8>> = HashMap::new();
-
-        // Retrieve the old document
-        for item in index.documents_fields.document_fields(writer, id)? {
-            if let Ok(item) = item {
-                if let Some(pos) = transpotition[(item.0).0 as usize] {
-                    // Save the current document with the new SchemaAttr
-                    document.insert(SchemaAttr::new(pos), item.1.to_vec());
-                }
-            }
-        }
-        // Remove the current document
-        index.documents_fields.del_all_document_fields(writer, id)?;
-
-        // Rewrite the new document
-        // TODO: use cursor to not do memory jump at each call
-        for (key, value) in document {
-            index.documents_fields.put_document_field(writer, id, key, &value)?;
-        }
-    }
-
-    Ok(())
-}
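The removed `rewrite_all_documents` leaned on `diff_transposition`, whose definition is not part of this diff; from the call site (`transpotition[(item.0).0 as usize]` yielding an `Option` of the new position) its shape was presumably an old-id-indexed table of optional new ids. A hypothetical reconstruction of that idea:

```rust
// Hypothetical reconstruction: old attribute id -> new id, `None` if dropped.
fn diff_transposition(old_names: &[&str], new_names: &[&str]) -> Vec<Option<u16>> {
    old_names
        .iter()
        .map(|name| new_names.iter().position(|n| n == name).map(|pos| pos as u16))
        .collect()
}

fn main() {
    // "title" keeps id 0, "overview" is dropped, "poster" moves from 2 to 1.
    let table = diff_transposition(&["title", "overview", "poster"], &["title", "poster"]);
    assert_eq!(table, vec![Some(0), None, Some(1)]);
}
```

With the schemaless approach, field ids are stable in the fields map, so this whole-store rewrite becomes unnecessary and the function can be dropped.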
@@ -5,6 +5,7 @@ pub type SResult<T> = Result<T, Error>;

 #[derive(Debug)]
 pub enum Error {
+    FieldNameNotFound(String),
     MaxFieldsLimitExceeded,
 }

@@ -12,6 +13,7 @@ impl fmt::Display for Error {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         use self::Error::*;
         match self {
+            FieldNameNotFound(field) => write!(f, "The field {} doesn't exist", field),
             MaxFieldsLimitExceeded => write!(f, "The maximum of possible reatributed field id has been reached"),
         }
     }
 }
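Put together, the two hunks make the new variant render like this (a self-contained reproduction of the enum and its `Display` impl as shown above, plus a usage check):

```rust
use std::fmt;

#[derive(Debug)]
enum Error {
    FieldNameNotFound(String),
    MaxFieldsLimitExceeded,
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;
        match self {
            FieldNameNotFound(field) => write!(f, "The field {} doesn't exist", field),
            MaxFieldsLimitExceeded => write!(f, "The maximum of possible reatributed field id has been reached"),
        }
    }
}

fn main() {
    let err = Error::FieldNameNotFound("release_date".to_string());
    assert_eq!(err.to_string(), "The field release_date doesn't exist");
}
```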
@@ -3,9 +3,8 @@ use std::collections::HashMap;

 use serde::{Deserialize, Serialize};

-use crate::{SResult, SchemaAttr};
+use crate::{SResult, FieldId};

-pub type FieldId = SchemaAttr;

 #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct FieldsMap {

@@ -43,13 +42,13 @@ impl FieldsMap {
         self.name_map.remove(&name);
     }

-    pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
+    pub fn get_id<S: Into<String>>(&self, name: S) -> Option<FieldId> {
         let name = name.into();
-        self.name_map.get(&name)
+        self.name_map.get(&name).map(|s| *s)
     }

-    pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
-        self.id_map.get(&id.into())
+    pub fn get_name<I: Into<FieldId>>(&self, id: I) -> Option<String> {
+        self.id_map.get(&id.into()).map(|s| s.to_string())
     }

     pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> {

@@ -74,14 +73,14 @@ mod tests {
         assert_eq!(fields_map.insert("id").unwrap(), 0.into());
         assert_eq!(fields_map.insert("title").unwrap(), 1.into());
         assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into());
-        assert_eq!(fields_map.get_id("id"), Some(&0.into()));
-        assert_eq!(fields_map.get_id("title"), Some(&1.into()));
-        assert_eq!(fields_map.get_id("descritpion"), Some(&2.into()));
+        assert_eq!(fields_map.get_id("id"), Some(0.into()));
+        assert_eq!(fields_map.get_id("title"), Some(1.into()));
+        assert_eq!(fields_map.get_id("descritpion"), Some(2.into()));
         assert_eq!(fields_map.get_id("date"), None);
         assert_eq!(fields_map.len(), 3);
-        assert_eq!(fields_map.get_name(0), Some(&"id".to_owned()));
-        assert_eq!(fields_map.get_name(1), Some(&"title".to_owned()));
-        assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned()));
+        assert_eq!(fields_map.get_name(0), Some("id".to_owned()));
+        assert_eq!(fields_map.get_name(1), Some("title".to_owned()));
+        assert_eq!(fields_map.get_name(2), Some("descritpion".to_owned()));
         assert_eq!(fields_map.get_name(4), None);
         fields_map.remove("title");
         assert_eq!(fields_map.get_id("title"), None);
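`get_id` and `get_name` now hand back owned values instead of references into the map: `FieldId` is a `Copy` newtype, and names are cloned on the way out, so callers no longer hold a borrow of the `FieldsMap`. A cut-down sketch of the two-way map these methods sit on (`name_map`/`id_map` appear in the hunk; the `insert` logic here is a simplification):

```rust
use std::collections::HashMap;

type FieldId = u16; // stand-in for the crate's `FieldId` newtype

#[derive(Default)]
struct FieldsMap {
    name_map: HashMap<String, FieldId>,
    id_map: HashMap<FieldId, String>,
    next_id: FieldId,
}

impl FieldsMap {
    fn insert(&mut self, name: &str) -> FieldId {
        if let Some(id) = self.name_map.get(name) {
            return *id;
        }
        let id = self.next_id;
        self.next_id += 1;
        self.name_map.insert(name.to_string(), id);
        self.id_map.insert(id, name.to_string());
        id
    }

    // Returning owned values (`FieldId` is `Copy`, names are cloned) keeps
    // callers from borrowing the map, which is what the diff above changes.
    fn get_id(&self, name: &str) -> Option<FieldId> {
        self.name_map.get(name).copied()
    }

    fn get_name(&self, id: FieldId) -> Option<String> {
        self.id_map.get(&id).map(|s| s.to_string())
    }
}

fn main() {
    let mut map = FieldsMap::default();
    let id = map.insert("title");
    assert_eq!(map.get_id("title"), Some(id));
    assert_eq!(map.get_name(id), Some("title".to_string()));
}
```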
@@ -3,48 +3,88 @@ mod fields_map;
 mod schema;

 pub use error::{Error, SResult};
-pub use fields_map::{FieldsMap, FieldId};
-pub use schema::{Schema, IndexedPos};
+pub use fields_map::FieldsMap;
+pub use schema::Schema;
 use serde::{Deserialize, Serialize};

 #[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
-pub struct SchemaAttr(pub u16);
+pub struct IndexedPos(pub u16);

-impl SchemaAttr {
-    pub const fn new(value: u16) -> SchemaAttr {
-        SchemaAttr(value)
+impl IndexedPos {
+    pub const fn new(value: u16) -> IndexedPos {
+        IndexedPos(value)
     }

-    pub const fn min() -> SchemaAttr {
-        SchemaAttr(u16::min_value())
+    pub const fn min() -> IndexedPos {
+        IndexedPos(u16::min_value())
     }

-    pub const fn max() -> SchemaAttr {
-        SchemaAttr(u16::max_value())
+    pub const fn max() -> IndexedPos {
+        IndexedPos(u16::max_value())
     }

-    pub fn next(self) -> SResult<SchemaAttr> {
-        self.0.checked_add(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded)
+    pub fn next(self) -> SResult<IndexedPos> {
+        self.0.checked_add(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
     }

-    pub fn prev(self) -> SResult<SchemaAttr> {
-        self.0.checked_sub(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded)
+    pub fn prev(self) -> SResult<IndexedPos> {
+        self.0.checked_sub(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
     }
 }

-impl From<u16> for SchemaAttr {
-    fn from(value: u16) -> SchemaAttr {
-        SchemaAttr(value)
+impl From<u16> for IndexedPos {
+    fn from(value: u16) -> IndexedPos {
+        IndexedPos(value)
     }
 }

-impl Into<u16> for SchemaAttr {
+impl Into<u16> for IndexedPos {
     fn into(self) -> u16 {
         self.0
     }
 }

+#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
+pub struct FieldId(pub u16);
+
+impl FieldId {
+    pub const fn new(value: u16) -> FieldId {
+        FieldId(value)
+    }
+
+    pub const fn min() -> FieldId {
+        FieldId(u16::min_value())
+    }
+
+    pub const fn max() -> FieldId {
+        FieldId(u16::max_value())
+    }
+
+    pub fn next(self) -> SResult<FieldId> {
+        self.0.checked_add(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
+    }
+
+    pub fn prev(self) -> SResult<FieldId> {
+        self.0.checked_sub(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
+    }
+}
+
+impl From<u16> for FieldId {
+    fn from(value: u16) -> FieldId {
+        FieldId(value)
+    }
+}
+
+impl Into<u16> for FieldId {
+    fn into(self) -> u16 {
+        self.0
+    }
+}

 // use std::collections::{BTreeMap, HashMap};
 // use std::ops::BitOr;
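`SchemaAttr` is thus split in two: `IndexedPos`, the position of an attribute in the indexing order, and `FieldId`, the stable identifier handed out by the fields map. Both wrap a `u16` but stay distinct types, so one cannot be passed where the other is expected. Their `next`/`prev` refuse to wrap around the `u16` range; a toy reproduction of that edge behavior:

```rust
#[derive(Debug, PartialEq)]
struct FieldId(u16);

#[derive(Debug, PartialEq)]
enum Error {
    MaxFieldsLimitExceeded,
}

impl FieldId {
    // Overflow surfaces as an error instead of wrapping to 0.
    fn next(self) -> Result<FieldId, Error> {
        self.0.checked_add(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
    }
}

fn main() {
    assert_eq!(FieldId(0).next(), Ok(FieldId(1)));
    assert_eq!(FieldId(u16::max_value()).next(), Err(Error::MaxFieldsLimitExceeded));
}
```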
@@ -1,10 +1,10 @@
 use std::collections::{HashMap, HashSet};

-use crate::{FieldsMap, FieldId, SResult, SchemaAttr};
+use serde::{Serialize, Deserialize};

-pub type IndexedPos = SchemaAttr;
+use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos};

-#[derive(Default)]
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
 pub struct Schema {
     fields_map: FieldsMap,
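Deriving `Serialize`/`Deserialize` (and `Clone`) is what lets the whole schema be persisted as a single value instead of a hand-maintained attribute list. A sketch of the round-trip with a cut-down struct (the real `Schema` has more fields than this hunk shows; `bincode` is assumed here since the fields map already exposes `read_from_bin` returning a `bincode::Result`):

```rust
use std::collections::HashSet;

use serde::{Deserialize, Serialize};

// Cut-down stand-in: the crate's `Schema` carries more state than this.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
struct Schema {
    ranked: HashSet<u16>,
}

fn main() -> bincode::Result<()> {
    let mut schema = Schema::default();
    schema.ranked.insert(0);

    // Serialize to bytes and back; the restored value is identical.
    let bytes = bincode::serialize(&schema)?;
    let restored: Schema = bincode::deserialize(&bytes)?;
    assert_eq!(schema, restored);
    Ok(())
}
```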
@@ -30,11 +30,21 @@ impl Schema {
         self.fields_map.get_name(self.identifier).unwrap().to_string()
     }

-    pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
+    pub fn set_identifier(&mut self, id: String) -> SResult<()> {
+        match self.get_id(id.clone()) {
+            Some(id) => {
+                self.identifier = id;
+                Ok(())
+            },
+            None => Err(Error::FieldNameNotFound(id))
+        }
+    }
+
+    pub fn get_id<S: Into<String>>(&self, name: S) -> Option<FieldId> {
         self.fields_map.get_id(name)
     }

-    pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
+    pub fn get_name<I: Into<FieldId>>(&self, id: I) -> Option<String> {
         self.fields_map.get_name(id)
     }
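`set_identifier` now validates against the fields map and surfaces the new `FieldNameNotFound` error instead of accepting any name. A self-contained sketch of that behavior (a plain `HashMap` standing in for `FieldsMap`):

```rust
use std::collections::HashMap;

#[derive(Debug)]
enum Error {
    FieldNameNotFound(String),
}

#[derive(Default)]
struct Schema {
    fields: HashMap<String, u16>, // stand-in for the crate's FieldsMap
    identifier: u16,
}

impl Schema {
    // Mirrors the new `set_identifier`: only an already-known field name
    // may become the identifier; anything else is `FieldNameNotFound`.
    fn set_identifier(&mut self, id: String) -> Result<(), Error> {
        match self.fields.get(&id) {
            Some(&field_id) => {
                self.identifier = field_id;
                Ok(())
            }
            None => Err(Error::FieldNameNotFound(id)),
        }
    }
}

fn main() {
    let mut schema = Schema::default();
    schema.fields.insert("id".to_string(), 0);
    assert!(schema.set_identifier("id".to_string()).is_ok());
    assert!(schema.set_identifier("uuid".to_string()).is_err());
}
```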
@@ -52,7 +62,7 @@ impl Schema {
     pub fn get_or_create<S: Into<String> + std::clone::Clone>(&mut self, name: S) -> SResult<FieldId> {
         match self.fields_map.get_id(name.clone()) {
             Some(id) => {
-                Ok(*id)
+                Ok(id)
             }
             None => {
                 self.set_indexed(name.clone())?;
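For comparison, `get_or_create`'s lookup-then-insert could be expressed with std's entry API (a sketch only: the real method must also register a brand-new field as indexed and displayed, which is why it goes through `set_indexed` above):

```rust
use std::collections::HashMap;

// Sketch: return the existing id, or mint the next one on first sight.
fn get_or_create(fields: &mut HashMap<String, u16>, next_id: &mut u16, name: &str) -> u16 {
    *fields.entry(name.to_string()).or_insert_with(|| {
        let id = *next_id;
        *next_id += 1;
        id
    })
}

fn main() {
    let mut fields = HashMap::new();
    let mut next_id = 0u16;
    assert_eq!(get_or_create(&mut fields, &mut next_id, "title"), 0);
    assert_eq!(get_or_create(&mut fields, &mut next_id, "title"), 0); // idempotent
    assert_eq!(get_or_create(&mut fields, &mut next_id, "poster"), 1);
}
```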
@@ -61,6 +71,30 @@ impl Schema {
             }
         }
     }

+    pub fn get_ranked(&self) -> HashSet<FieldId> {
+        self.ranked.clone()
+    }
+
+    pub fn get_ranked_name(&self) -> HashSet<String> {
+        self.ranked.iter().filter_map(|a| self.get_name(*a)).collect()
+    }
+
+    pub fn get_displayed(&self) -> HashSet<FieldId> {
+        self.displayed.clone()
+    }
+
+    pub fn get_displayed_name(&self) -> HashSet<String> {
+        self.displayed.iter().filter_map(|a| self.get_name(*a)).collect()
+    }
+
+    pub fn get_indexed(&self) -> Vec<FieldId> {
+        self.indexed.clone()
+    }
+
+    pub fn get_indexed_name(&self) -> Vec<String> {
+        self.indexed.iter().filter_map(|a| self.get_name(*a)).collect()
+    }
+
     pub fn set_ranked<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
         let id = self.fields_map.insert(name.into())?;
         self.ranked.insert(id);
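The `*_name` getters resolve each stored id back through the fields map and drop ids that no longer resolve; that is the `filter_map`. The same shape with plain maps:

```rust
use std::collections::{HashMap, HashSet};

// Ids whose name can no longer be resolved are silently skipped.
fn ranked_names(ranked: &HashSet<u16>, id_map: &HashMap<u16, String>) -> HashSet<String> {
    ranked.iter().filter_map(|id| id_map.get(id).cloned()).collect()
}

fn main() {
    let id_map: HashMap<u16, String> = [(0u16, "release_date".to_string())].into_iter().collect();
    let ranked: HashSet<u16> = [0u16, 7].into_iter().collect(); // 7 has no name
    let names = ranked_names(&ranked, &id_map);
    assert_eq!(names.len(), 1);
    assert!(names.contains("release_date"));
}
```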
@@ -81,23 +115,42 @@ impl Schema {
         Ok((id, pos.into()))
     }

+    pub fn remove_ranked<S: Into<String>>(&mut self, name: S) {
+        if let Some(id) = self.fields_map.get_id(name.into()) {
+            self.ranked.remove(&id);
+        }
+    }
+
+    pub fn remove_displayed<S: Into<String>>(&mut self, name: S) {
+        if let Some(id) = self.fields_map.get_id(name.into()) {
+            self.displayed.remove(&id);
+        }
+    }
+
+    pub fn remove_indexed<S: Into<String>>(&mut self, name: S) {
+        if let Some(id) = self.fields_map.get_id(name.into()) {
+            self.indexed_map.remove(&id);
+            self.indexed.retain(|x| *x != id);
+        }
+    }
+
-    pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
+    pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<FieldId> {
         match self.fields_map.get_id(name.into()) {
-            Some(id) => self.ranked.get(id),
+            Some(id) => self.ranked.get(&id).map(|s| *s),
             None => None,
         }
     }

-    pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
+    pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<FieldId> {
         match self.fields_map.get_id(name.into()) {
-            Some(id) => self.displayed.get(id),
+            Some(id) => self.displayed.get(&id).map(|s| *s),
             None => None,
         }
     }

-    pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<&IndexedPos> {
+    pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<IndexedPos> {
         match self.fields_map.get_id(name.into()) {
-            Some(id) => self.indexed_map.get(id),
+            Some(id) => self.indexed_map.get(&id).map(|s| *s),
             None => None,
         }
     }
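Note that `remove_indexed` has to touch two structures: the `indexed_map` of positions and the ordered `indexed` list. A minimal reproduction of that synchronization (plain `u16`s standing in for `FieldId`/`IndexedPos`):

```rust
use std::collections::HashMap;

// Both views of the indexed fields must stay in sync on removal.
fn remove_indexed(indexed: &mut Vec<u16>, indexed_map: &mut HashMap<u16, u16>, id: u16) {
    indexed_map.remove(&id); // FieldId -> IndexedPos entry
    indexed.retain(|x| *x != id); // ordered list of FieldIds
}

fn main() {
    let mut indexed = vec![0u16, 1, 2];
    let mut indexed_map: HashMap<u16, u16> = [(0u16, 0u16), (1, 1), (2, 2)].into_iter().collect();
    remove_indexed(&mut indexed, &mut indexed_map, 1);
    assert_eq!(indexed, vec![0, 2]);
    assert!(!indexed_map.contains_key(&1));
}
```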
@@ -28,6 +28,7 @@ pub struct DocIndex {

     /// The attribute in the document where the word was found
     /// along with the index in it.
+    /// Is an IndexedPos and not FieldId. Must be convert each time.
     pub attribute: u16,
     pub word_index: u16,
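That new doc comment is the practical consequence of the split: a `DocIndex` hit carries an indexed position, so resolving it to a field name means converting position to field id first. A sketch of that two-step lookup with plain maps (the crate's conversion API is not shown in this diff):

```rust
use std::collections::HashMap;

// A hit's `attribute` is an IndexedPos; getting a display name out of it
// means going position -> field id -> name.
fn attribute_name(
    pos_to_field: &HashMap<u16, u16>,   // IndexedPos -> FieldId
    field_names: &HashMap<u16, String>, // FieldId -> name
    attribute: u16,                     // DocIndex::attribute
) -> Option<String> {
    pos_to_field
        .get(&attribute)
        .and_then(|field_id| field_names.get(field_id))
        .cloned()
}

fn main() {
    let pos_to_field: HashMap<u16, u16> = [(0u16, 3u16)].into_iter().collect();
    let field_names: HashMap<u16, String> = [(3u16, "title".to_string())].into_iter().collect();
    assert_eq!(attribute_name(&pos_to_field, &field_names, 0), Some("title".to_string()));
}
```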