mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
Remove the serde Indexer
This commit is contained in:
parent
615825b9fd
commit
5e063da14f
@ -34,7 +34,7 @@ pest_derive = "2.0"
|
||||
regex = "1.3.6"
|
||||
sdset = "0.4.0"
|
||||
serde = { version = "1.0.105", features = ["derive"] }
|
||||
serde_json = "1.0.50"
|
||||
serde_json = { version = "1.0.50", features = ["preserve_order"] }
|
||||
siphasher = "0.3.2"
|
||||
slice-group-by = "0.2.6"
|
||||
unicase = "2.6.0"
|
||||
|
@ -1,362 +0,0 @@
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use serde::ser;
|
||||
use serde::Serialize;
|
||||
|
||||
use super::{ConvertToString, SerializerError};
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::DocumentId;
|
||||
|
||||
pub struct Indexer<'a> {
|
||||
pub pos: IndexedPos,
|
||||
pub indexer: &'a mut RawIndexer,
|
||||
pub document_id: DocumentId,
|
||||
}
|
||||
|
||||
impl<'a> ser::Serializer for Indexer<'a> {
|
||||
type Ok = Option<usize>;
|
||||
type Error = SerializerError;
|
||||
type SerializeSeq = SeqIndexer<'a>;
|
||||
type SerializeTuple = TupleIndexer<'a>;
|
||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeMap = MapIndexer<'a>;
|
||||
type SerializeStruct = StructIndexer<'a>;
|
||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
|
||||
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.serialize_str(&text)
|
||||
}
|
||||
|
||||
fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
|
||||
let number_of_words = self
|
||||
.indexer
|
||||
.index_text(self.document_id, self.pos, text);
|
||||
Ok(Some(number_of_words))
|
||||
}
|
||||
|
||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnindexableType { type_name: "&[u8]" })
|
||||
}
|
||||
|
||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
let number_of_words = self
|
||||
.indexer
|
||||
.index_text(self.document_id, self.pos, &text);
|
||||
Ok(Some(number_of_words))
|
||||
}
|
||||
|
||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn serialize_unit_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn serialize_newtype_struct<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
value: &T,
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
value.serialize(self)
|
||||
}
|
||||
|
||||
fn serialize_newtype_variant<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_value: &T,
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
Err(SerializerError::UnindexableType {
|
||||
type_name: "newtype variant",
|
||||
})
|
||||
}
|
||||
|
||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
||||
let indexer = SeqIndexer {
|
||||
pos: self.pos,
|
||||
document_id: self.document_id,
|
||||
indexer: self.indexer,
|
||||
texts: Vec::new(),
|
||||
};
|
||||
|
||||
Ok(indexer)
|
||||
}
|
||||
|
||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
||||
let indexer = TupleIndexer {
|
||||
pos: self.pos,
|
||||
document_id: self.document_id,
|
||||
indexer: self.indexer,
|
||||
texts: Vec::new(),
|
||||
};
|
||||
|
||||
Ok(indexer)
|
||||
}
|
||||
|
||||
fn serialize_tuple_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeTupleStruct, Self::Error> {
|
||||
Err(SerializerError::UnindexableType {
|
||||
type_name: "tuple struct",
|
||||
})
|
||||
}
|
||||
|
||||
fn serialize_tuple_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeTupleVariant, Self::Error> {
|
||||
Err(SerializerError::UnindexableType {
|
||||
type_name: "tuple variant",
|
||||
})
|
||||
}
|
||||
|
||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
||||
let indexer = MapIndexer {
|
||||
pos: self.pos,
|
||||
document_id: self.document_id,
|
||||
indexer: self.indexer,
|
||||
texts: Vec::new(),
|
||||
};
|
||||
|
||||
Ok(indexer)
|
||||
}
|
||||
|
||||
fn serialize_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeStruct, Self::Error> {
|
||||
let indexer = StructIndexer {
|
||||
pos: self.pos,
|
||||
document_id: self.document_id,
|
||||
indexer: self.indexer,
|
||||
texts: Vec::new(),
|
||||
};
|
||||
|
||||
Ok(indexer)
|
||||
}
|
||||
|
||||
fn serialize_struct_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize,
|
||||
) -> Result<Self::SerializeStructVariant, Self::Error> {
|
||||
Err(SerializerError::UnindexableType {
|
||||
type_name: "struct variant",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SeqIndexer<'a> {
|
||||
pos: IndexedPos,
|
||||
document_id: DocumentId,
|
||||
indexer: &'a mut RawIndexer,
|
||||
texts: Vec<String>,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
|
||||
type Ok = Option<usize>;
|
||||
type Error = SerializerError;
|
||||
|
||||
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.texts.push(text);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
let texts = self.texts.iter().map(String::as_str);
|
||||
self.indexer
|
||||
.index_text_seq(self.document_id, self.pos, texts);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MapIndexer<'a> {
|
||||
pos: IndexedPos,
|
||||
document_id: DocumentId,
|
||||
indexer: &'a mut RawIndexer,
|
||||
texts: Vec<String>,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeMap for MapIndexer<'a> {
|
||||
type Ok = Option<usize>;
|
||||
type Error = SerializerError;
|
||||
|
||||
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let text = key.serialize(ConvertToString)?;
|
||||
self.texts.push(text);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.texts.push(text);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
let texts = self.texts.iter().map(String::as_str);
|
||||
self.indexer
|
||||
.index_text_seq(self.document_id, self.pos, texts);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StructIndexer<'a> {
|
||||
pos: IndexedPos,
|
||||
document_id: DocumentId,
|
||||
indexer: &'a mut RawIndexer,
|
||||
texts: Vec<String>,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeStruct for StructIndexer<'a> {
|
||||
type Ok = Option<usize>;
|
||||
type Error = SerializerError;
|
||||
|
||||
fn serialize_field<T: ?Sized>(
|
||||
&mut self,
|
||||
key: &'static str,
|
||||
value: &T,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let key_text = key.to_owned();
|
||||
let value_text = value.serialize(ConvertToString)?;
|
||||
self.texts.push(key_text);
|
||||
self.texts.push(value_text);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
let texts = self.texts.iter().map(String::as_str);
|
||||
self.indexer
|
||||
.index_text_seq(self.document_id, self.pos, texts);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TupleIndexer<'a> {
|
||||
pos: IndexedPos,
|
||||
document_id: DocumentId,
|
||||
indexer: &'a mut RawIndexer,
|
||||
texts: Vec<String>,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
|
||||
type Ok = Option<usize>;
|
||||
type Error = SerializerError;
|
||||
|
||||
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
let text = value.serialize(ConvertToString)?;
|
||||
self.texts.push(text);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
let texts = self.texts.iter().map(String::as_str);
|
||||
self.indexer
|
||||
.index_text_seq(self.document_id, self.pos, texts);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
@ -12,13 +12,11 @@ mod convert_to_number;
|
||||
mod convert_to_string;
|
||||
mod deserializer;
|
||||
mod extract_document_id;
|
||||
mod indexer;
|
||||
|
||||
pub use self::convert_to_number::ConvertToNumber;
|
||||
pub use self::convert_to_string::ConvertToString;
|
||||
pub use self::deserializer::{Deserializer, DeserializerError};
|
||||
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
|
||||
pub use self::indexer::Indexer;
|
||||
|
||||
use std::{error::Error, fmt};
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write as _;
|
||||
use std::fmt;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use indexmap::IndexMap;
|
||||
@ -6,12 +8,15 @@ use sdset::{duo::Union, SetOperation};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use meilisearch_types::DocumentId;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::facets;
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::{extract_document_id, Deserializer};
|
||||
use crate::serde::{ConvertToNumber, Indexer};
|
||||
use crate::serde::ConvertToNumber;
|
||||
use crate::store;
|
||||
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
|
||||
use crate::{Error, MResult, RankedMap};
|
||||
@ -106,6 +111,69 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
// TODO move this helper functions elsewhere
|
||||
/// Returns the number of words indexed or `None` if the type
|
||||
fn index_value(
|
||||
indexer: &mut RawIndexer,
|
||||
document_id: DocumentId,
|
||||
indexed_pos: IndexedPos,
|
||||
value: &Value,
|
||||
) -> Option<usize>
|
||||
{
|
||||
fn value_to_string(string: &mut String, value: &Value) {
|
||||
match value {
|
||||
Value::Null => (),
|
||||
Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); },
|
||||
Value::Number(number) => { let _ = write!(string, "{}", &number); },
|
||||
Value::String(text) => string.push_str(&text),
|
||||
Value::Array(array) => {
|
||||
for value in array {
|
||||
value_to_string(string, value);
|
||||
let _ = string.write_str(". ");
|
||||
}
|
||||
},
|
||||
Value::Object(object) => {
|
||||
for (key, value) in object {
|
||||
string.push_str(key);
|
||||
let _ = string.write_str(". ");
|
||||
value_to_string(string, value);
|
||||
let _ = string.write_str(". ");
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
match value {
|
||||
Value::Null => None,
|
||||
Value::Bool(boolean) => {
|
||||
let text = boolean.to_string();
|
||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
||||
Some(number_of_words)
|
||||
},
|
||||
Value::Number(number) => {
|
||||
let text = number.to_string();
|
||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
||||
Some(number_of_words)
|
||||
},
|
||||
Value::String(string) => {
|
||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &string);
|
||||
Some(number_of_words)
|
||||
},
|
||||
Value::Array(_) => {
|
||||
let mut text = String::new();
|
||||
value_to_string(&mut text, value);
|
||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
||||
Some(number_of_words)
|
||||
},
|
||||
Value::Object(_) => {
|
||||
let mut text = String::new();
|
||||
value_to_string(&mut text, value);
|
||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
||||
Some(number_of_words)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
index: &store::Index,
|
||||
@ -183,8 +251,8 @@ pub fn apply_addition<'a, 'b>(
|
||||
index.documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
|
||||
|
||||
if let Some(indexed_pos) = schema.is_indexed(field_id) {
|
||||
let indexer = Indexer { pos: *indexed_pos, indexer: &mut indexer, document_id };
|
||||
if let Some(number_of_words) = value.serialize(indexer)? {
|
||||
let number_of_words = index_value(&mut indexer, document_id, *indexed_pos, &value);
|
||||
if let Some(number_of_words) = number_of_words {
|
||||
index.documents_fields_counts.put_document_field_count(
|
||||
writer,
|
||||
document_id,
|
||||
@ -280,8 +348,8 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
index.documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
|
||||
|
||||
if let Some(indexed_pos) = schema.is_indexed(field_id) {
|
||||
let indexer = Indexer { pos: *indexed_pos, indexer: &mut indexer, document_id };
|
||||
if let Some(number_of_words) = value.serialize(indexer)? {
|
||||
let number_of_words = index_value(&mut indexer, document_id, *indexed_pos, &value);
|
||||
if let Some(number_of_words) = number_of_words {
|
||||
index.documents_fields_counts.put_document_field_count(
|
||||
writer,
|
||||
document_id,
|
||||
|
Loading…
Reference in New Issue
Block a user