mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-25 03:25:06 +08:00
Add vectors field and geo field to document trait
This commit is contained in:
parent
73e29ee155
commit
c278024709
@ -1,9 +1,9 @@
|
|||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
use raw_collections::RawMap;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::document_change::{Entry, Versions};
|
|
||||||
use super::{KvReaderFieldId, KvWriterFieldId};
|
use super::{KvReaderFieldId, KvWriterFieldId};
|
||||||
use crate::documents::FieldIdMapper;
|
use crate::documents::FieldIdMapper;
|
||||||
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
||||||
@ -17,11 +17,26 @@ pub trait Document<'doc> {
|
|||||||
/// Iterate over all **top-level** fields of the document, returning their name and raw JSON value.
|
/// Iterate over all **top-level** fields of the document, returning their name and raw JSON value.
|
||||||
///
|
///
|
||||||
/// - The returned values *may* contain nested fields.
|
/// - The returned values *may* contain nested fields.
|
||||||
/// - The `_vectors` field is **ignored** by this method, meaning it is **not returned** by this method.
|
/// - The `_vectors` and `_geo` fields are **ignored** by this method, meaning they are **not returned** by this method.
|
||||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>>;
|
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>>;
|
||||||
|
|
||||||
|
/// Returns the unparsed value of the `_vectors` field from the document data.
|
||||||
|
///
|
||||||
|
/// This field alone is insufficient to retrieve vectors, as they may be stored in a dedicated location in the database.
|
||||||
|
/// Use a [`super::vector_document::VectorDocument`] to access the vector.
|
||||||
|
///
|
||||||
|
/// This method is meant as a convenience for implementors of [`super::vector_document::VectorDocument`].
|
||||||
|
fn vectors_field(&self) -> Result<Option<&'doc RawValue>>;
|
||||||
|
|
||||||
|
/// Returns the unparsed value of the `_geo` field from the document data.
|
||||||
|
///
|
||||||
|
/// This field alone is insufficient to retrieve geo data, as they may be stored in a dedicated location in the database.
|
||||||
|
/// Use a [`super::geo_document::GeoDocument`] to access the vector.
|
||||||
|
///
|
||||||
|
/// This method is meant as a convenience for implementors of [`super::geo_document::GeoDocument`].
|
||||||
|
fn geo_field(&self) -> Result<Option<&'doc RawValue>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
|
||||||
pub struct DocumentFromDb<'t, Mapper: FieldIdMapper>
|
pub struct DocumentFromDb<'t, Mapper: FieldIdMapper>
|
||||||
where
|
where
|
||||||
Mapper: FieldIdMapper,
|
Mapper: FieldIdMapper,
|
||||||
@ -30,6 +45,14 @@ where
|
|||||||
content: &'t KvReaderFieldId,
|
content: &'t KvReaderFieldId,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'t, Mapper: FieldIdMapper> Clone for DocumentFromDb<'t, Mapper> {
|
||||||
|
#[inline]
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
*self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'t, Mapper: FieldIdMapper> Copy for DocumentFromDb<'t, Mapper> {}
|
||||||
|
|
||||||
impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
|
impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
|
||||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'t str, &'t RawValue)>> {
|
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'t str, &'t RawValue)>> {
|
||||||
let mut it = self.content.iter();
|
let mut it = self.content.iter();
|
||||||
@ -53,6 +76,14 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
|
|||||||
Some(res)
|
Some(res)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn vectors_field(&self) -> Result<Option<&'t RawValue>> {
|
||||||
|
self.field(RESERVED_VECTORS_FIELD_NAME)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn geo_field(&self) -> Result<Option<&'t RawValue>> {
|
||||||
|
self.field("_geo")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
|
impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
|
||||||
@ -66,6 +97,14 @@ impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
|
|||||||
reader.map(|reader| Self { fields_ids_map: db_fields_ids_map, content: reader })
|
reader.map(|reader| Self { fields_ids_map: db_fields_ids_map, content: reader })
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn field(&self, name: &str) -> Result<Option<&'t RawValue>> {
|
||||||
|
let Some(fid) = self.fields_ids_map.id(name) else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
let Some(value) = self.content.get(fid) else { return Ok(None) };
|
||||||
|
Ok(Some(serde_json::from_slice(value).map_err(InternalError::SerdeJson)?))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
@ -81,29 +120,15 @@ impl<'doc> DocumentFromVersions<'doc> {
|
|||||||
|
|
||||||
impl<'doc> Document<'doc> for DocumentFromVersions<'doc> {
|
impl<'doc> Document<'doc> for DocumentFromVersions<'doc> {
|
||||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>> {
|
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>> {
|
||||||
match &self.versions {
|
self.versions.iter_top_level_fields().map(Ok)
|
||||||
Versions::Single(version) => either::Either::Left(version.iter_top_level_fields()),
|
}
|
||||||
Versions::Multiple(versions) => {
|
|
||||||
let mut seen_fields = BTreeSet::new();
|
|
||||||
let mut it = versions.iter().rev().flat_map(|version| version.iter()).copied();
|
|
||||||
either::Either::Right(std::iter::from_fn(move || loop {
|
|
||||||
let (name, value) = it.next()?;
|
|
||||||
|
|
||||||
if seen_fields.contains(name) {
|
fn vectors_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||||
continue;
|
Ok(self.versions.vectors_field())
|
||||||
}
|
}
|
||||||
seen_fields.insert(name);
|
|
||||||
return Some(Ok((name, value)));
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// used in document from payload
|
fn geo_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||||
impl<'doc> Document<'doc> for &'doc [Entry<'doc>] {
|
Ok(self.versions.geo_field())
|
||||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<Entry<'doc>>> {
|
|
||||||
self.iter().copied().map(|(k, v)| Ok((k, v)))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,6 +189,26 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn vectors_field(&self) -> Result<Option<&'d RawValue>> {
|
||||||
|
if let Some(vectors) = self.new_doc.vectors_field()? {
|
||||||
|
return Ok(Some(vectors));
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(db) = self.db else { return Ok(None) };
|
||||||
|
|
||||||
|
db.vectors_field()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn geo_field(&self) -> Result<Option<&'d RawValue>> {
|
||||||
|
if let Some(geo) = self.new_doc.geo_field()? {
|
||||||
|
return Ok(Some(geo));
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(db) = self.db else { return Ok(None) };
|
||||||
|
|
||||||
|
db.geo_field()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc, D> Document<'doc> for &D
|
impl<'doc, D> Document<'doc> for &D
|
||||||
@ -173,6 +218,14 @@ where
|
|||||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>> {
|
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>> {
|
||||||
D::iter_top_level_fields(self)
|
D::iter_top_level_fields(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn vectors_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||||
|
D::vectors_field(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn geo_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||||
|
D::geo_field(self)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Turn this document into an obkv, whose fields are indexed by the provided `FieldIdMapper`.
|
/// Turn this document into an obkv, whose fields are indexed by the provided `FieldIdMapper`.
|
||||||
@ -245,3 +298,52 @@ where
|
|||||||
writer.finish().unwrap();
|
writer.finish().unwrap();
|
||||||
Ok(KvReaderFieldId::from_slice(document_buffer))
|
Ok(KvReaderFieldId::from_slice(document_buffer))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub type Entry<'doc> = (&'doc str, &'doc RawValue);
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub struct Versions<'doc> {
|
||||||
|
data: &'doc [Entry<'doc>],
|
||||||
|
vectors: Option<&'doc RawValue>,
|
||||||
|
geo: Option<&'doc RawValue>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'doc> Versions<'doc> {
|
||||||
|
pub fn multiple(
|
||||||
|
mut versions: impl Iterator<Item = Result<RawMap<'doc>>>,
|
||||||
|
) -> Result<Option<Self>> {
|
||||||
|
let Some(data) = versions.next() else { return Ok(None) };
|
||||||
|
let mut data = data?;
|
||||||
|
for future_version in versions {
|
||||||
|
let future_version = future_version?;
|
||||||
|
for (field, value) in future_version {
|
||||||
|
data.insert(field, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Some(Self::single(data)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn single(version: RawMap<'doc>) -> Self {
|
||||||
|
let vectors_id = version.get_index(RESERVED_VECTORS_FIELD_NAME);
|
||||||
|
let geo_id = version.get_index("_geo");
|
||||||
|
let mut data = version.into_vec();
|
||||||
|
let geo = geo_id.map(|geo_id| data.remove(geo_id).1);
|
||||||
|
let vectors = vectors_id.map(|vectors_id| data.remove(vectors_id).1);
|
||||||
|
|
||||||
|
let data = data.into_bump_slice();
|
||||||
|
|
||||||
|
Self { data, geo, vectors }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn iter_top_level_fields(&self) -> impl Iterator<Item = Entry<'doc>> {
|
||||||
|
self.data.iter().copied()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn vectors_field(&self) -> Option<&'doc RawValue> {
|
||||||
|
self.vectors
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn geo_field(&self) -> Option<&'doc RawValue> {
|
||||||
|
self.geo
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use serde_json::value::RawValue;
|
|
||||||
|
|
||||||
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument};
|
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument};
|
||||||
use crate::documents::FieldIdMapper;
|
use crate::documents::FieldIdMapper;
|
||||||
@ -138,11 +137,3 @@ impl<'doc> Update<'doc> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Entry<'doc> = (&'doc str, &'doc RawValue);
|
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
|
||||||
pub enum Versions<'doc> {
|
|
||||||
Single(&'doc [Entry<'doc>]),
|
|
||||||
Multiple(&'doc [&'doc [Entry<'doc>]]),
|
|
||||||
}
|
|
||||||
|
@ -2,7 +2,6 @@ use bumpalo::collections::CollectIn;
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use rayon::iter::IntoParallelIterator;
|
|
||||||
use rayon::slice::ParallelSlice;
|
use rayon::slice::ParallelSlice;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
use IndexDocumentsMethod as Idm;
|
use IndexDocumentsMethod as Idm;
|
||||||
@ -10,8 +9,7 @@ use IndexDocumentsMethod as Idm;
|
|||||||
use super::super::document_change::DocumentChange;
|
use super::super::document_change::DocumentChange;
|
||||||
use super::document_changes::{DocumentChangeContext, DocumentChanges, MostlySend};
|
use super::document_changes::{DocumentChangeContext, DocumentChanges, MostlySend};
|
||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
use crate::update::new::document::DocumentFromVersions;
|
use crate::update::new::document::{DocumentFromVersions, Versions};
|
||||||
use crate::update::new::document_change::Versions;
|
|
||||||
use crate::update::new::{Deletion, Insertion, Update};
|
use crate::update::new::{Deletion, Insertion, Update};
|
||||||
use crate::update::{AvailableIds, IndexDocumentsMethod};
|
use crate::update::{AvailableIds, IndexDocumentsMethod};
|
||||||
use crate::{DocumentId, Error, FieldsIdsMap, Index, Result, UserError};
|
use crate::{DocumentId, Error, FieldsIdsMap, Index, Result, UserError};
|
||||||
@ -291,8 +289,7 @@ impl MergeChanges for MergeDocumentForReplacement {
|
|||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||||
.map_err(UserError::SerdeJson)?;
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
let document = document.into_bump_slice();
|
let document = DocumentFromVersions::new(Versions::single(document));
|
||||||
let document = DocumentFromVersions::new(Versions::Single(document));
|
|
||||||
|
|
||||||
if is_new {
|
if is_new {
|
||||||
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
||||||
@ -365,9 +362,22 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut versions = bumpalo::collections::Vec::with_capacity_in(operations.len(), doc_alloc);
|
let versions = match operations {
|
||||||
|
[single] => {
|
||||||
|
let DocumentOffset { content } = match single {
|
||||||
|
InnerDocOp::Addition(offset) => offset,
|
||||||
|
InnerDocOp::Deletion => {
|
||||||
|
unreachable!("Deletion in document operations")
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
|
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
for operation in operations {
|
Some(Versions::single(document))
|
||||||
|
}
|
||||||
|
operations => {
|
||||||
|
let versions = operations.iter().map(|operation| {
|
||||||
let DocumentOffset { content } = match operation {
|
let DocumentOffset { content } = match operation {
|
||||||
InnerDocOp::Addition(offset) => offset,
|
InnerDocOp::Addition(offset) => offset,
|
||||||
InnerDocOp::Deletion => {
|
InnerDocOp::Deletion => {
|
||||||
@ -378,17 +388,14 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
let document = serde_json::from_slice(content).unwrap();
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||||
.map_err(UserError::SerdeJson)?;
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
Ok(document)
|
||||||
let document = document.into_bump_slice();
|
});
|
||||||
versions.push(document);
|
Versions::multiple(versions)?
|
||||||
}
|
}
|
||||||
|
|
||||||
let versions = versions.into_bump_slice();
|
|
||||||
let versions = match versions {
|
|
||||||
[single] => Versions::Single(single),
|
|
||||||
versions => Versions::Multiple(versions),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let Some(versions) = versions else { return Ok(None) };
|
||||||
|
|
||||||
let document = DocumentFromVersions::new(versions);
|
let document = DocumentFromVersions::new(versions);
|
||||||
|
|
||||||
if is_new {
|
if is_new {
|
||||||
|
Loading…
Reference in New Issue
Block a user