mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-31 15:31:53 +08:00
update vector extractor
This commit is contained in:
parent
12323d610e
commit
4b64c33aa2
@ -7,7 +7,8 @@ use serde_json::{from_slice, Value};
|
|||||||
|
|
||||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
|
use crate::update::index_documents::helpers::try_split_at;
|
||||||
|
use crate::{DocumentId, FieldId, InternalError, Result, VectorOrArrayOfVectors};
|
||||||
|
|
||||||
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
||||||
///
|
///
|
||||||
@ -28,15 +29,17 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
let mut cursor = obkv_documents.into_cursor()?;
|
let mut cursor = obkv_documents.into_cursor()?;
|
||||||
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
|
// this must always be serialized as (docid, external_docid);
|
||||||
|
let (docid_bytes, external_id_bytes) =
|
||||||
|
try_split_at(key, std::mem::size_of::<DocumentId>()).unwrap();
|
||||||
|
debug_assert!(std::str::from_utf8(external_id_bytes).is_ok());
|
||||||
|
|
||||||
let obkv = obkv::KvReader::new(value);
|
let obkv = obkv::KvReader::new(value);
|
||||||
|
|
||||||
// since we only needs the primary key when we throw an error we create this getter to
|
// since we only needs the primary key when we throw an error we create this getter to
|
||||||
// lazily get it when needed
|
// lazily get it when needed
|
||||||
let document_id = || -> Value {
|
let document_id = || -> Value { std::str::from_utf8(external_id_bytes).unwrap().into() };
|
||||||
let document_id = obkv.get(primary_key_id).unwrap();
|
|
||||||
from_slice(document_id).unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
// first we retrieve the _vectors field
|
// first we retrieve the _vectors field
|
||||||
if let Some(vectors) = obkv.get(vectors_fid) {
|
if let Some(vectors) = obkv.get(vectors_fid) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user