mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Generate internal ids only when needed
This commit is contained in:
parent
ec31e33fdc
commit
bce7ab614a
@ -1752,7 +1752,7 @@ pub(crate) mod tests {
|
|||||||
let embedders = EmbeddingConfigs::default();
|
let embedders = EmbeddingConfigs::default();
|
||||||
/// TODO: fetch configs from the index
|
/// TODO: fetch configs from the index
|
||||||
let mut indexer =
|
let mut indexer =
|
||||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
indexer::DocumentOperation::new(self.index_documents_config.update_method);
|
||||||
indexer.add_documents(&documents).unwrap();
|
indexer.add_documents(&documents).unwrap();
|
||||||
|
|
||||||
let indexer_alloc = Bump::new();
|
let indexer_alloc = Bump::new();
|
||||||
@ -1832,7 +1832,7 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
let embedders = EmbeddingConfigs::default();
|
let embedders = EmbeddingConfigs::default();
|
||||||
let mut indexer =
|
let mut indexer =
|
||||||
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
indexer::DocumentOperation::new(self.index_documents_config.update_method);
|
||||||
let external_document_ids: Vec<_> =
|
let external_document_ids: Vec<_> =
|
||||||
external_document_ids.iter().map(AsRef::as_ref).collect();
|
external_document_ids.iter().map(AsRef::as_ref).collect();
|
||||||
indexer.delete_documents(external_document_ids.as_slice());
|
indexer.delete_documents(external_document_ids.as_slice());
|
||||||
@ -2429,7 +2429,16 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
// And adding lots of documents afterwards instead of just one.
|
// And adding lots of documents afterwards instead of just one.
|
||||||
// These extra subtests don't add much, but it's better than nothing.
|
// These extra subtests don't add much, but it's better than nothing.
|
||||||
index.add_documents(documents!([{ "primary_key": 38 }, { "primary_key": 39 }, { "primary_key": 41 }, { "primary_key": 40 }, { "primary_key": 41 }, { "primary_key": 42 }])).unwrap();
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{ "primary_key": 38 },
|
||||||
|
{ "primary_key": 39 },
|
||||||
|
{ "primary_key": 41 },
|
||||||
|
{ "primary_key": 40 },
|
||||||
|
{ "primary_key": 41 },
|
||||||
|
{ "primary_key": 42 },
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
|
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
|
||||||
db_snap!(index, external_documents_ids, 7, @r###"
|
db_snap!(index, external_documents_ids, 7, @r###"
|
||||||
|
@ -57,7 +57,9 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
let mut primary_key = None;
|
let mut primary_key = None;
|
||||||
|
|
||||||
for operation in operations {
|
for operation in operations {
|
||||||
let (bytes, document_count, result) = match operation {
|
let mut bytes = 0;
|
||||||
|
let mut document_count = 0;
|
||||||
|
let result = match operation {
|
||||||
Payload::Addition(payload) => extract_addition_payload_changes(
|
Payload::Addition(payload) => extract_addition_payload_changes(
|
||||||
indexer,
|
indexer,
|
||||||
index,
|
index,
|
||||||
@ -66,6 +68,8 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
&mut primary_key,
|
&mut primary_key,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
&mut available_docids,
|
&mut available_docids,
|
||||||
|
&mut bytes,
|
||||||
|
&mut document_count,
|
||||||
&docids_version_offsets,
|
&docids_version_offsets,
|
||||||
method,
|
method,
|
||||||
payload,
|
payload,
|
||||||
@ -74,6 +78,7 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
index,
|
index,
|
||||||
rtxn,
|
rtxn,
|
||||||
&mut available_docids,
|
&mut available_docids,
|
||||||
|
&mut document_count,
|
||||||
&docids_version_offsets,
|
&docids_version_offsets,
|
||||||
method,
|
method,
|
||||||
to_delete,
|
to_delete,
|
||||||
@ -122,67 +127,38 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
|||||||
primary_key: &mut Option<PrimaryKey<'r>>,
|
primary_key: &mut Option<PrimaryKey<'r>>,
|
||||||
new_fields_ids_map: &mut FieldsIdsMap,
|
new_fields_ids_map: &mut FieldsIdsMap,
|
||||||
available_docids: &mut AvailableIds,
|
available_docids: &mut AvailableIds,
|
||||||
|
bytes: &mut u64,
|
||||||
|
number_of_documents: &mut u64,
|
||||||
main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>,
|
main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>,
|
||||||
method: MergeMethod,
|
method: MergeMethod,
|
||||||
payload: &'pl [u8],
|
payload: &'pl [u8],
|
||||||
) -> (u64, u64, Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>>) {
|
) -> Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>> {
|
||||||
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
|
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
|
||||||
|
|
||||||
/// TODO manage the error
|
/// TODO manage the error
|
||||||
let mut previous_offset = 0;
|
let mut previous_offset = 0;
|
||||||
let mut iter = Deserializer::from_slice(payload).into_iter::<&RawValue>();
|
let mut iter = Deserializer::from_slice(payload).into_iter::<&RawValue>();
|
||||||
loop {
|
while let Some(doc) = iter.next().transpose().map_err(InternalError::SerdeJson)? {
|
||||||
let optdoc = match iter.next().transpose() {
|
*bytes = previous_offset as u64;
|
||||||
Ok(optdoc) => optdoc,
|
*number_of_documents = new_docids_version_offsets.len() as u64;
|
||||||
Err(e) => {
|
|
||||||
return (
|
|
||||||
payload.len() as u64,
|
|
||||||
new_docids_version_offsets.len() as u64,
|
|
||||||
Err(InternalError::SerdeJson(e).into()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Only guess the primary key if it is the first document
|
// Only guess the primary key if it is the first document
|
||||||
let retrieved_primary_key = if previous_offset == 0 {
|
let retrieved_primary_key = if previous_offset == 0 {
|
||||||
let optdoc = match optdoc {
|
let doc =
|
||||||
Some(doc) => match RawMap::from_raw_value(doc, indexer) {
|
RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?;
|
||||||
Ok(docmap) => Some(docmap),
|
|
||||||
Err(error) => {
|
|
||||||
return (
|
|
||||||
payload.len() as u64,
|
|
||||||
new_docids_version_offsets.len() as u64,
|
|
||||||
Err(Error::UserError(UserError::SerdeJson(error))),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let result = retrieve_or_guess_primary_key(
|
let result = retrieve_or_guess_primary_key(
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
primary_key_from_op,
|
primary_key_from_op,
|
||||||
optdoc,
|
doc,
|
||||||
);
|
);
|
||||||
|
|
||||||
let (pk, _has_been_changed) = match result {
|
let (pk, _has_been_changed) = match result {
|
||||||
Ok(Ok(pk)) => pk,
|
Ok(Ok(pk)) => pk,
|
||||||
Ok(Err(user_error)) => {
|
Ok(Err(user_error)) => return Err(Error::UserError(user_error)),
|
||||||
return (
|
Err(error) => return Err(error),
|
||||||
payload.len() as u64,
|
|
||||||
new_docids_version_offsets.len() as u64,
|
|
||||||
Err(Error::UserError(user_error)),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
Err(error) => {
|
|
||||||
return (
|
|
||||||
payload.len() as u64,
|
|
||||||
new_docids_version_offsets.len() as u64,
|
|
||||||
Err(error),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
primary_key.get_or_insert(pk)
|
primary_key.get_or_insert(pk)
|
||||||
@ -190,20 +166,13 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
|||||||
primary_key.as_ref().unwrap()
|
primary_key.as_ref().unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
let doc = match optdoc {
|
|
||||||
Some(doc) => doc,
|
|
||||||
None => break,
|
|
||||||
};
|
|
||||||
|
|
||||||
let external_id = match retrieved_primary_key.extract_fields_and_docid(
|
let external_id = match retrieved_primary_key.extract_fields_and_docid(
|
||||||
doc,
|
doc,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
indexer,
|
indexer,
|
||||||
) {
|
) {
|
||||||
Ok(edi) => edi,
|
Ok(edi) => edi,
|
||||||
Err(e) => {
|
Err(e) => return Err(e),
|
||||||
return (payload.len() as u64, new_docids_version_offsets.len() as u64, Err(e))
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let external_id = external_id.to_de();
|
let external_id = external_id.to_de();
|
||||||
@ -212,40 +181,38 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
|||||||
|
|
||||||
match main_docids_version_offsets.get(external_id) {
|
match main_docids_version_offsets.get(external_id) {
|
||||||
None => {
|
None => {
|
||||||
let (docid, is_new) = match index.external_documents_ids().get(rtxn, external_id) {
|
match index.external_documents_ids().get(rtxn, external_id) {
|
||||||
Ok(Some(docid)) => (docid, false),
|
Ok(Some(docid)) => match new_docids_version_offsets.entry(external_id) {
|
||||||
Ok(None) => (
|
Entry::Occupied(mut entry) => {
|
||||||
match available_docids.next() {
|
entry.get_mut().push_addition(document_offset)
|
||||||
Some(docid) => docid,
|
|
||||||
None => {
|
|
||||||
return (
|
|
||||||
payload.len() as u64,
|
|
||||||
new_docids_version_offsets.len() as u64,
|
|
||||||
Err(UserError::DocumentLimitReached.into()),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
},
|
|
||||||
true,
|
|
||||||
),
|
|
||||||
Err(e) => {
|
|
||||||
return (
|
|
||||||
payload.len() as u64,
|
|
||||||
new_docids_version_offsets.len() as u64,
|
|
||||||
Err(e.into()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
match new_docids_version_offsets.entry(external_id) {
|
|
||||||
Entry::Occupied(mut entry) => entry.get_mut().push_addition(document_offset),
|
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
entry.insert(PayloadOperations::new_addition(
|
entry.insert(PayloadOperations::new_addition(
|
||||||
method,
|
method,
|
||||||
docid,
|
docid,
|
||||||
is_new,
|
false, // is new
|
||||||
document_offset,
|
document_offset,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
Ok(None) => match new_docids_version_offsets.entry(external_id) {
|
||||||
|
Entry::Occupied(mut entry) => {
|
||||||
|
entry.get_mut().push_addition(document_offset)
|
||||||
|
}
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
let docid = match available_docids.next() {
|
||||||
|
Some(docid) => docid,
|
||||||
|
None => return Err(UserError::DocumentLimitReached.into()),
|
||||||
|
};
|
||||||
|
entry.insert(PayloadOperations::new_addition(
|
||||||
|
method,
|
||||||
|
docid,
|
||||||
|
true, // is new
|
||||||
|
document_offset,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(payload_operations) => match new_docids_version_offsets.entry(external_id) {
|
Some(payload_operations) => match new_docids_version_offsets.entry(external_id) {
|
||||||
@ -264,48 +231,51 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
|||||||
previous_offset = iter.byte_offset();
|
previous_offset = iter.byte_offset();
|
||||||
}
|
}
|
||||||
|
|
||||||
(payload.len() as u64, new_docids_version_offsets.len() as u64, Ok(new_docids_version_offsets))
|
Ok(new_docids_version_offsets)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_deletion_payload_changes<'s, 'pl: 's>(
|
fn extract_deletion_payload_changes<'s, 'pl: 's>(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
available_docids: &mut AvailableIds,
|
available_docids: &mut AvailableIds,
|
||||||
|
number_of_documents: &mut u64,
|
||||||
main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>,
|
main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>,
|
||||||
method: MergeMethod,
|
method: MergeMethod,
|
||||||
to_delete: &'pl [&'pl str],
|
to_delete: &'pl [&'pl str],
|
||||||
) -> (u64, u64, Result<hashbrown::HashMap<&'s str, PayloadOperations<'pl>>>) {
|
) -> Result<hashbrown::HashMap<&'s str, PayloadOperations<'pl>>> {
|
||||||
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
|
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
|
||||||
let mut document_count = 0;
|
|
||||||
|
|
||||||
for external_id in to_delete {
|
for external_id in to_delete {
|
||||||
match main_docids_version_offsets.get(external_id) {
|
match main_docids_version_offsets.get(external_id) {
|
||||||
None => {
|
None => {
|
||||||
let (docid, is_new) = match index.external_documents_ids().get(rtxn, external_id) {
|
match index.external_documents_ids().get(rtxn, external_id) {
|
||||||
Ok(Some(docid)) => (docid, false),
|
Ok(Some(docid)) => {
|
||||||
Ok(None) => (
|
|
||||||
match available_docids.next() {
|
|
||||||
Some(docid) => docid,
|
|
||||||
None => {
|
|
||||||
return (
|
|
||||||
0,
|
|
||||||
new_docids_version_offsets.len() as u64,
|
|
||||||
Err(UserError::DocumentLimitReached.into()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
true,
|
|
||||||
),
|
|
||||||
Err(e) => return (0, new_docids_version_offsets.len() as u64, Err(e.into())),
|
|
||||||
};
|
|
||||||
|
|
||||||
match new_docids_version_offsets.entry(external_id) {
|
match new_docids_version_offsets.entry(external_id) {
|
||||||
Entry::Occupied(mut entry) => entry.get_mut().push_deletion(),
|
Entry::Occupied(mut entry) => entry.get_mut().push_deletion(),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
entry.insert(PayloadOperations::new_deletion(method, docid, is_new));
|
entry.insert(PayloadOperations::new_deletion(
|
||||||
|
method, docid, false, // is new
|
||||||
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
let docid = match available_docids.next() {
|
||||||
|
Some(docid) => docid,
|
||||||
|
None => return Err(UserError::DocumentLimitReached.into()),
|
||||||
|
};
|
||||||
|
match new_docids_version_offsets.entry(external_id) {
|
||||||
|
Entry::Occupied(mut entry) => entry.get_mut().push_deletion(),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
entry.insert(PayloadOperations::new_deletion(
|
||||||
|
method, docid, true, // is new
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
Some(payload_operations) => match new_docids_version_offsets.entry(external_id) {
|
Some(payload_operations) => match new_docids_version_offsets.entry(external_id) {
|
||||||
Entry::Occupied(mut entry) => entry.get_mut().push_deletion(),
|
Entry::Occupied(mut entry) => entry.get_mut().push_deletion(),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
@ -317,10 +287,10 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>(
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
document_count += 1;
|
*number_of_documents += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
(0, document_count, Ok(new_docids_version_offsets))
|
Ok(new_docids_version_offsets)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_version_offsets<'s, 'pl>(
|
fn merge_version_offsets<'s, 'pl>(
|
||||||
|
Loading…
Reference in New Issue
Block a user