From 65b1d09d55400349b00219afaa62f92ab3233acb Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 14:57:03 +0200 Subject: [PATCH] Move the obkv merging functions into the merge_function module --- .../update/index_documents/merge_function.rs | 20 ++++++++++++++++-- milli/src/update/index_documents/transform.rs | 21 ++----------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 0a32603b5..8c93773ce 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -1,12 +1,28 @@ use std::borrow::Cow; -use anyhow::bail; -use bstr::ByteSlice as _; use fst::IntoStreamer; use roaring::RoaringBitmap; use crate::heed_codec::CboRoaringBitmapCodec; +/// Only the last value associated with an id is kept. +pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { + Ok(obkvs.last().unwrap().clone().into_owned()) +} + +/// Merge all the obks in the order we see them. 
+pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { + let mut iter = obkvs.iter(); + let first = iter.next().map(|b| b.clone().into_owned()).unwrap(); + Ok(iter.fold(first, |acc, current| { + let first = obkv::KvReader::new(&acc); + let second = obkv::KvReader::new(current); + let mut buffer = Vec::new(); + merge_two_obkvs(first, second, &mut buffer); + buffer + })) +} + // Union of multiple FSTs pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { let fsts = values.iter().map(fst::Set::new).collect::<Result<Vec<_>, _>>()?; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index cfc2530b4..5fbd24bb1 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -10,8 +10,9 @@ use log::info; use roaring::RoaringBitmap; use serde_json::{Map, Value}; -use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; +use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; +use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; use super::merge_function::merge_two_obkvs; use super::{create_writer, create_sorter, IndexDocumentsMethod}; @@ -552,24 +553,6 @@ fn compute_primary_key_pair( } } -/// Only the last value associated with an id is kept. -fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { - obkvs.last().context("no last value").map(|last| last.clone().into_owned()) -} - -/// Merge all the obks in the order we see them. 
-fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { - let mut iter = obkvs.iter(); - let first = iter.next().map(|b| b.clone().into_owned()).context("no first value")?; - Ok(iter.fold(first, |acc, current| { - let first = obkv::KvReader::new(&acc); - let second = obkv::KvReader::new(current); - let mut buffer = Vec::new(); - merge_two_obkvs(first, second, &mut buffer); - buffer - })) -} - fn validate_document_id(document_id: &str) -> Option<&str> { let document_id = document_id.trim(); Some(document_id).filter(|id| {