Add iterator to find external ids of a bitmap of internal ids

This commit is contained in:
Louis Dureuil 2023-10-26 10:36:34 +02:00
parent a35988550c
commit 9a2dccc3bc
No known key found for this signature in database

View File

@ -4,6 +4,7 @@ use std::convert::TryInto;
use std::fmt; use std::fmt;
use fst::Streamer; use fst::Streamer;
use roaring::RoaringBitmap;
use crate::DocumentId; use crate::DocumentId;
@ -55,7 +56,24 @@ impl<'a> ExternalDocumentsIds<'a> {
self.0.as_fst().as_bytes() self.0.as_fst().as_bytes()
} }
/// Apply the list of operations passed as argument, modifying the current external to internal id mapping. /// Looks for the internal ids in the passed bitmap, and returns an iterator over the mapping between
/// these internal ids and their external id.
///
/// The returned iterator has `Result<(String, DocumentId), RoaringBitmap>` as `Item`,
/// where the returned values can be:
/// - `Ok((external_id, internal_id))`: if a mapping was found
/// - `Err(remaining_ids)`: if the external ids for some of the requested internal ids weren't found.
/// In that case the returned bitmap contains the internal ids whose external ids were not found after traversing
/// the entire fst.
pub fn find_external_id_of(
    &self,
    internal_ids: RoaringBitmap,
) -> ExternalToInternalOwnedIterator<'_> {
    // The iterator takes ownership of the requested ids and pairs them with a fresh stream over
    // the underlying fst; it is returned directly rather than through a temporary binding.
    ExternalToInternalOwnedIterator { stream: self.0.stream(), internal_ids }
}
/// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
/// ///
/// If the list contains multiple operations on the same external id, then the result is unspecified. /// If the list contains multiple operations on the same external id, then the result is unspecified.
/// ///
@ -129,3 +147,51 @@ impl Default for ExternalDocumentsIds<'static> {
ExternalDocumentsIds(fst::Map::default().map_data(Cow::Owned).unwrap()) ExternalDocumentsIds(fst::Map::default().map_data(Cow::Owned).unwrap())
} }
} }
/// An iterator over mappings between requested internal ids and external ids.
///
/// Built by [`ExternalDocumentsIds::find_external_id_of`]; see that method for details on the
/// items it yields.
pub struct ExternalToInternalOwnedIterator<'it> {
/// Stream over the entries of the underlying fst map, scanned to find the requested internal ids.
stream: fst::map::Stream<'it>,
/// Requested internal ids whose external id has not been yielded yet.
internal_ids: RoaringBitmap,
}
impl<'it> Iterator for ExternalToInternalOwnedIterator<'it> {
    /// Either a mapping that was found, or the set of internal ids still missing once the
    /// stream has been fully traversed.
    type Item = Result<(String, DocumentId), RoaringBitmap>;

    /// Advances the stream until the next requested internal id is met.
    ///
    /// Returns `None` once no requested id remains to be found.
    ///
    /// # Panics
    ///
    /// Panics if a value read from the stream cannot be converted to a `DocumentId`, or if a
    /// key read from the stream is not valid UTF-8.
    fn next(&mut self) -> Option<Self::Item> {
        // Nothing left to look for: every requested id was already yielded, or the remaining
        // ones were already handed back in an `Err`.
        if self.internal_ids.is_empty() {
            return None;
        }

        while let Some((key, value)) = self.stream.next() {
            let docid = value.try_into().unwrap();
            // `remove` reports whether the id was requested and not yet yielded.
            if !self.internal_ids.remove(docid) {
                continue;
            }
            let external_id = std::str::from_utf8(key).unwrap().to_owned();
            return Some(Ok((external_id, docid)));
        }

        // The stream is exhausted while some ids are still missing. Taking the bitmap leaves an
        // empty one behind, so every later call hits the early return above and yields `None`.
        Some(Err(std::mem::take(&mut self.internal_ids)))
    }
}
impl<'it> ExternalToInternalOwnedIterator<'it> {
    /// Returns the bitmap of internal ids whose external ids have not been found yet.
    pub fn remaining_internal_ids(&self) -> &RoaringBitmap {
        let Self { internal_ids, .. } = self;
        internal_ids
    }

    /// Consumes this iterator and returns one that yields only the external ids.
    ///
    /// The internal id of each found mapping is discarded; the `Err` case carrying the internal
    /// ids that were not found is passed through unchanged. Use this when the mapping between
    /// external and internal ids is not needed.
    pub fn only_external_ids(self) -> impl Iterator<Item = Result<String, RoaringBitmap>> + 'it {
        self.map(|item| match item {
            Ok((external_id, _internal_id)) => Ok(external_id),
            Err(remaining_ids) => Err(remaining_ids),
        })
    }
}