meilisearch/milli/src/search/new/interner.rs

196 lines
5.9 KiB
Rust
Raw Normal View History

2023-03-14 23:37:47 +08:00
use std::fmt;
use std::hash::Hash;
use std::marker::PhantomData;
2023-03-08 16:55:53 +08:00
use fxhash::FxHashMap;
2023-03-16 16:58:59 +08:00
/// An index within an interner ([`FixedSizeInterner`], [`DedupInterner`], or [`MappedInterner`]).
pub struct Interned<T> {
2023-03-14 23:37:47 +08:00
idx: u16,
_phantom: PhantomData<T>,
}
impl<T> Interned<T> {
2023-03-16 16:58:59 +08:00
/// Create an interned value manually from its raw index within the interner.
pub fn from_raw(idx: u16) -> Self {
Self { idx, _phantom: PhantomData }
}
2023-03-16 16:58:59 +08:00
/// Get the raw index from the interned value
pub fn into_raw(self) -> u16 {
2023-03-14 23:37:47 +08:00
self.idx
}
}
2023-03-16 16:58:59 +08:00
/// A [`DedupInterner`] is used to store a unique copy of a value of type `T`. This value
2023-03-08 20:26:29 +08:00
/// is then identified by a lightweight index of type [`Interned<T>`], which can
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
2023-03-16 16:58:59 +08:00
/// can be retrieved using `self.get(interned)`. A set of values within the interner can be
/// efficiently managed using [`SmallBitmap<T>`](super::small_bitmap::SmallBitmap).
2023-03-13 21:03:48 +08:00
#[derive(Clone)]
2023-03-14 23:37:47 +08:00
pub struct DedupInterner<T> {
stable_store: Vec<T>,
lookup: FxHashMap<T, Interned<T>>,
}
2023-03-14 23:37:47 +08:00
impl<T> Default for DedupInterner<T> {
fn default() -> Self {
Self { stable_store: Default::default(), lookup: Default::default() }
}
}
2023-03-14 23:37:47 +08:00
impl<T> DedupInterner<T> {
2023-03-16 16:58:59 +08:00
///
2023-03-14 23:37:47 +08:00
pub fn freeze(self) -> FixedSizeInterner<T> {
FixedSizeInterner { stable_store: self.stable_store }
}
}
2023-03-14 23:37:47 +08:00
impl<T> DedupInterner<T>
where
T: Clone + Eq + Hash,
{
pub fn insert(&mut self, s: T) -> Interned<T> {
if let Some(interned) = self.lookup.get(&s) {
*interned
} else {
2023-03-14 23:37:47 +08:00
assert!(self.stable_store.len() < u16::MAX as usize);
self.stable_store.push(s.clone());
2023-03-16 16:58:59 +08:00
let interned = Interned::from_raw(self.stable_store.len() as u16 - 1);
self.lookup.insert(s, interned);
interned
}
}
pub fn get(&self, interned: Interned<T>) -> &T {
&self.stable_store[interned.idx as usize]
}
}
2023-03-14 23:37:47 +08:00
#[derive(Clone)]
pub struct Interner<T> {
stable_store: Vec<T>,
}
impl<T> Default for Interner<T> {
fn default() -> Self {
Self { stable_store: Default::default() }
}
}
impl<T> Interner<T> {
pub fn freeze(self) -> FixedSizeInterner<T> {
FixedSizeInterner { stable_store: self.stable_store }
}
pub fn push(&mut self, s: T) -> Interned<T> {
assert!(self.stable_store.len() < u16::MAX as usize);
self.stable_store.push(s);
2023-03-16 16:58:59 +08:00
Interned::from_raw(self.stable_store.len() as u16 - 1)
2023-03-14 23:37:47 +08:00
}
}
#[derive(Clone)]
pub struct FixedSizeInterner<T> {
stable_store: Vec<T>,
}
impl<T: Clone> FixedSizeInterner<T> {
pub fn new(length: u16, value: T) -> Self {
Self { stable_store: vec![value; length as usize] }
}
}
impl<T> FixedSizeInterner<T> {
pub fn from_vec(store: Vec<T>) -> Self {
Self { stable_store: store }
}
pub fn get(&self, interned: Interned<T>) -> &T {
&self.stable_store[interned.idx as usize]
}
pub fn get_mut(&mut self, interned: Interned<T>) -> &mut T {
&mut self.stable_store[interned.idx as usize]
}
pub fn len(&self) -> u16 {
self.stable_store.len() as u16
}
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, T> {
MappedInterner {
stable_store: self.stable_store.iter().map(map_f).collect(),
_phantom: PhantomData,
}
}
pub fn indexes(&self) -> impl Iterator<Item = Interned<T>> {
2023-03-16 16:58:59 +08:00
(0..self.stable_store.len()).map(|i| Interned::from_raw(i as u16))
2023-03-14 23:37:47 +08:00
}
pub fn iter(&self) -> impl Iterator<Item = (Interned<T>, &T)> {
2023-03-16 16:58:59 +08:00
self.stable_store.iter().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
2023-03-14 23:37:47 +08:00
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> {
2023-03-16 16:58:59 +08:00
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
2023-03-14 23:37:47 +08:00
}
}
#[derive(Clone)]
pub struct MappedInterner<T, From> {
stable_store: Vec<T>,
_phantom: PhantomData<From>,
}
2023-03-14 23:37:47 +08:00
impl<T, From> MappedInterner<T, From> {
pub fn get(&self, interned: Interned<From>) -> &T {
&self.stable_store[interned.idx as usize]
}
pub fn get_mut(&mut self, interned: Interned<From>) -> &mut T {
&mut self.stable_store[interned.idx as usize]
}
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, From> {
MappedInterner {
stable_store: self.stable_store.iter().map(map_f).collect(),
_phantom: PhantomData,
}
}
pub fn iter(&self) -> impl Iterator<Item = (Interned<From>, &T)> {
2023-03-16 16:58:59 +08:00
self.stable_store.iter().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
2023-03-14 23:37:47 +08:00
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<From>, &mut T)> {
2023-03-16 16:58:59 +08:00
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
2023-03-14 23:37:47 +08:00
}
}
// Interned<T> boilerplate implementations
impl<T> Hash for Interned<T> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.idx.hash(state);
}
}
impl<T: Ord> Ord for Interned<T> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.idx.cmp(&other.idx)
}
}
impl<T> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.idx.partial_cmp(&other.idx)
}
}
impl<T> Eq for Interned<T> {}
impl<T> PartialEq for Interned<T> {
fn eq(&self, other: &Self) -> bool {
self.idx == other.idx
}
}
impl<T> Clone for Interned<T> {
fn clone(&self) -> Self {
Self { idx: self.idx, _phantom: PhantomData }
}
}
impl<T> Copy for Interned<T> {}
2023-03-14 23:37:47 +08:00
impl<T> fmt::Display for Interned<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.idx, f)
}
}
impl<T> fmt::Debug for Interned<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.idx, f)
}
}