Introduce the TreeLevel struct

This commit is contained in:
Kerollmops 2021-03-18 17:20:16 +01:00 committed by many
parent bd1a371c62
commit 89ee2cf576
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA
5 changed files with 67 additions and 15 deletions

View File

@ -5,7 +5,7 @@ use std::{str, io, fmt};
use anyhow::Context; use anyhow::Context;
use byte_unit::Byte; use byte_unit::Byte;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use milli::Index; use milli::{Index, TreeLevel};
use structopt::StructOpt; use structopt::StructOpt;
use Command::*; use Command::*;
@ -561,13 +561,12 @@ fn words_level_positions_docids(
for word in words.iter().map(AsRef::as_ref) { for word in words.iter().map(AsRef::as_ref) {
let range = { let range = {
let left = (word, 0, u32::min_value(), u32::min_value()); let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
let right = (word, u8::max_value(), u32::max_value(), u32::max_value()); let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
left..=right left..=right
}; };
for result in index.word_level_position_docids.range(rtxn, &range)? { for result in index.word_level_position_docids.range(rtxn, &range)? {
let ((w, level, left, right), docids) = result?; let ((w, level, left, right), docids) = result?;
if word != w { break }
let count = docids.len().to_string(); let count = docids.len().to_string();
let docids = if debug { let docids = if debug {
@ -575,7 +574,7 @@ fn words_level_positions_docids(
} else { } else {
format!("{:?}", docids.iter().collect::<Vec<_>>()) format!("{:?}", docids.iter().collect::<Vec<_>>())
}; };
let position_range = if level == 0 { let position_range = if level == TreeLevel::min_value() {
format!("{:?}", left) format!("{:?}", left)
} else { } else {
format!("{:?}", left..=right) format!("{:?}", left..=right)

View File

@ -1,12 +1,14 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::convert::TryInto; use std::convert::{TryFrom, TryInto};
use std::mem::size_of; use std::mem::size_of;
use std::str; use std::str;
use crate::TreeLevel;
pub struct StrLevelPositionCodec; pub struct StrLevelPositionCodec;
impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec { impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
type DItem = (&'a str, u8, u32, u32); type DItem = (&'a str, TreeLevel, u32, u32);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let footer_len = size_of::<u8>() + size_of::<u32>() * 2; let footer_len = size_of::<u8>() + size_of::<u32>() * 2;
@ -19,13 +21,14 @@ impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
let (level, bytes) = bytes.split_first()?; let (level, bytes) = bytes.split_first()?;
let left = bytes[..4].try_into().map(u32::from_be_bytes).ok()?; let left = bytes[..4].try_into().map(u32::from_be_bytes).ok()?;
let right = bytes[4..].try_into().map(u32::from_be_bytes).ok()?; let right = bytes[4..].try_into().map(u32::from_be_bytes).ok()?;
let level = TreeLevel::try_from(*level).ok()?;
Some((word, *level, left, right)) Some((word, level, left, right))
} }
} }
impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec { impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
type EItem = (&'a str, u8, u32, u32); type EItem = (&'a str, TreeLevel, u32, u32);
fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
let left = left.to_be_bytes(); let left = left.to_be_bytes();
@ -33,7 +36,7 @@ impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len()); let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len());
bytes.extend_from_slice(word.as_bytes()); bytes.extend_from_slice(word.as_bytes());
bytes.push(*level); bytes.push((*level).into());
bytes.extend_from_slice(&left[..]); bytes.extend_from_slice(&left[..]);
bytes.extend_from_slice(&right[..]); bytes.extend_from_slice(&right[..]);

View File

@ -9,6 +9,7 @@ pub mod facet;
pub mod heed_codec; pub mod heed_codec;
pub mod index; pub mod index;
pub mod proximity; pub mod proximity;
pub mod tree_level;
pub mod update; pub mod update;
use std::borrow::Cow; use std::borrow::Cow;
@ -27,6 +28,7 @@ pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringB
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec}; pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
pub use self::index::Index; pub use self::index::Index;
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords}; pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords};
pub use self::tree_level::TreeLevel;
pub use self::update_store::UpdateStore; pub use self::update_store::UpdateStore;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;

47
milli/src/tree_level.rs Normal file
View File

@ -0,0 +1,47 @@
use std::convert::TryFrom;
use std::fmt;
/// Highest raw value a [`TreeLevel`] is allowed to hold.
///
/// This is just before the lowest printable character (space, sp, 32).
const MAX_VALUE: u8 = 31;

/// Error returned when a raw `u8` cannot be turned into a [`TreeLevel`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Error {
    /// The raw value is above the maximum level; the rejected value is carried along.
    LevelTooHigh(u8),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Error::LevelTooHigh(value) => {
                write!(f, "tree level {} is too high, the maximum is {}", value, MAX_VALUE)
            }
        }
    }
}

// Implementing the standard error trait lets callers propagate this error
// with `?` into `Box<dyn Error>`-like containers.
impl std::error::Error for Error {}

/// A level stored as a single byte that is always within `0..=31`.
///
/// The inner value is private: a `TreeLevel` can only be obtained through
/// [`TreeLevel::min_value`], [`TreeLevel::max_value`] or the checked
/// `TryFrom<u8>` conversion, so the bound always holds.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct TreeLevel(u8);

impl TreeLevel {
    /// Returns the highest representable level (`31`).
    pub const fn max_value() -> TreeLevel {
        TreeLevel(MAX_VALUE)
    }

    /// Returns the lowest representable level (`0`).
    pub const fn min_value() -> TreeLevel {
        TreeLevel(0)
    }
}

// `From` is implemented rather than `Into`: the standard library's blanket
// impl then provides `Into<u8> for TreeLevel`, so `level.into()` keeps working.
impl From<TreeLevel> for u8 {
    /// Returns the raw byte stored in the level.
    fn from(level: TreeLevel) -> u8 {
        level.0
    }
}

impl TryFrom<u8> for TreeLevel {
    type Error = Error;

    /// Converts a raw byte into a level.
    ///
    /// # Errors
    ///
    /// Returns [`Error::LevelTooHigh`] when `value` is greater than `31`.
    fn try_from(value: u8) -> Result<TreeLevel, Error> {
        match value {
            0..=MAX_VALUE => Ok(TreeLevel(value)),
            _ => Err(Error::LevelTooHigh(value)),
        }
    }
}

impl fmt::Display for TreeLevel {
    /// Writes the level as its plain decimal number.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

View File

@ -1,4 +1,5 @@
use std::cmp; use std::cmp;
use std::convert::TryFrom;
use std::fs::File; use std::fs::File;
use std::num::NonZeroUsize; use std::num::NonZeroUsize;
@ -9,9 +10,9 @@ use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec}; use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
use crate::Index;
use crate::update::index_documents::WriteMethod; use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
use crate::{Index, TreeLevel};
pub struct WordsLevelPositions<'t, 'u, 'i> { pub struct WordsLevelPositions<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -105,8 +106,8 @@ fn compute_positions_levels(
let (word, ()) = result?; let (word, ()) = result?;
let level_0_range = { let level_0_range = {
let left = (word, 0, u32::min_value(), u32::min_value()); let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
let right = (word, 0, u32::max_value(), u32::max_value()); let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
left..=right left..=right
}; };
@ -117,7 +118,7 @@ fn compute_positions_levels(
// Groups sizes are always a power of the original level_group_size and therefore a group // Groups sizes are always a power of the original level_group_size and therefore a group
// always maps groups of the previous level and never splits previous levels groups in half. // always maps groups of the previous level and never splits previous levels groups in half.
let group_size_iter = (1u8..) let group_size_iter = (1u8..)
.map(|l| (l, level_group_size.get().pow(l as u32))) .map(|l| (TreeLevel::try_from(l).unwrap(), level_group_size.get().pow(l as u32)))
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get()); .take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
// As specified in the documentation, we also write the level 0 entries. // As specified in the documentation, we also write the level 0 entries.
@ -163,7 +164,7 @@ fn compute_positions_levels(
fn write_level_entry( fn write_level_entry(
writer: &mut Writer<File>, writer: &mut Writer<File>,
word: &str, word: &str,
level: u8, level: TreeLevel,
left: u32, left: u32,
right: u32, right: u32,
ids: &RoaringBitmap, ids: &RoaringBitmap,