From ba4ba685f99c5543d0d16393a6851352899149ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 28 Nov 2020 12:43:43 +0100 Subject: [PATCH] Make the facet levels maps to previous level groups and don't split them --- http-ui/src/main.rs | 32 +++------ src/update/facets.rs | 116 +++++++----------------------- src/update/index_documents/mod.rs | 16 ++--- src/update/mod.rs | 2 +- 4 files changed, 41 insertions(+), 125 deletions(-) diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 1c5385b14..80402f0a0 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -28,7 +28,7 @@ use warp::{Filter, http::Response}; use milli::tokenizer::{simple_tokenizer, TokenType}; use milli::update::UpdateIndexingStep::*; -use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat, EasingName}; +use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat}; use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition}; static GLOBAL_THREAD_POOL: OnceCell = OnceCell::new(); @@ -237,9 +237,8 @@ struct Settings { #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] struct Facets { - last_level_size: Option, - number_of_levels: Option, - easing_function: Option, + level_group_size: Option, + min_level_size: Option, } // Any value that is present is considered Some value, including null. @@ -415,27 +414,12 @@ async fn main() -> anyhow::Result<()> { // We must use the write transaction of the update here. let mut wtxn = index_cloned.write_txn()?; let mut builder = update_builder.facets(&mut wtxn, &index_cloned); - if let Some(value) = levels.last_level_size { - builder.last_level_size(value); + if let Some(value) = levels.level_group_size { + builder.level_group_size(value); } - if let Some(value) = levels.number_of_levels { - builder.number_of_levels(value); + if let Some(value) = levels.min_level_size { + builder.min_level_size(value); } - if let Some(value) = levels.easing_function { - let easing_name = if value.eq_ignore_ascii_case("expo") { - EasingName::Expo - } else if value.eq_ignore_ascii_case("quart") { - EasingName::Quart - } else if value.eq_ignore_ascii_case("circ") { - EasingName::Circ - } else if value.eq_ignore_ascii_case("linear") { - EasingName::Linear - } else { - panic!("Invalid easing function name") - }; - builder.easing_function(easing_name); - } - match builder.execute() { Ok(()) => wtxn.commit().map_err(Into::into), Err(e) => Err(e.into()) @@ -804,7 +788,7 @@ async fn main() -> anyhow::Result<()> { let update_store_cloned = update_store.clone(); let update_status_sender_cloned = update_status_sender.clone(); let change_facet_levels_route = warp::filters::method::post() - .and(warp::path!("facet-levels")) + .and(warp::path!("facet-level-sizes")) .and(warp::body::json()) .map(move |levels: Facets| { let meta = UpdateMeta::Facets(levels); diff --git a/src/update/facets.rs b/src/update/facets.rs index 96a7e825e..e26f030df 100644 --- a/src/update/facets.rs +++ b/src/update/facets.rs @@ -1,10 +1,10 @@ +use std::cmp; use std::fs::File; use std::num::NonZeroUsize; use grenad::{CompressionType, Reader, Writer, FileFuse}; use heed::types::{ByteSlice, DecodeIgnore}; use heed::{BytesEncode, Error}; -use itertools::Itertools; use log::debug; use num_traits::{Bounded, Zero}; use roaring::RoaringBitmap; @@ -16,23 +16,14 @@ use crate::Index; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; -#[derive(Debug, Copy, Clone)] -pub enum EasingName { - Expo, - Quart, - Circ, - Linear, -} - pub struct Facets<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index, pub(crate) chunk_compression_type: CompressionType, pub(crate) chunk_compression_level: Option, pub(crate) chunk_fusing_shrink_size: Option, - number_of_levels: NonZeroUsize, - last_level_size: NonZeroUsize, - easing_function: EasingName, + level_group_size: NonZeroUsize, + min_level_size: NonZeroUsize, } impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { @@ -43,24 +34,18 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { chunk_compression_type: CompressionType::None, chunk_compression_level: None, chunk_fusing_shrink_size: None, - number_of_levels: NonZeroUsize::new(5).unwrap(), - last_level_size: NonZeroUsize::new(5).unwrap(), - easing_function: EasingName::Expo, + level_group_size: NonZeroUsize::new(4).unwrap(), + min_level_size: NonZeroUsize::new(5).unwrap(), } } - pub fn number_of_levels(&mut self, value: NonZeroUsize) -> &mut Self { - self.number_of_levels = value; + pub fn level_group_size(&mut self, value: NonZeroUsize) -> &mut Self { + self.level_group_size = NonZeroUsize::new(cmp::max(value.get(), 2)).unwrap(); self } - pub fn last_level_size(&mut self, value: NonZeroUsize) -> &mut Self { - self.last_level_size = value; - self - } - - pub fn easing_function(&mut self, value: EasingName) -> &mut Self { - self.easing_function = value; + pub fn min_level_size(&mut self, value: NonZeroUsize) -> &mut Self { + self.min_level_size = value; self } @@ -90,9 +75,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, - self.last_level_size, - self.number_of_levels, - self.easing_function, + self.level_group_size, + self.min_level_size, field_id, )?; @@ -117,9 +101,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, - self.last_level_size, - self.number_of_levels, - self.easing_function, + self.level_group_size, + self.min_level_size, field_id, )?; @@ -175,9 +158,8 @@ fn compute_facet_levels<'t, T: 't, KC>( compression_type: CompressionType, compression_level: Option, shrink_size: Option, - last_level_size: NonZeroUsize, - number_of_levels: NonZeroUsize, - easing_function: EasingName, + level_group_size: NonZeroUsize, + min_level_size: NonZeroUsize, field_id: u8, ) -> anyhow::Result> where @@ -201,15 +183,13 @@ where left..=right }; - let level_sizes_iter = - levels_iterator(first_level_size, last_level_size.get(), number_of_levels.get(), easing_function) - .map(|size| (first_level_size as f64 / size as f64).ceil() as usize) - .unique() - .enumerate() - .skip(1); + // Groups sizes are always a power of the original level_group_size and therefore a group + // always maps groups of the previous level and never splits previous levels groups in half. + let group_size_iter = (1u8..) + .map(|l| (l, level_group_size.get().pow(l as u32))) + .take_while(|(_, s)| first_level_size / *s >= min_level_size.get()); - // TODO we must not create levels with identical group sizes. - for (level, level_entry_sizes) in level_sizes_iter { + for (level, group_size) in group_size_iter { let mut left = T::zero(); let mut right = T::zero(); let mut group_docids = RoaringBitmap::new(); @@ -220,10 +200,10 @@ where if i == 0 { left = value; - } else if i % level_entry_sizes == 0 { + } else if i % group_size == 0 { // we found the first bound of the next group, we must store the left // and right bounds associated with the docids. - write_entry::(&mut writer, field_id, level as u8, left, right, &group_docids)?; + write_entry::(&mut writer, field_id, level, left, right, &group_docids)?; // We save the left bound for the new group and also reset the docids. group_docids = RoaringBitmap::new(); @@ -236,7 +216,7 @@ where } if !group_docids.is_empty() { - write_entry::(&mut writer, field_id, level as u8, left, right, &group_docids)?; + write_entry::(&mut writer, field_id, level, left, right, &group_docids)?; } } @@ -274,51 +254,3 @@ where writer.insert(&key, &data)?; Ok(()) } - -fn levels_iterator( - first_level_size: usize, // biggest level - last_level_size: usize, // smallest level - number_of_levels: usize, - easing_function: EasingName, -) -> impl Iterator -{ - let easing_function = match easing_function { - EasingName::Expo => ease_out_expo, - EasingName::Quart => ease_out_quart, - EasingName::Circ => ease_out_circ, - EasingName::Linear => ease_out_linear, - }; - - let b = last_level_size as f64; - let end = first_level_size as f64; - let c = end - b; - let d = number_of_levels; - (0..=d).map(move |t| ((end + b) - easing_function(t as f64, b, c, d as f64)) as usize) -} - -// Go look at the function definitions here: -// https://docs.rs/easer/0.2.1/easer/index.html -// https://easings.net/#easeOutExpo -fn ease_out_expo(t: f64, b: f64, c: f64, d: f64) -> f64 { - if t == d { - b + c - } else { - c * (-2.0_f64.powf(-10.0 * t / d) + 1.0) + b - } -} - -// https://easings.net/#easeOutCirc -fn ease_out_circ(t: f64, b: f64, c: f64, d: f64) -> f64 { - let t = t / d - 1.0; - c * (1.0 - t * t).sqrt() + b -} - -// https://easings.net/#easeOutQuart -fn ease_out_quart(t: f64, b: f64, c: f64, d: f64) -> f64 { - let t = t / d - 1.0; - -c * ((t * t * t * t) - 1.0) + b -} - -fn ease_out_linear(t: f64, b: f64, c: f64, d: f64) -> f64 { - c * t / d + b -} diff --git a/src/update/index_documents/mod.rs b/src/update/index_documents/mod.rs index 362175ce5..4a3ec43f9 100644 --- a/src/update/index_documents/mod.rs +++ b/src/update/index_documents/mod.rs @@ -208,8 +208,8 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a> { pub(crate) chunk_compression_level: Option, pub(crate) chunk_fusing_shrink_size: Option, pub(crate) thread_pool: Option<&'a ThreadPool>, - facet_number_of_levels: Option, - facet_last_level_size: Option, + facet_level_group_size: Option, + facet_min_level_size: Option, update_method: IndexDocumentsMethod, update_format: UpdateFormat, autogenerate_docids: bool, @@ -228,8 +228,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { chunk_compression_level: None, chunk_fusing_shrink_size: None, thread_pool: None, - facet_number_of_levels: None, - facet_last_level_size: None, + facet_level_group_size: None, + facet_min_level_size: None, update_method: IndexDocumentsMethod::ReplaceDocuments, update_format: UpdateFormat::Json, autogenerate_docids: true, @@ -588,11 +588,11 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { builder.chunk_compression_type = self.chunk_compression_type; builder.chunk_compression_level = self.chunk_compression_level; builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size; - if let Some(value) = self.facet_number_of_levels { - builder.number_of_levels(value); + if let Some(value) = self.facet_level_group_size { + builder.level_group_size(value); } - if let Some(value) = self.facet_last_level_size { - builder.last_level_size(value); + if let Some(value) = self.facet_min_level_size { + builder.min_level_size(value); } builder.execute()?; diff --git a/src/update/mod.rs b/src/update/mod.rs index 416e88464..d05396f00 100644 --- a/src/update/mod.rs +++ b/src/update/mod.rs @@ -12,7 +12,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::clear_documents::ClearDocuments; pub use self::delete_documents::DeleteDocuments; pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat}; -pub use self::facets::{Facets, EasingName}; +pub use self::facets::Facets; pub use self::settings::Settings; pub use self::update_builder::UpdateBuilder; pub use self::update_step::UpdateIndexingStep;