Reapply #2601

2024-11-23 10:37:41 +08:00 · 2022-10-26 19:25:59 +02:00 · 2022-10-26 19:25:59 +02:00 · 0dd8e00929
commit 0dd8e00929
parent a99ddf85f7
9 changed files with 58 additions and 2569 deletions
--- a/meilisearch-http/src/routes/indexes/search.rs
+++ b/meilisearch-http/src/routes/indexes/search.rs
@ -15,7 +15,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
    perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET
+    DEFAULT_SEARCH_OFFSET,
 };
 pub fn configure(cfg: &mut web::ServiceConfig) {
@ -72,9 +72,7 @@ impl From<SearchQueryGet> for SearchQuery {
            limit: other.limit,
            page: other.page,
            hits_per_page: other.hits_per_page,
-            attributes_to_retrieve: other
+            attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
                .attributes_to_retrieve
                .map(|o| o.into_iter().collect()),
            attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
            crop_length: other.crop_length,
            attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
--- a/meilisearch-http/src/search.rs
+++ b/meilisearch-http/src/search.rs
@ -19,6 +19,7 @@ use crate::error::MeilisearchHttpError;
 type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
 pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
 pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
@ -29,9 +30,12 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
    pub q: Option<String>,
-    pub offset: Option<usize>,
+    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
    pub offset: usize,
    #[serde(default = "DEFAULT_SEARCH_LIMIT")]
    pub limit: usize,
    pub page: Option<usize>,
    pub hits_per_page: Option<usize>,
    pub attributes_to_retrieve: Option<BTreeSet<String>>,
    pub attributes_to_crop: Option<Vec<String>>,
    #[serde(default = "DEFAULT_CROP_LENGTH")]
@ -53,6 +57,12 @@ pub struct SearchQuery {
    pub matching_strategy: MatchingStrategy,
 }
 impl SearchQuery {
    /// Tells whether the query uses page-based ("finite") pagination.
    ///
    /// Returns `true` as soon as `page` and/or `hits_per_page` is set, and
    /// `false` only when both are `None`.
    pub fn is_finite_pagination(&self) -> bool {
        self.page.is_some() || self.hits_per_page.is_some()
    }
 }
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase")]
 pub enum MatchingStrategy {
@ -91,15 +101,23 @@ pub struct SearchHit {
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
    pub hits: Vec<SearchHit>,
    pub estimated_total_hits: u64,
    pub query: String,
    pub limit: usize,
    pub offset: usize,
    pub processing_time_ms: u128,
    #[serde(flatten)]
    pub hits_info: HitsInfo,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
 }
 /// Pagination metadata attached to a search response.
 ///
 /// The enum is `untagged`, so only the fields of the active variant are
 /// serialized (in camelCase), with no variant name around them.
 #[derive(Serialize, Debug, Clone, PartialEq, Eq)]
 #[serde(untagged)]
 pub enum HitsInfo {
    /// Metadata for page-based pagination (`page` / `hits_per_page`).
    #[serde(rename_all = "camelCase")]
    Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
    /// Metadata for offset/limit pagination.
    #[serde(rename_all = "camelCase")]
    OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
 }
 pub fn perform_search(
    index: &Index,
    query: SearchQuery,
@ -113,6 +131,7 @@ pub fn perform_search(
        search.query(query);
    }
    let is_finite_pagination = query.is_finite_pagination();
    search.terms_matching_strategy(query.matching_strategy.into());
    let max_total_hits = index
@ -120,10 +139,23 @@ pub fn perform_search(
        .map_err(milli::Error::from)?
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
    search.exhaustive_number_hits(is_finite_pagination);
    // compute the offset on the limit depending on the pagination mode.
    let (offset, limit) = if is_finite_pagination {
        let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
        let page = query.page.unwrap_or(1);
        // page 0 gives a limit of 0 forcing Meilisearch to return no document.
        page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
    } else {
        (query.offset, query.limit)
    };
    // Make sure that a user can't get more documents than the hard limit,
    // we align that on the offset too.
-    let offset = min(query.offset.unwrap_or(0), max_total_hits);
+    let offset = min(offset, max_total_hits);
-    let limit = min(query.limit, max_total_hits.saturating_sub(offset));
+    let limit = min(limit, max_total_hits.saturating_sub(offset));
    search.offset(offset);
    search.limit(limit);
@ -239,7 +271,23 @@ pub fn perform_search(
        documents.push(hit);
    }
-    let estimated_total_hits = candidates.len();
+    let number_of_hits = min(candidates.len() as usize, max_total_hits);
    let hits_info = if is_finite_pagination {
        let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
        // If hit_per_page is 0, then pages can't be computed and so we respond 0.
        let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
            .checked_div(hits_per_page)
            .unwrap_or(0);
        HitsInfo::Pagination {
            hits_per_page,
            page: query.page.unwrap_or(1),
            total_pages,
            total_hits: number_of_hits,
        }
    } else {
        HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
    };
    let facet_distribution = match query.facets {
        Some(ref fields) => {
@ -263,10 +311,8 @@ pub fn perform_search(
    let result = SearchResult {
        hits: documents,
-        estimated_total_hits,
+        hits_info,
        query: query.q.clone().unwrap_or_default(),
        limit: query.limit,
        offset: query.offset.unwrap_or_default(),
        processing_time_ms: before_search.elapsed().as_millis(),
        facet_distribution,
    };
--- a/meilisearch-lib/Cargo.toml
+++ b/meilisearch-lib/Cargo.toml
@ -1,85 +0,0 @@
 [package]
 name = "meilisearch-lib"
 version = "0.29.1"
 edition = "2021"
 [dependencies]
 actix-web = { version = "4.2.1", default-features = false }
 anyhow = { version = "1.0.65", features = ["backtrace"] }
 async-stream = "0.3.3"
 async-trait = "0.1.57"
 atomic_refcell = "0.1.8"
 byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
 bytes = "1.2.1"
 clap = { version = "4.0.9", features = ["derive", "env"] }
 crossbeam-channel = "0.5.6"
 csv = "1.1.6"
 derivative = "2.2.0"
 either = { version = "1.8.0", features = ["serde"] }
 flate2 = "1.0.24"
 fs_extra = "1.2.0"
 fst = "0.4.7"
 futures = "0.3.24"
 futures-util = "0.3.24"
 http = "0.2.8"
 indexmap = { version = "1.9.1", features = ["serde-1"] }
 itertools = "0.10.5"
 lazy_static = "1.4.0"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false }
 mime = "0.3.16"
 num_cpus = "1.13.1"
 obkv = "0.2.0"
 once_cell = "1.15.0"
 page_size = "0.4.2"
 parking_lot = "0.12.1"
 permissive-json-pointer = { path = "../permissive-json-pointer" }
 rand = "0.8.5"
 rayon = "1.5.3"
 regex = "1.6.0"
 reqwest = { version = "0.11.12", features = ["json", "rustls-tls"], default-features = false, optional = true }
 roaring = "0.10.1"
 rustls = "0.20.6"
 serde = { version = "1.0.145", features = ["derive"] }
 serde_json = { version = "1.0.85", features = ["preserve_order"] }
 siphasher = "0.3.10"
 slice-group-by = "0.3.0"
 sysinfo = "0.26.4"
 tar = "0.4.38"
 tempfile = "3.3.0"
 thiserror = "1.0.37"
 time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 tokio = { version = "1.21.2", features = ["full"] }
 uuid = { version = "1.1.2", features = ["serde", "v4"] }
 walkdir = "2.3.2"
 whoami = { version = "1.2.3", optional = true }
 index-scheduler = { path = "../index-scheduler" }
 index = { path = "../index" }
 file-store = { path = "../file-store" }
 [dev-dependencies]
 actix-rt = "2.7.0"
 meilisearch-types = { path = "../meilisearch-types", features = ["test-traits"] }
 mockall = "0.11.2"
 nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
 paste = "1.0.9"
 proptest = "1.0.0"
 proptest-derive = "0.3.0"
 [features]
 # all specialized tokenizations
 default = ["milli/default"]
 # chinese specialized tokenization
 chinese = ["milli/chinese"]
 # hebrew specialized tokenization
 hebrew = ["milli/hebrew"]
 # japanese specialized tokenization
 japanese = ["milli/japanese"]
 # thai specialized tokenization
 thai = ["milli/thai"]
--- a/meilisearch-lib/src/dump/compat/v4.rs
+++ b/meilisearch-lib/src/dump/compat/v4.rs
@ -1,145 +0,0 @@
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use milli::update::IndexDocumentsMethod;
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
 use uuid::Uuid;
 use crate::index::{Settings, Unchecked};
 use crate::tasks::batch::BatchId;
 use crate::tasks::task::{
    DocumentDeletion, TaskContent as NewTaskContent, TaskEvent as NewTaskEvent, TaskId, TaskResult,
 };
 /// Task record in the shape handled by this `v4` compatibility module.
 ///
 /// It converts into `crate::tasks::task::Task` through `From`; during that
 /// conversion `index_uid` is folded into the converted task content.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct Task {
    /// Identifier of the task.
    pub id: TaskId,
    /// Index the task applies to.
    pub index_uid: IndexUid,
    /// What the task does.
    pub content: TaskContent,
    /// Events recorded for the task.
    pub events: Vec<TaskEvent>,
 }
 impl From<Task> for crate::tasks::task::Task {
    fn from(other: Task) -> Self {
        Self {
            id: other.id,
            content: NewTaskContent::from((other.index_uid, other.content)),
            events: other.events.into_iter().map(Into::into).collect(),
        }
    }
 }
 /// Event attached to a [`Task`], each carrying an RFC 3339 serialized timestamp.
 ///
 /// Every variant maps one-to-one onto a `NewTaskEvent` variant (see the `From`
 /// impl for `NewTaskEvent`).
 #[derive(Debug, Serialize, Deserialize)]
 pub enum TaskEvent {
    /// The task was created at the given time.
    Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
    /// The task was put in the batch `batch_id`.
    Batched {
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
        batch_id: BatchId,
    },
    /// The task was being processed at the given time.
    Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
    /// The task finished with `result`.
    // NOTE(review): the `Succeded` spelling is the serialized variant name, so it is
    // presumably kept on purpose to match existing dumps; it converts to
    // `NewTaskEvent::Succeeded` — confirm before renaming.
    Succeded {
        result: TaskResult,
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
    /// The task failed with `error`.
    Failed {
        error: ResponseError,
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
 }
 impl From<TaskEvent> for NewTaskEvent {
    fn from(other: TaskEvent) -> Self {
        match other {
            TaskEvent::Created(x) => NewTaskEvent::Created(x),
            TaskEvent::Batched {
                timestamp,
                batch_id,
            } => NewTaskEvent::Batched {
                timestamp,
                batch_id,
            },
            TaskEvent::Processing(x) => NewTaskEvent::Processing(x),
            TaskEvent::Succeded { result, timestamp } => {
                NewTaskEvent::Succeeded { result, timestamp }
            }
            TaskEvent::Failed { error, timestamp } => NewTaskEvent::Failed { error, timestamp },
        }
    }
 }
 /// Payload of a [`Task`] in this module's format.
 ///
 /// Unlike `NewTaskContent`, the variants do not carry the index uid; it is
 /// supplied separately when converting (see the `From<(IndexUid, TaskContent)>` impl).
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[allow(clippy::large_enum_variant)]
 pub enum TaskContent {
    /// Addition of documents, identified by the uuid of their content.
    DocumentAddition {
        content_uuid: Uuid,
        merge_strategy: IndexDocumentsMethod,
        primary_key: Option<String>,
        documents_count: usize,
        allow_index_creation: bool,
    },
    /// Deletion of documents.
    DocumentDeletion(DocumentDeletion),
    /// Update of the index settings.
    SettingsUpdate {
        settings: Settings<Unchecked>,
        /// Indicates whether the task was a deletion
        is_deletion: bool,
        allow_index_creation: bool,
    },
    /// Deletion of the index.
    IndexDeletion,
    /// Creation of the index, with an optional primary key.
    IndexCreation {
        primary_key: Option<String>,
    },
    /// Update of the index, with an optional primary key.
    IndexUpdate {
        primary_key: Option<String>,
    },
    /// Creation of the dump identified by `uid`.
    Dump {
        uid: String,
    },
 }
 impl From<(IndexUid, TaskContent)> for NewTaskContent {
    fn from((index_uid, content): (IndexUid, TaskContent)) -> Self {
        match content {
            TaskContent::DocumentAddition {
                content_uuid,
                merge_strategy,
                primary_key,
                documents_count,
                allow_index_creation,
            } => NewTaskContent::DocumentAddition {
                index_uid,
                content_uuid,
                merge_strategy,
                primary_key,
                documents_count,
                allow_index_creation,
            },
            TaskContent::DocumentDeletion(deletion) => NewTaskContent::DocumentDeletion {
                index_uid,
                deletion,
            },
            TaskContent::SettingsUpdate {
                settings,
                is_deletion,
                allow_index_creation,
            } => NewTaskContent::SettingsUpdate {
                index_uid,
                settings,
                is_deletion,
                allow_index_creation,
            },
            TaskContent::IndexDeletion => NewTaskContent::IndexDeletion { index_uid },
            TaskContent::IndexCreation { primary_key } => NewTaskContent::IndexCreation {
                index_uid,
                primary_key,
            },
            TaskContent::IndexUpdate { primary_key } => NewTaskContent::IndexUpdate {
                index_uid,
                primary_key,
            },
            TaskContent::Dump { uid } => NewTaskContent::Dump { uid },
        }
    }
 }
--- a/meilisearch-lib/src/index/mod.rs
+++ b/meilisearch-lib/src/index/mod.rs
@ -1,250 +0,0 @@
 pub use search::{
    HitsInfo, MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH,
    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };
 pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
 mod dump;
 pub mod error;
 mod search;
 pub mod updates;
 #[allow(clippy::module_inception)]
 mod index;
 pub use index::{Document, IndexMeta, IndexStats};
 #[cfg(not(test))]
 pub use index::Index;
 #[cfg(test)]
 pub use test::MockIndex as Index;
 /// The index::test module provides means of mocking an index instance. It can be used throughout the
 /// code for unit testing, in places where an index would normally be used.
 #[cfg(test)]
 pub mod test {
    use std::path::{Path, PathBuf};
    use std::sync::Arc;
    use milli::update::{
        DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig,
    };
    use nelson::Mocker;
    use uuid::Uuid;
    use super::error::Result;
    use super::index::Index;
    use super::Document;
    use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
    use crate::update_file_store::UpdateFileStore;
    /// Test stand-in for an index: either a real `Index` or a shared `Mocker`
    /// whose stubs are looked up by method name.
    #[derive(Clone)]
    pub enum MockIndex {
        /// Forwards every call to the wrapped index.
        Real(Index),
        /// Routes calls to stubs registered on the mocker.
        Mock(Arc<Mocker>),
    }
    // Each method below forwards to the real index in the `Real` case. In the `Mock`
    // case it either calls the stub registered under the method's name, or hits
    // `todo!()` (and therefore panics) where no mock path has been written yet.
    //
    // NOTE(review): the mocker calls are `unsafe`, presumably because nelson cannot
    // check that a stub's argument and return types match the call site — confirm
    // against nelson's documentation.
    impl MockIndex {
        /// Wraps `mocker` into a mocked index.
        pub fn mock(mocker: Mocker) -> Self {
            Self::Mock(Arc::new(mocker))
        }
        /// Opens a real index through `Index::open` and wraps it.
        pub fn open(
            path: impl AsRef<Path>,
            size: usize,
            uuid: Uuid,
            update_handler: Arc<IndexerConfig>,
        ) -> Result<Self> {
            let index = Index::open(path, size, uuid, update_handler)?;
            Ok(Self::Real(index))
        }
        /// Forwards directly to `Index::load_dump`; there is no mocked path.
        pub fn load_dump(
            src: impl AsRef<Path>,
            dst: impl AsRef<Path>,
            size: usize,
            update_handler: &IndexerConfig,
        ) -> anyhow::Result<()> {
            Index::load_dump(src, dst, size, update_handler)
        }
        /// Real: `Index::uuid`. Mock: calls the `"uuid"` stub with `()`.
        pub fn uuid(&self) -> Uuid {
            match self {
                MockIndex::Real(index) => index.uuid(),
                MockIndex::Mock(m) => unsafe { m.get("uuid").call(()) },
            }
        }
        /// Real: `Index::stats`. Mock: calls the `"stats"` stub with `()`.
        pub fn stats(&self) -> Result<IndexStats> {
            match self {
                MockIndex::Real(index) => index.stats(),
                MockIndex::Mock(m) => unsafe { m.get("stats").call(()) },
            }
        }
        /// Real: `Index::meta`. Mock: not implemented (`todo!()`).
        pub fn meta(&self) -> Result<IndexMeta> {
            match self {
                MockIndex::Real(index) => index.meta(),
                MockIndex::Mock(_) => todo!(),
            }
        }
        /// Real: `Index::settings`. Mock: not implemented (`todo!()`).
        pub fn settings(&self) -> Result<Settings<Checked>> {
            match self {
                MockIndex::Real(index) => index.settings(),
                MockIndex::Mock(_) => todo!(),
            }
        }
        /// Real: `Index::retrieve_documents`. Mock: not implemented (`todo!()`).
        pub fn retrieve_documents<S: AsRef<str>>(
            &self,
            offset: usize,
            limit: usize,
            attributes_to_retrieve: Option<Vec<S>>,
        ) -> Result<(u64, Vec<Document>)> {
            match self {
                MockIndex::Real(index) => {
                    index.retrieve_documents(offset, limit, attributes_to_retrieve)
                }
                MockIndex::Mock(_) => todo!(),
            }
        }
        /// Real: `Index::retrieve_document`. Mock: not implemented (`todo!()`).
        pub fn retrieve_document<S: AsRef<str>>(
            &self,
            doc_id: String,
            attributes_to_retrieve: Option<Vec<S>>,
        ) -> Result<Document> {
            match self {
                MockIndex::Real(index) => index.retrieve_document(doc_id, attributes_to_retrieve),
                MockIndex::Mock(_) => todo!(),
            }
        }
        /// Real: `Index::size`. Mock: not implemented (`todo!()`).
        pub fn size(&self) -> u64 {
            match self {
                MockIndex::Real(index) => index.size(),
                MockIndex::Mock(_) => todo!(),
            }
        }
        /// Real: `Index::snapshot`. Mock: calls the `"snapshot"` stub with the path as `&Path`.
        pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.snapshot(path),
                MockIndex::Mock(m) => unsafe { m.get("snapshot").call(path.as_ref()) },
            }
        }
        /// Consumes the index. Real: `Index::close`. Mock: calls the `"close"` stub with `()`.
        pub fn close(self) {
            match self {
                MockIndex::Real(index) => index.close(),
                MockIndex::Mock(m) => unsafe { m.get("close").call(()) },
            }
        }
        /// Real: `Index::perform_search`. Mock: calls the `"perform_search"` stub with the query.
        pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
            match self {
                MockIndex::Real(index) => index.perform_search(query),
                MockIndex::Mock(m) => unsafe { m.get("perform_search").call(query) },
            }
        }
        /// Real: `Index::dump`. Mock: calls the `"dump"` stub with the path as `&Path`.
        pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.dump(path),
                MockIndex::Mock(m) => unsafe { m.get("dump").call(path.as_ref()) },
            }
        }
        /// Real: `Index::update_documents`. Mock: calls the `"update_documents"` stub
        /// with all four arguments packed in a tuple.
        pub fn update_documents(
            &self,
            method: IndexDocumentsMethod,
            primary_key: Option<String>,
            file_store: UpdateFileStore,
            contents: impl Iterator<Item = Uuid>,
        ) -> Result<Vec<Result<DocumentAdditionResult>>> {
            match self {
                MockIndex::Real(index) => {
                    index.update_documents(method, primary_key, file_store, contents)
                }
                MockIndex::Mock(mocker) => unsafe {
                    mocker
                        .get("update_documents")
                        .call((method, primary_key, file_store, contents))
                },
            }
        }
        /// Real: `Index::update_settings`. Mock: calls the `"update_settings"` stub with `settings`.
        pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.update_settings(settings),
                MockIndex::Mock(m) => unsafe { m.get("update_settings").call(settings) },
            }
        }
        /// Real: `Index::update_primary_key`. Mock: calls the `"update_primary_key"` stub.
        pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
            match self {
                MockIndex::Real(index) => index.update_primary_key(primary_key),
                MockIndex::Mock(m) => unsafe { m.get("update_primary_key").call(primary_key) },
            }
        }
        /// Real: `Index::delete_documents`. Mock: calls the `"delete_documents"` stub with `ids`.
        pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
            match self {
                MockIndex::Real(index) => index.delete_documents(ids),
                MockIndex::Mock(m) => unsafe { m.get("delete_documents").call(ids) },
            }
        }
        /// Real: `Index::clear_documents`. Mock: calls the `"clear_documents"` stub with `()`.
        pub fn clear_documents(&self) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.clear_documents(),
                MockIndex::Mock(m) => unsafe { m.get("clear_documents").call(()) },
            }
        }
    }
    /// A `snapshot` stub registered with `times(2)` can be called twice through
    /// the mocked index, each call returning `Ok(())`.
    #[test]
    fn test_faux_index() {
        let mocker = Mocker::default();
        mocker.when("snapshot").times(2).then(|_: &Path| -> Result<()> { Ok(()) });

        let index = MockIndex::mock(mocker);
        let target = PathBuf::from("hello");
        for _ in 0..2 {
            index.snapshot(&target).unwrap();
        }
    }
    /// Calling a method for which no stub was registered is expected to panic.
    #[test]
    #[should_panic]
    fn test_faux_unexisting_method_stub() {
        let index = MockIndex::mock(Mocker::default());
        let target = PathBuf::from("hello");
        for _ in 0..2 {
            index.snapshot(&target).unwrap();
        }
    }
    /// A panic raised inside a stub is expected to reach the caller of the
    /// mocked method.
    #[test]
    #[should_panic]
    fn test_faux_panic() {
        let mocker = Mocker::default();
        mocker.when("snapshot").times(2).then(|_: &Path| -> Result<()> {
            panic!();
        });

        let index = MockIndex::mock(mocker);
        let target = PathBuf::from("hello");
        for _ in 0..2 {
            index.snapshot(&target).unwrap();
        }
    }
 }
--- a/meilisearch-lib/src/index/search.rs
+++ b/meilisearch-lib/src/index/search.rs
@ -1,747 +0,0 @@
 use std::cmp::min;
 use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::str::FromStr;
 use std::time::Instant;
 use either::Either;
 use milli::tokenizer::TokenizerBuilder;
 use milli::{
    AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
    TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };
 use regex::Regex;
 use serde::{Deserialize, Serialize};
 use serde_json::{json, Value};
 use crate::index::error::FacetError;
 use super::error::{IndexError, Result};
 use super::index::Index;
 /// A document as a JSON object (field name to JSON value).
 pub type Document = serde_json::Map<String, Value>;
 /// For each field name, the bounds of the matches found in that field.
 type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 // The defaults below are `fn()` constants rather than plain values so that they can
 // be named in `#[serde(default = "...")]` attributes and passed to `unwrap_or_else`.
 /// Default `offset` of a search query.
 pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 /// Default `limit` of a search query, also used as the default `hits_per_page`.
 pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
 /// Default `crop_length` of a search query.
 pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
 /// Default marker used when cropping.
 pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 /// Default tag inserted before a highlighted match.
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 /// Default tag inserted after a highlighted match.
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
 /// The maximum number of results that the engine
 /// will be able to return in one search call.
 ///
 /// Used as the fallback when the index has no `pagination_max_total_hits` value.
 pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
 /// Parameters of a search request.
 ///
 /// Deserialized with camelCase field names; unknown fields are rejected
 /// (`deny_unknown_fields`).
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
    /// Query string; when `None`, no query is set on the search.
    pub q: Option<String>,
    /// Number of hits to skip; only used when neither `page` nor `hits_per_page` is set.
    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
    pub offset: usize,
    /// Maximum number of hits; only used when neither `page` nor `hits_per_page` is set.
    #[serde(default = "DEFAULT_SEARCH_LIMIT")]
    pub limit: usize,
    /// Requested page. Setting it (or `hits_per_page`) switches to page-based pagination;
    /// page `0` yields no hit.
    pub page: Option<usize>,
    /// Number of hits per page. Setting it (or `page`) switches to page-based pagination.
    pub hits_per_page: Option<usize>,
    /// Attributes to return in each hit; `"*"` selects every displayed attribute.
    pub attributes_to_retrieve: Option<BTreeSet<String>>,
    /// Attributes to crop in the formatted output.
    pub attributes_to_crop: Option<Vec<String>>,
    /// Crop length forwarded to the formatting options.
    #[serde(default = "DEFAULT_CROP_LENGTH")]
    pub crop_length: usize,
    /// Attributes to highlight in the formatted output.
    pub attributes_to_highlight: Option<HashSet<String>>,
    // Default to false
    /// Whether hits should report the position of their matches.
    #[serde(default = "Default::default")]
    pub show_matches_position: bool,
    /// Filter expression, as a JSON value.
    pub filter: Option<Value>,
    /// Sort criteria, each parsed with `AscDesc::from_str`.
    pub sort: Option<Vec<String>>,
    /// Fields for which a facet distribution is requested.
    pub facets: Option<Vec<String>>,
    /// Text inserted before a highlighted match.
    #[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
    pub highlight_pre_tag: String,
    /// Text inserted after a highlighted match.
    #[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
    pub highlight_post_tag: String,
    /// Marker used where text is cropped.
    #[serde(default = "DEFAULT_CROP_MARKER")]
    pub crop_marker: String,
    /// Strategy used to match the query terms.
    #[serde(default)]
    pub matching_strategy: MatchingStrategy,
 }
 impl SearchQuery {
    /// Tells whether the query uses page-based ("finite") pagination.
    ///
    /// Returns `true` as soon as `page` and/or `hits_per_page` is set, and
    /// `false` only when both are `None`.
    pub fn is_finite_pagination(&self) -> bool {
        self.page.is_some() || self.hits_per_page.is_some()
    }
 }
 /// How the terms of a query must match, as exposed in search requests.
 ///
 /// Deserialized from camelCase variant names; defaults to `Last` and converts
 /// into milli's `TermsMatchingStrategy`.
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase")]
 pub enum MatchingStrategy {
    /// Remove query words from last to first
    Last,
    /// All query words are mandatory
    All,
 }
 impl Default for MatchingStrategy {
    fn default() -> Self {
        Self::Last
    }
 }
 impl From<MatchingStrategy> for TermsMatchingStrategy {
    fn from(other: MatchingStrategy) -> Self {
        match other {
            MatchingStrategy::Last => Self::Last,
            MatchingStrategy::All => Self::All,
        }
    }
 }
 /// One hit of a search response.
 #[derive(Debug, Clone, Serialize, PartialEq)]
 pub struct SearchHit {
    /// Retrieved attributes of the document, serialized at the top level of the hit.
    #[serde(flatten)]
    pub document: Document,
    /// Formatted (highlighted/cropped) attributes, serialized as `_formatted`
    /// and omitted when empty.
    #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
    pub formatted: Document,
    /// Match positions, serialized as `_matchesPosition` and omitted when `None`.
    #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
    pub matches_position: Option<MatchesPosition>,
 }
 /// Response of a search request, serialized with camelCase field names.
 #[derive(Serialize, Debug, Clone, PartialEq)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
    /// Hits of the requested window.
    pub hits: Vec<SearchHit>,
    /// Query string that was searched (empty when the request had none).
    pub query: String,
    /// Time spent in the search, in milliseconds.
    pub processing_time_ms: u128,
    /// Pagination metadata; its fields are serialized at the top level of the response.
    #[serde(flatten)]
    pub hits_info: HitsInfo,
    /// Facet distribution, omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
 }
 /// Pagination metadata attached to a search response.
 ///
 /// The enum is `untagged`, so only the fields of the active variant are
 /// serialized (in camelCase), with no variant name around them.
 #[derive(Serialize, Debug, Clone, PartialEq, Eq)]
 #[serde(untagged)]
 pub enum HitsInfo {
    /// Metadata for page-based pagination (`page` / `hits_per_page`).
    #[serde(rename_all = "camelCase")]
    Pagination {
        hits_per_page: usize,
        page: usize,
        /// Number of pages needed for `total_hits`; `0` when `hits_per_page` is `0`.
        total_pages: usize,
        total_hits: usize,
    },
    /// Metadata for offset/limit pagination.
    #[serde(rename_all = "camelCase")]
    OffsetLimit {
        limit: usize,
        offset: usize,
        estimated_total_hits: usize,
    },
 }
 impl Index {
    pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
        let before_search = Instant::now();
        let rtxn = self.read_txn()?;
        let mut search = self.search(&rtxn);
        if let Some(ref query) = query.q {
            search.query(query);
        }
        let is_finite_pagination = query.is_finite_pagination();
        search.terms_matching_strategy(query.matching_strategy.into());
        let max_total_hits = self
            .pagination_max_total_hits(&rtxn)?
            .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
        search.exhaustive_number_hits(is_finite_pagination);
        // compute the offset on the limit depending on the pagination mode.
        let (offset, limit) = if is_finite_pagination {
            let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
            let page = query.page.unwrap_or(1);
            // page 0 gives a limit of 0 forcing Meilisearch to return no document.
            page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
        } else {
            (query.offset, query.limit)
        };
        // Make sure that a user can't get more documents than the hard limit,
        // we align that on the offset too.
        let offset = min(offset, max_total_hits);
        let limit = min(limit, max_total_hits.saturating_sub(offset));
        search.offset(offset);
        search.limit(limit);
        if let Some(ref filter) = query.filter {
            if let Some(facets) = parse_filter(filter)? {
                search.filter(facets);
            }
        }
        if let Some(ref sort) = query.sort {
            let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
                Ok(sorts) => sorts,
                Err(asc_desc_error) => {
                    return Err(IndexError::Milli(SortError::from(asc_desc_error).into()))
                }
            };
            search.sort_criteria(sort);
        }
        let milli::SearchResult {
            documents_ids,
            matching_words,
            candidates,
            ..
        } = search.execute()?;
        let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();
        let displayed_ids = self
            .displayed_fields_ids(&rtxn)?
            .map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
            .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
        let fids = |attrs: &BTreeSet<String>| {
            let mut ids = BTreeSet::new();
            for attr in attrs {
                if attr == "*" {
                    ids = displayed_ids.clone();
                    break;
                }
                if let Some(id) = fields_ids_map.id(attr) {
                    ids.insert(id);
                }
            }
            ids
        };
        // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
        // but these attributes must be also be present
        // - in the fields_ids_map
        // - in the the displayed attributes
        let to_retrieve_ids: BTreeSet<_> = query
            .attributes_to_retrieve
            .as_ref()
            .map(fids)
            .unwrap_or_else(|| displayed_ids.clone())
            .intersection(&displayed_ids)
            .cloned()
            .collect();
        let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();
        let attr_to_crop = query.attributes_to_crop.unwrap_or_default();
        // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
        // These attributes are:
        // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
        // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
        // But these attributes must be also present in displayed attributes
        let formatted_options = compute_formatted_options(
            &attr_to_highlight,
            &attr_to_crop,
            query.crop_length,
            &to_retrieve_ids,
            &fields_ids_map,
            &displayed_ids,
        );
        let tokenizer = TokenizerBuilder::default().build();
        let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
        formatter_builder.crop_marker(query.crop_marker);
        formatter_builder.highlight_prefix(query.highlight_pre_tag);
        formatter_builder.highlight_suffix(query.highlight_post_tag);
        let mut documents = Vec::new();
        let documents_iter = self.documents(&rtxn, documents_ids)?;
        for (_id, obkv) in documents_iter {
            // First generate a document with all the displayed fields
            let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
            // select the attributes to retrieve
            let attributes_to_retrieve = to_retrieve_ids
                .iter()
                .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
            let mut document =
                permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
            let (matches_position, formatted) = format_fields(
                &displayed_document,
                &fields_ids_map,
                &formatter_builder,
                &formatted_options,
                query.show_matches_position,
                &displayed_ids,
            )?;
            if let Some(sort) = query.sort.as_ref() {
                insert_geo_distance(sort, &mut document);
            }
            let hit = SearchHit {
                document,
                formatted,
                matches_position,
            };
            documents.push(hit);
        }
        let number_of_hits = min(candidates.len() as usize, max_total_hits);
        let hits_info = if is_finite_pagination {
            let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
            // If hit_per_page is 0, then pages can't be computed and so we respond 0.
            let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
                .checked_div(hits_per_page)
                .unwrap_or(0);
            HitsInfo::Pagination {
                hits_per_page,
                page: query.page.unwrap_or(1),
                total_pages,
                total_hits: number_of_hits,
            }
        } else {
            HitsInfo::OffsetLimit {
                limit: query.limit,
                offset,
                estimated_total_hits: number_of_hits,
            }
        };
        let facet_distribution = match query.facets {
            Some(ref fields) => {
                let mut facet_distribution = self.facets_distribution(&rtxn);
                let max_values_by_facet = self
                    .max_values_per_facet(&rtxn)?
                    .unwrap_or(DEFAULT_VALUES_PER_FACET);
                facet_distribution.max_values_per_facet(max_values_by_facet);
                if fields.iter().all(|f| f != "*") {
                    facet_distribution.facets(fields);
                }
                let distribution = facet_distribution.candidates(candidates).execute()?;
                Some(distribution)
            }
            None => None,
        };
        let result = SearchResult {
            hits: documents,
            hits_info,
            query: query.q.clone().unwrap_or_default(),
            processing_time_ms: before_search.elapsed().as_millis(),
            facet_distribution,
        };
        Ok(result)
    }
 }
 /// Inserts a `_geoDistance` field into `document` when the search is sorted by a `_geoPoint`.
 ///
 /// The first entry of `sorts` matching `_geoPoint(lat, lng)` provides the reference point; any
 /// later `_geoPoint` is ignored. The distance between that point and the document's `_geo.lat` /
 /// `_geo.lng` is computed by `milli::distance_between_two_points`, rounded, and stored as an
 /// integer.
 ///
 /// Nothing is inserted when no sort rule contains a `_geoPoint`, when the captured coordinates
 /// are not valid numbers, or when the document has no numeric `_geo.lat` and `_geo.lng`.
 fn insert_geo_distance(sorts: &[String], document: &mut Document) {
    lazy_static::lazy_static! {
        static ref GEO_REGEX: Regex =
            Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap();
    };
    let capture_group = match sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) {
        Some(capture_group) => capture_group,
        None => return,
    };
    // The regex accepts any run of digits, dots and dashes (e.g. `1.2.3` or `--`), so parsing
    // can fail: skip the distance instead of panicking on such input.
    let base = match (
        capture_group[1].parse::<f64>(),
        capture_group[2].parse::<f64>(),
    ) {
        (Ok(lat), Ok(lng)) => [lat, lng],
        _ => return,
    };
    // A missing `_geo` field, or one without numeric `lat`/`lng`, yields no coordinates.
    let coordinates = document
        .get("_geo")
        .and_then(|geo| geo["lat"].as_f64().zip(geo["lng"].as_f64()));
    if let Some((lat, lng)) = coordinates {
        let distance = milli::distance_between_two_points(&base, &[lat, lng]);
        document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
    }
 }
 /// Builds the per-field formatting options (highlight and/or crop) for a search request.
 ///
 /// Highlight rules are applied first, then crop rules. When at least one rule matched a
 /// displayed field, every id of `to_retrieve_ids` that has no rule yet is added with neutral
 /// options. When no rule matched, the returned map is empty.
 fn compute_formatted_options(
    attr_to_highlight: &HashSet<String>,
    attr_to_crop: &[String],
    query_crop_length: usize,
    to_retrieve_ids: &BTreeSet<FieldId>,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
 ) -> BTreeMap<FieldId, FormatOptions> {
    let mut options = BTreeMap::new();
    add_highlight_to_formatted_options(
        &mut options,
        attr_to_highlight,
        fields_ids_map,
        displayed_ids,
    );
    add_crop_to_formatted_options(
        &mut options,
        attr_to_crop,
        query_crop_length,
        fields_ids_map,
        displayed_ids,
    );
    // Should not return `_formatted` if no valid attributes to highlight/crop
    if options.is_empty() {
        return options;
    }
    add_non_formatted_ids_to_formatted_options(&mut options, to_retrieve_ids);
    options
 }
 /// Registers a highlight-only option for every attribute of `attr_to_highlight`.
 ///
 /// The wildcard `*` marks every displayed field and stops the iteration. Any other name is
 /// only registered when it is known to `fields_ids_map` and belongs to `displayed_ids`.
 /// Existing entries for the same field are overwritten.
 fn add_highlight_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    attr_to_highlight: &HashSet<String>,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
 ) {
    let highlight_only = FormatOptions {
        highlight: true,
        crop: None,
    };
    for attr in attr_to_highlight {
        if attr == "*" {
            formatted_options.extend(displayed_ids.iter().map(|&id| (id, highlight_only)));
            break;
        }
        match fields_ids_map.id(attr) {
            Some(id) if displayed_ids.contains(&id) => {
                formatted_options.insert(id, highlight_only);
            }
            _ => (),
        }
    }
 }
 /// Registers a crop length for every attribute of `attr_to_crop`.
 ///
 /// Each entry is either `name` or `name:length`, split on the last `:`. When the part after
 /// the last `:` is not a valid `usize`, `crop_length` is used instead. The wildcard `*`
 /// applies to every displayed field. Fields that already have options keep their highlight
 /// flag and only get their crop length updated.
 fn add_crop_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    attr_to_crop: &[String],
    crop_length: usize,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
 ) {
    for attr in attr_to_crop {
        let (attr_name, attr_len) = match attr.rsplit_once(':') {
            Some((name, len)) => (name, len.parse::<usize>().unwrap_or(crop_length)),
            None => (attr.as_str(), crop_length),
        };
        // Sets the crop length on an existing entry or creates a crop-only one.
        let mut set_crop = |id: FieldId| {
            formatted_options
                .entry(id)
                .and_modify(|f| f.crop = Some(attr_len))
                .or_insert(FormatOptions {
                    highlight: false,
                    crop: Some(attr_len),
                });
        };
        if attr_name == "*" {
            for &id in displayed_ids {
                set_crop(id);
            }
        }
        let named_field = fields_ids_map
            .id(attr_name)
            .filter(|id| displayed_ids.contains(id));
        if let Some(id) = named_field {
            set_crop(id);
        }
    }
 }
 /// Adds neutral options (no highlight, no crop) for every id of `to_retrieve_ids` that has no
 /// entry in `formatted_options` yet. Existing entries are left untouched.
 fn add_non_formatted_ids_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    to_retrieve_ids: &BTreeSet<FieldId>,
 ) {
    for &id in to_retrieve_ids {
        let neutral = FormatOptions {
            highlight: false,
            crop: None,
        };
        formatted_options.entry(id).or_insert(neutral);
    }
 }
 /// Rebuilds the JSON document stored in `obkv`, keeping only the displayed attributes.
 ///
 /// Every entry of `obkv` is deserialized and stored under the field name registered in
 /// `field_ids_map`; the resulting object is then filtered through
 /// `permissive_json_pointer::select_values` using the names of `displayed_attributes`.
 ///
 /// Returns an error when a stored value cannot be deserialized, and panics with
 /// "Missing field name" when a field id is unknown to `field_ids_map`.
 fn make_document(
    displayed_attributes: &BTreeSet<FieldId>,
    field_ids_map: &FieldsIdsMap,
    obkv: obkv::KvReaderU16,
 ) -> Result<Document> {
    // Recreate the original JSON object, one stored field at a time.
    let mut full_document = serde_json::Map::new();
    for (field_id, raw_value) in obkv.iter() {
        let value = serde_json::from_slice(raw_value)?;
        let name = field_ids_map.name(field_id).expect("Missing field name");
        full_document.insert(name.to_string(), value);
    }
    // Keep only what the index settings allow to be displayed.
    let displayed_names = displayed_attributes
        .iter()
        .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
    Ok(permissive_json_pointer::select_values(
        &full_document,
        displayed_names,
    ))
 }
 /// Produces the formatted version of `document` and, when `compute_matches` is set, the
 /// positions of the matches found in it.
 ///
 /// Every leaf value reachable through a displayable field is passed to `format_value` with
 /// the merged options of all rules related to its key. The formatted document is finally
 /// restricted to the fields that appear in `formatted_options`.
 fn format_fields<'a, A: AsRef<[u8]>>(
    document: &Document,
    field_ids_map: &FieldsIdsMap,
    builder: &MatcherBuilder<'a, A>,
    formatted_options: &BTreeMap<FieldId, FormatOptions>,
    compute_matches: bool,
    displayable_ids: &BTreeSet<FieldId>,
 ) -> Result<(Option<MatchesPosition>, Document)> {
    let mut matches_position = if compute_matches {
        Some(BTreeMap::new())
    } else {
        None
    };
    let mut formatted = document.clone();
    // Only the displayable attributes are visited.
    let displayable_names = displayable_ids
        .iter()
        .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
    permissive_json_pointer::map_leaf_values(&mut formatted, displayable_names, |key, value| {
        // The options of a key are the merge of every rule that applies to it. For instance,
        // when the user asks to highlight `doggo` and to crop `doggo.name`, `doggo.name` must be
        // highlighted and cropped while `doggo.age` is only highlighted.
        let format = formatted_options
            .iter()
            .filter_map(|(&field, &option)| {
                let name = field_ids_map.name(field).unwrap();
                if milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name) {
                    Some(option)
                } else {
                    None
                }
            })
            .reduce(|acc, option| acc.merge(option));
        let mut infos = Vec::new();
        let original = std::mem::take(value);
        *value = format_value(original, builder, format, &mut infos, compute_matches);
        if !infos.is_empty() {
            if let Some(matches) = matches_position.as_mut() {
                matches.insert(key.to_owned(), infos);
            }
        }
    });
    // This unwrap must be safe since the ids were obtained from the fields_ids_map just before.
    let selectors = formatted_options
        .keys()
        .map(|&fid| field_ids_map.name(fid).unwrap());
    let formatted = permissive_json_pointer::select_values(&formatted, selectors);
    Ok((matches_position, formatted))
 }
 /// Recursively formats `value` and records the match bounds found along the way.
 ///
 /// * Strings are run through a matcher built from `builder`; with `format_options` they are
 ///   replaced by the formatted text, otherwise they are returned unchanged.
 /// * Numbers are matched on their string representation; with `format_options` they become
 ///   the formatted string, otherwise the number is returned unchanged.
 /// * Arrays and objects are formatted element by element, keeping the highlight flag but
 ///   dropping the crop length.
 /// * Any other value is returned as is.
 ///
 /// Match bounds are appended to `infos` only when `compute_matches` is true.
 fn format_value<'a, A: AsRef<[u8]>>(
    value: Value,
    builder: &MatcherBuilder<'a, A>,
    format_options: Option<FormatOptions>,
    infos: &mut Vec<MatchBounds>,
    compute_matches: bool,
 ) -> Value {
    // Options handed down to the elements of arrays and objects: same highlight, no crop.
    let nested_options = format_options.map(|options| FormatOptions {
        highlight: options.highlight,
        crop: None,
    });
    match value {
        Value::String(text) => {
            let mut matcher = builder.build(&text);
            if compute_matches {
                infos.extend_from_slice(&matcher.matches()[..]);
            }
            match format_options {
                None => Value::String(text),
                Some(options) => {
                    let formatted = matcher.format(options);
                    Value::String(formatted.into_owned())
                }
            }
        }
        Value::Array(values) => {
            let formatted = values
                .into_iter()
                .map(|v| format_value(v, builder, nested_options, infos, compute_matches))
                .collect();
            Value::Array(formatted)
        }
        Value::Object(object) => {
            let formatted = object
                .into_iter()
                .map(|(k, v)| {
                    let v = format_value(v, builder, nested_options, infos, compute_matches);
                    (k, v)
                })
                .collect();
            Value::Object(formatted)
        }
        Value::Number(number) => {
            let digits = number.to_string();
            let mut matcher = builder.build(&digits);
            if compute_matches {
                infos.extend_from_slice(&matcher.matches()[..]);
            }
            match format_options {
                None => Value::Number(number),
                Some(options) => {
                    let formatted = matcher.format(options);
                    Value::String(formatted.into_owned())
                }
            }
        }
        other => other,
    }
 }
 /// Parses a filter given either as a single expression string or as an array of expressions.
 ///
 /// Strings go through `Filter::from_str`, arrays through `parse_filter_array`. Any other JSON
 /// value is rejected with `FacetError::InvalidExpression`.
 ///
 /// NOTE(review): the error lists only "Array" as expected although strings are accepted too;
 /// confirm whether that message is intended.
 fn parse_filter(facets: &Value) -> Result<Option<Filter>> {
    match facets {
        Value::Array(arr) => parse_filter_array(arr),
        Value::String(expr) => Ok(Filter::from_str(expr)?),
        other => Err(FacetError::InvalidExpression(&["Array"], other.clone()).into()),
    }
 }
 /// Parses an array-shaped filter.
 ///
 /// Each element of `arr` is either a string or an array of strings; the collected elements
 /// are handed to `Filter::from_array` (strings as `Either::Right`, inner arrays as
 /// `Either::Left`). Any other shape is rejected with `FacetError::InvalidExpression`.
 fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>> {
    let mut ands = Vec::new();
    for value in arr {
        let clause = match value {
            Value::String(s) => Either::Right(s.as_str()),
            Value::Array(alternatives) => {
                let ors = alternatives
                    .iter()
                    .map(|alternative| match alternative {
                        Value::String(s) => Ok(s.as_str()),
                        other => {
                            Err(FacetError::InvalidExpression(&["String"], other.clone()).into())
                        }
                    })
                    .collect::<Result<Vec<_>>>()?;
                Either::Left(ors)
            }
            other => {
                return Err(
                    FacetError::InvalidExpression(&["String", "[String]"], other.clone()).into(),
                )
            }
        };
        ands.push(clause);
    }
    Ok(Filter::from_array(ands)?)
 }
 #[cfg(test)]
 mod test {
    use super::*;
    /// Runs `insert_geo_distance` on a copy of `base` and returns the resulting `_geoDistance`.
    fn geo_distance_for(base: &Document, sorters: &[String]) -> Option<Value> {
        let mut document = base.clone();
        insert_geo_distance(sorters, &mut document);
        document.get("_geoDistance").cloned()
    }
    #[test]
    fn test_insert_geo_distance() {
        let value: Document = serde_json::from_str(
            r#"{
              "_geo": {
                "lat": 50.629973371633746,
                "lng": 3.0569447399419567
              },
              "city": "Lille",
              "id": "1"
            }"#,
        )
        .unwrap();
        // The document sits exactly on the reference point, hence a distance of zero.
        let zero = Some(json!(0));
        let sorters = ["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()];
        assert_eq!(geo_distance_for(&value, &sorters), zero);
        let sorters = ["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()];
        assert_eq!(geo_distance_for(&value, &sorters), zero);
        let sorters =
            ["_geoPoint(   50.629973371633746   ,  3.0569447399419567   ):desc".to_string()];
        assert_eq!(geo_distance_for(&value, &sorters), zero);
        let sorters = [
            "prix:asc",
            "villeneuve:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "ubu:asc",
        ]
        .map(String::from);
        assert_eq!(geo_distance_for(&value, &sorters), zero);
        // only the first geoPoint is used to compute the distance
        let sorters = [
            "chien:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "pangolin:desc",
            "_geoPoint(100.0, -80.0):asc",
            "chat:asc",
        ]
        .map(String::from);
        assert_eq!(geo_distance_for(&value, &sorters), zero);
        // there was no _geoPoint so nothing is inserted in the document
        let sorters = ["chien:asc".to_string()];
        assert_eq!(geo_distance_for(&value, &sorters), None);
    }
 }
--- a/meilisearch-lib/src/index/updates.rs
+++ b/meilisearch-lib/src/index/updates.rs
@ -1,559 +0,0 @@
 use std::collections::{BTreeMap, BTreeSet};
 use std::marker::PhantomData;
 use std::num::NonZeroUsize;
 use log::{debug, info, trace};
 use milli::documents::DocumentsBatchReader;
 use milli::update::{
    DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
    Setting,
 };
 use serde::{Deserialize, Serialize, Serializer};
 use uuid::Uuid;
 use super::error::{IndexError, Result};
 use super::index::{Index, IndexMeta};
 use crate::update_file_store::UpdateFileStore;
 /// Serializes a list setting, writing `["*"]` in place of `Setting::Reset`.
 ///
 /// `Setting::Set` serializes its own list and `Setting::NotSet` serializes as `None`.
 fn serialize_with_wildcard<S>(
    field: &Setting<Vec<String>>,
    s: S,
 ) -> std::result::Result<S::Ok, S::Error>
 where
    S: Serializer,
 {
    let wildcard = vec![String::from("*")];
    let to_serialize: Option<&Vec<String>> = match field {
        Setting::NotSet => None,
        Setting::Reset => Some(&wildcard),
        Setting::Set(value) => Some(value),
    };
    to_serialize.serialize(s)
 }
 /// Marker type for `Settings` whose validity is guaranteed, as produced by
 /// `Settings::<Unchecked>::check`.
 #[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
 pub struct Checked;
 /// Marker type for `Settings` that still need to be validated with `check`.
 #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub struct Unchecked;
 /// Minimum word lengths used by the typo tolerance settings.
 ///
 /// Fields are (de)serialized in camelCase, unknown fields are rejected and fields left
 /// `NotSet` are omitted from the serialized output.
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct MinWordSizeTyposSetting {
    /// Forwarded to the milli builder's `set_min_word_len_one_typo`.
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub one_typo: Setting<u8>,
    /// Forwarded to the milli builder's `set_min_word_len_two_typos`.
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub two_typos: Setting<u8>,
 }
 /// Typo tolerance section of the index settings.
 ///
 /// Fields are (de)serialized in camelCase, unknown fields are rejected and fields left
 /// `NotSet` are omitted from the serialized output.
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct TypoSettings {
    /// Whether typos are authorized (forwarded to `set_autorize_typos`).
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub enabled: Setting<bool>,
    /// Minimum word lengths for one and two typos.
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
    /// Words forwarded to the milli builder as exact words (`set_exact_words`).
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub disable_on_words: Setting<BTreeSet<String>>,
    /// Attributes forwarded to the milli builder as exact attributes (`set_exact_attributes`).
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub disable_on_attributes: Setting<BTreeSet<String>>,
 }
 /// Faceting section of the index settings.
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct FacetingSettings {
    /// Forwarded to the milli builder's `set_max_values_per_facet`.
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub max_values_per_facet: Setting<usize>,
 }
 /// Pagination section of the index settings.
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct PaginationSettings {
    /// Forwarded to the milli builder's `set_pagination_max_total_hits`.
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub max_total_hits: Setting<usize>,
 }
 /// Holds all the settings for an index. `T` can either be `Checked` if it represents settings
 /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter
 /// case, a call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
 ///
 /// Fields are (de)serialized in camelCase, unknown fields are rejected and fields left
 /// `NotSet` are omitted from the serialized output.
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 #[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub struct Settings<T> {
    /// Displayed fields; a `Reset` value is serialized as `["*"]`.
    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub displayed_attributes: Setting<Vec<String>>,
    /// Searchable fields; a `Reset` value is serialized as `["*"]`.
    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub searchable_attributes: Setting<Vec<String>>,
    /// Fields that can be used in filters.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub filterable_attributes: Setting<BTreeSet<String>>,
    /// Fields that can be used to sort results.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub sortable_attributes: Setting<BTreeSet<String>>,
    /// Ranking rules, forwarded to the milli builder as its criteria.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub ranking_rules: Setting<Vec<String>>,
    /// Stop words of the index.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub stop_words: Setting<BTreeSet<String>>,
    /// Synonyms: each key maps to its list of synonyms.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    /// Field forwarded to the milli builder as the distinct field.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub distinct_attribute: Setting<String>,
    /// Typo tolerance configuration.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub typo_tolerance: Setting<TypoSettings>,
    /// Faceting configuration.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub faceting: Setting<FacetingSettings>,
    /// Pagination configuration.
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub pagination: Setting<PaginationSettings>,
    /// Carries the `Checked`/`Unchecked` marker; never (de)serialized.
    #[serde(skip)]
    pub _kind: PhantomData<T>,
 }
 impl Settings<Checked> {
    /// Returns settings in which every field is `Setting::Reset`.
    pub fn cleared() -> Settings<Checked> {
        Self {
            displayed_attributes: Setting::Reset,
            searchable_attributes: Setting::Reset,
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            distinct_attribute: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
            pagination: Setting::Reset,
            _kind: PhantomData,
        }
    }
    /// Converts into `Settings<Unchecked>`, moving every field over unchanged.
    pub fn into_unchecked(self) -> Settings<Unchecked> {
        Settings {
            displayed_attributes: self.displayed_attributes,
            searchable_attributes: self.searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
            pagination: self.pagination,
            _kind: PhantomData,
        }
    }
 }
 impl Settings<Unchecked> {
    pub fn check(self) -> Settings<Checked> {
        let displayed_attributes = match self.displayed_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };
        let searchable_attributes = match self.searchable_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };
        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
            pagination: self.pagination,
            _kind: PhantomData,
        }
    }
 }
 /// Optional facet sizing parameters, (de)serialized in camelCase with unknown fields rejected.
 ///
 /// NOTE(review): nothing in this part of the file reads these fields; their exact meaning
 /// should be confirmed against the code that consumes them.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct Facets {
    /// Non-zero level group size, when provided.
    pub level_group_size: Option<NonZeroUsize>,
    /// Non-zero minimum level size, when provided.
    pub min_level_size: Option<NonZeroUsize>,
 }
 impl Index {
    /// Sets `primary_key` on the index inside the caller's write transaction.
    ///
    /// The transaction is not committed here; the returned `IndexMeta` is read back through
    /// the same transaction.
    fn update_primary_key_txn<'a, 'b>(
        &'a self,
        txn: &mut milli::heed::RwTxn<'a, 'b>,
        primary_key: String,
    ) -> Result<IndexMeta> {
        let mut settings = milli::update::Settings::new(txn, self, self.indexer_config.as_ref());
        settings.set_primary_key(primary_key);
        settings.execute(|_| ())?;
        Ok(IndexMeta::new_txn(self, txn)?)
    }
    /// Sets `primary_key` on the index in its own write transaction and commits it.
    pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
        let mut txn = self.write_txn()?;
        let meta = self.update_primary_key_txn(&mut txn, primary_key)?;
        txn.commit()?;
        Ok(meta)
    }
    /// Deletes `ids` from the index, and returns how many documents were deleted.
    pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
        let mut txn = self.write_txn()?;
        let mut deletion = milli::update::DeleteDocuments::new(&mut txn, self)?;
        // We ignore unexisting document ids
        for id in ids {
            deletion.delete_external_id(id);
        }
        let deleted = deletion.execute()?;
        txn.commit()?;
        Ok(deleted)
    }
    /// Removes every document from the index and commits the change.
    pub fn clear_documents(&self) -> Result<()> {
        let mut txn = self.write_txn()?;
        milli::update::ClearDocuments::new(&mut txn, self).execute()?;
        txn.commit()?;
        Ok(())
    }
    /// Indexes the update files identified by `contents` in a single write transaction.
    ///
    /// When `primary_key` is provided and the index has none yet, it is set first. Each update
    /// file yields one entry in the returned vector, in order. The indexing is executed and the
    /// transaction committed only when at least one file was accepted; otherwise `commit` is
    /// never called on the transaction.
    pub fn update_documents(
        &self,
        method: IndexDocumentsMethod,
        primary_key: Option<String>,
        file_store: UpdateFileStore,
        contents: impl IntoIterator<Item = Uuid>,
    ) -> Result<Vec<Result<DocumentAdditionResult>>> {
        trace!("performing document addition");
        let mut txn = self.write_txn()?;
        if let Some(primary_key) = primary_key {
            if self.primary_key(&txn)?.is_none() {
                self.update_primary_key_txn(&mut txn, primary_key)?;
            }
        }
        let config = IndexDocumentsConfig {
            update_method: method,
            ..Default::default()
        };
        let mut builder = milli::update::IndexDocuments::new(
            &mut txn,
            self,
            self.indexer_config.as_ref(),
            config,
            |indexing_step| debug!("update: {:?}", indexing_step),
        )?;
        let mut results = Vec::new();
        for content_uuid in contents {
            let content_file = file_store.get_update(content_uuid)?;
            let reader = DocumentsBatchReader::from_reader(content_file)?;
            // `add_documents` hands the builder back along with the outcome for this file.
            let (next_builder, user_result) = builder.add_documents(reader)?;
            builder = next_builder;
            let outcome = user_result
                .map(|count| DocumentAdditionResult {
                    indexed_documents: count,
                    number_of_documents: count,
                })
                .map_err(IndexError::from);
            results.push(outcome);
        }
        if results.iter().any(|result| result.is_ok()) {
            let addition = builder.execute()?;
            txn.commit()?;
            info!("document addition done: {:?}", addition);
        }
        Ok(results)
    }
    /// Applies `settings` to the index in its own write transaction and commits it.
    pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
        // We must use the write transaction of the update here.
        let mut txn = self.write_txn()?;
        let mut settings_builder =
            milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref());
        apply_settings_to_builder(settings, &mut settings_builder);
        settings_builder.execute(|indexing_step| debug!("update: {:?}", indexing_step))?;
        txn.commit()?;
        Ok(())
    }
 }
 /// Forwards every field of `settings` to the milli settings `builder`.
 ///
 /// For each field, `Setting::Set` calls the matching setter, `Setting::Reset` calls the
 /// matching reset method and `Setting::NotSet` leaves the builder untouched. Resetting a whole
 /// section (typo tolerance, faceting, pagination) resets each of its sub-settings.
 pub fn apply_settings_to_builder(
    settings: &Settings<Checked>,
    builder: &mut milli::update::Settings,
 ) {
    match &settings.searchable_attributes {
        Setting::Set(names) => builder.set_searchable_fields(names.clone()),
        Setting::Reset => builder.reset_searchable_fields(),
        Setting::NotSet => (),
    }
    match &settings.displayed_attributes {
        Setting::Set(names) => builder.set_displayed_fields(names.clone()),
        Setting::Reset => builder.reset_displayed_fields(),
        Setting::NotSet => (),
    }
    match &settings.filterable_attributes {
        Setting::Set(facets) => {
            builder.set_filterable_fields(facets.clone().into_iter().collect())
        }
        Setting::Reset => builder.reset_filterable_fields(),
        Setting::NotSet => (),
    }
    match &settings.sortable_attributes {
        Setting::Set(fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
        Setting::Reset => builder.reset_sortable_fields(),
        Setting::NotSet => (),
    }
    match &settings.ranking_rules {
        Setting::Set(criteria) => builder.set_criteria(criteria.clone()),
        Setting::Reset => builder.reset_criteria(),
        Setting::NotSet => (),
    }
    match &settings.stop_words {
        Setting::Set(stop_words) => builder.set_stop_words(stop_words.clone()),
        Setting::Reset => builder.reset_stop_words(),
        Setting::NotSet => (),
    }
    match &settings.synonyms {
        Setting::Set(synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
        Setting::Reset => builder.reset_synonyms(),
        Setting::NotSet => (),
    }
    match &settings.distinct_attribute {
        Setting::Set(attr) => builder.set_distinct_field(attr.clone()),
        Setting::Reset => builder.reset_distinct_field(),
        Setting::NotSet => (),
    }
    match &settings.typo_tolerance {
        Setting::Set(typo) => {
            match typo.enabled {
                Setting::Set(enabled) => builder.set_autorize_typos(enabled),
                Setting::Reset => builder.reset_authorize_typos(),
                Setting::NotSet => (),
            }
            match &typo.min_word_size_for_typos {
                Setting::Set(sizes) => {
                    match sizes.one_typo {
                        Setting::Set(len) => builder.set_min_word_len_one_typo(len),
                        Setting::Reset => builder.reset_min_word_len_one_typo(),
                        Setting::NotSet => (),
                    }
                    match sizes.two_typos {
                        Setting::Set(len) => builder.set_min_word_len_two_typos(len),
                        Setting::Reset => builder.reset_min_word_len_two_typos(),
                        Setting::NotSet => (),
                    }
                }
                Setting::Reset => {
                    builder.reset_min_word_len_one_typo();
                    builder.reset_min_word_len_two_typos();
                }
                Setting::NotSet => (),
            }
            match &typo.disable_on_words {
                Setting::Set(words) => {
                    builder.set_exact_words(words.clone());
                }
                Setting::Reset => builder.reset_exact_words(),
                Setting::NotSet => (),
            }
            match &typo.disable_on_attributes {
                Setting::Set(attributes) => {
                    builder.set_exact_attributes(attributes.iter().cloned().collect())
                }
                Setting::Reset => builder.reset_exact_attributes(),
                Setting::NotSet => (),
            }
        }
        Setting::Reset => {
            // all typo settings need to be reset here.
            builder.reset_authorize_typos();
            builder.reset_min_word_len_one_typo();
            builder.reset_min_word_len_two_typos();
            builder.reset_exact_words();
            builder.reset_exact_attributes();
        }
        Setting::NotSet => (),
    }
    match &settings.faceting {
        Setting::Set(faceting) => match faceting.max_values_per_facet {
            Setting::Set(max) => builder.set_max_values_per_facet(max),
            Setting::Reset => builder.reset_max_values_per_facet(),
            Setting::NotSet => (),
        },
        Setting::Reset => builder.reset_max_values_per_facet(),
        Setting::NotSet => (),
    }
    match &settings.pagination {
        Setting::Set(pagination) => match pagination.max_total_hits {
            Setting::Set(max) => builder.set_pagination_max_total_hits(max),
            Setting::Reset => builder.reset_pagination_max_total_hits(),
            Setting::NotSet => (),
        },
        Setting::Reset => builder.reset_pagination_max_total_hits(),
        Setting::NotSet => (),
    }
 }
 #[cfg(test)]
 pub(crate) mod test {
    use proptest::prelude::*;
    use super::*;
    /// Proptest strategy yielding `NotSet`, `Reset` or `Set` with an arbitrary `T`.
    pub(super) fn setting_strategy<T: Arbitrary + Clone>() -> impl Strategy<Value = Setting<T>> {
        prop_oneof![
            Just(Setting::NotSet),
            Just(Setting::Reset),
            any::<T>().prop_map(Setting::Set)
        ]
    }
    /// Builds unchecked settings where only the displayed and searchable attributes are set.
    fn unchecked_with(displayed: &[&str], searchable: &[&str]) -> Settings<Unchecked> {
        fn owned(names: &[&str]) -> Vec<String> {
            names.iter().map(|name| name.to_string()).collect()
        }
        Settings {
            displayed_attributes: Setting::Set(owned(displayed)),
            searchable_attributes: Setting::Set(owned(searchable)),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        }
    }
    #[test]
    fn test_setting_check() {
        // Without a wildcard, `check` leaves both attribute lists untouched.
        let settings = unchecked_with(&["hello"], &["hello"]);
        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(
            settings.searchable_attributes,
            checked.searchable_attributes
        );
        // A wildcard anywhere in the list turns the setting into a reset.
        let checked = unchecked_with(&["*"], &["hello", "*"]).check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
 }
--- a/meilisearch-lib/src/index_controller/mod.rs
+++ b/meilisearch-lib/src/index_controller/mod.rs
@ -1,574 +0,0 @@
 use std::collections::BTreeMap;
 use std::fmt;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use std::time::Duration;
 use actix_web::error::PayloadError;
 use bytes::Bytes;
 use futures::Stream;
 use index_scheduler::task::{Status, Task};
 use index_scheduler::{IndexScheduler, KindWithContent, TaskId, TaskView};
 use meilisearch_auth::SearchRules;
 use milli::update::{IndexDocumentsMethod, IndexerConfig};
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
 use tokio::task::spawn_blocking;
 use tokio::time::sleep;
 use uuid::Uuid;
 // use crate::dump::{self, load_dump, DumpHandler};
 use crate::options::{IndexerOpts, SchedulerConfig};
 // use crate::snapshot::{load_snapshot, SnapshotService};
 use error::Result;
 use index::{
    Checked, Document, Index, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
 };
 pub mod error;
 pub mod versioning;
 /// Boxed, type-erased stream whose items are either a `Bytes` chunk or an actix-web
 /// `PayloadError`. The stream must be `Send + Sync + Unpin` and `'static`.
 pub type Payload = Box<
    dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
 >;
 /// Opens a heed environment located at `path`.
 ///
 /// The environment is configured with a map size of `size` and room for at most 20
 /// databases. Any error reported by heed while opening is returned as-is.
 pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result<milli::heed::Env> {
    use milli::heed::EnvOpenOptions;

    let mut env_builder = EnvOpenOptions::new();
    env_builder.map_size(size);
    env_builder.max_dbs(20);
    env_builder.open(path)
 }
 /// Description of an index: its `uuid`, its `uid` and its `IndexMeta`.
 ///
 /// Field names are (de)serialized in camelCase.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct IndexMetadata {
    /// Excluded from the (de)serialized representation (`serde(skip)`).
    #[serde(skip)]
    pub uuid: Uuid,
    pub uid: String,
    /// The fields of `meta` are inlined next to `uid` in the serialized form
    /// (`serde(flatten)`).
    #[serde(flatten)]
    pub meta: IndexMeta,
 }
 /// Pair of optional values for an index: its `uid` and its `primary_key`.
 #[derive(Clone, Debug)]
 pub struct IndexSettings {
    pub uid: Option<String>,
    pub primary_key: Option<String>,
 }
 /// Cloneable wrapper around an `IndexScheduler`.
 ///
 /// Created through `Meilisearch::builder()` followed by `IndexControllerBuilder::build`.
 #[derive(Clone)]
 pub struct Meilisearch {
    // Private: the scheduler is reached through the methods of this type or through `Deref`.
    index_scheduler: IndexScheduler,
 }
 /// Lets a `Meilisearch` be used wherever a `&IndexScheduler` is expected.
 impl std::ops::Deref for Meilisearch {
    type Target = IndexScheduler;

    /// Borrows the wrapped scheduler.
    fn deref(&self) -> &IndexScheduler {
        let Self { index_scheduler } = self;
        index_scheduler
    }
 }
 /// Format tag attached to a document addition: JSON, CSV or NDJSON.
 #[derive(Debug)]
 pub enum DocumentAdditionFormat {
    Json,
    Csv,
    Ndjson,
 }
 impl fmt::Display for DocumentAdditionFormat {
    /// Writes the lowercase name of the format: `json`, `csv` or `ndjson`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Json => "json",
            Self::Csv => "csv",
            Self::Ndjson => "ndjson",
        };
        f.write_str(label)
    }
 }
 /// Statistics aggregated over several indexes, as produced by `Meilisearch::get_all_stats`.
 ///
 /// Field names are serialized in camelCase.
 #[derive(Serialize, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct Stats {
    /// Sum of the `size` reported by each index present in `indexes`.
    pub database_size: u64,
    /// Most recent `updated_at` among the indexes present in `indexes`, or `None` when there
    /// are none. Serialized as an optional RFC 3339 timestamp.
    #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
    pub last_update: Option<OffsetDateTime>,
    /// Per-index statistics, keyed by index name.
    pub indexes: BTreeMap<String, IndexStats>,
 }
 /// Operations that can be requested on an index.
 ///
 /// `Debug` is derived through `derivative` so that the `payload` stream of
 /// `DocumentAddition` can be left out of the debug output.
 // The variants differ a lot in size; the clippy lint about it is silenced on purpose.
 #[allow(clippy::large_enum_variant)]
 #[derive(derivative::Derivative)]
 #[derivative(Debug)]
 pub enum Update {
    /// Delete the documents identified by the given strings.
    DeleteDocuments(Vec<String>),
    /// Delete every document.
    ClearDocuments,
    /// Apply (not yet checked) settings.
    Settings {
        settings: Settings<Unchecked>,
        /// Indicates whether the update was a deletion
        is_deletion: bool,
        allow_index_creation: bool,
    },
    /// Add documents read from `payload`, which is expected to be in `format`.
    DocumentAddition {
        // A boxed stream has no useful `Debug` output, so it is skipped.
        #[derivative(Debug = "ignore")]
        payload: Payload,
        primary_key: Option<String>,
        method: IndexDocumentsMethod,
        format: DocumentAdditionFormat,
        allow_index_creation: bool,
    },
    /// Delete the index.
    DeleteIndex,
    /// Create the index, optionally with a primary key.
    CreateIndex {
        primary_key: Option<String>,
    },
    /// Update the index, optionally with a new primary key.
    UpdateIndex {
        primary_key: Option<String>,
    },
 }
 /// Builder collecting the options consumed by `IndexControllerBuilder::build`.
 ///
 /// Every field starts at its `Default` value (`None` / `false`) and is filled through the
 /// `set_*` methods.
 #[derive(Default, Debug)]
 pub struct IndexControllerBuilder {
    /// Required by `build`; stored already clamped to the page size.
    max_index_size: Option<usize>,
    /// Required by `build`; stored already clamped to the page size.
    max_task_store_size: Option<usize>,
    // NOTE(review): the snapshot and dump options below are currently only referenced by
    // commented-out code in `build`.
    snapshot_dir: Option<PathBuf>,
    import_snapshot: Option<PathBuf>,
    snapshot_interval: Option<Duration>,
    ignore_snapshot_if_db_exists: bool,
    ignore_missing_snapshot: bool,
    schedule_snapshot: bool,
    dump_src: Option<PathBuf>,
    dump_dst: Option<PathBuf>,
    ignore_dump_if_db_exists: bool,
    ignore_missing_dump: bool,
 }
 impl IndexControllerBuilder {
    /// Consumes the builder and creates a `Meilisearch` rooted at `db_path`.
    ///
    /// Steps, in order: create the `db_path` directory if needed, open a heed environment in
    /// it, create or overwrite the version file, then create the `IndexScheduler` using the
    /// `tasks`, `update_files` and `indexes` sub-paths of `db_path`.
    ///
    /// # Errors
    ///
    /// Fails when the max index size or the max task store size was never set, or when any
    /// of the steps above fails.
    ///
    /// NOTE(review): `scheduler_config` is not read by this function, and snapshot/dump
    /// handling is commented out, so the related builder options currently have no effect.
    pub fn build(
        self,
        db_path: impl AsRef<Path>,
        indexer_options: IndexerOpts,
        scheduler_config: SchedulerConfig,
    ) -> anyhow::Result<Meilisearch> {
        let index_size = self
            .max_index_size
            .ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
        let task_store_size = self
            .max_task_store_size
            .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
        /*
        TODO: TAMO: enable dumps and snapshots to happen
        if let Some(ref path) = self.import_snapshot {
            log::info!("Loading from snapshot {:?}", path);
            load_snapshot(
                db_path.as_ref(),
                path,
                self.ignore_snapshot_if_db_exists,
                self.ignore_missing_snapshot,
            )?;
        } else if let Some(ref src_path) = self.dump_src {
            load_dump(
                db_path.as_ref(),
                src_path,
                self.ignore_dump_if_db_exists,
                self.ignore_missing_dump,
                index_size,
                task_store_size,
                &indexer_options,
            )?;
        } else if db_path.as_ref().exists() {
            // Directory could be pre-created without any database in.
            let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
            if !db_is_empty {
                versioning::check_version_file(db_path.as_ref())?;
            }
        }
        */
        std::fs::create_dir_all(db_path.as_ref())?;
        // NOTE(review): `meta_env` is not used after this line; the environment is opened
        // and then dropped when the function returns.
        let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);
        // Create or overwrite the version file for this DB
        versioning::create_version_file(db_path.as_ref())?;
        let indexer_config = IndexerConfig {
            log_every_n: Some(indexer_options.log_every_n),
            max_nb_chunks: indexer_options.max_nb_chunks,
            documents_chunk_size: None,
            // TODO: TAMO: Fix this thing
            max_memory: None, // Some(indexer_options.max_indexing_memory.into()),
            chunk_compression_type: milli::CompressionType::None,
            chunk_compression_level: None,
            // TODO: TAMO: do something with the indexing_config.max_indexing_threads
            thread_pool: None,
            max_positions_per_attributes: None,
        };
        let index_scheduler = IndexScheduler::new(
            db_path.as_ref().join("tasks"),
            db_path.as_ref().join("update_files"),
            db_path.as_ref().join("indexes"),
            index_size,
            indexer_config,
        )?;
        /*
        if self.schedule_snapshot {
            let snapshot_period = self
                .snapshot_interval
                .ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?;
            let snapshot_path = self
                .snapshot_dir
                .ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?;
            let snapshot_service = SnapshotService {
                db_path: db_path.as_ref().to_path_buf(),
                snapshot_period,
                snapshot_path,
                index_size,
                meta_env_size: task_store_size,
                scheduler: scheduler.clone(),
            };
            tokio::task::spawn_local(snapshot_service.run());
        }
        */
        Ok(Meilisearch { index_scheduler })
    }
    /// Records the maximum task store size, rounded down to a multiple of the system page
    /// size by `clamp_to_page_size`. Returns the builder for chaining.
    pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
        self.max_task_store_size = Some(clamp_to_page_size(max_update_store_size));
        self
    }
    /// Records the maximum index size, rounded down to a multiple of the system page size
    /// by `clamp_to_page_size`. Returns the builder for chaining.
    pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
        self.max_index_size = Some(clamp_to_page_size(size));
        self
    }
    /// Stores the snapshot directory, replacing any previous value. Returns the builder for
    /// chaining.
    pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
        self.snapshot_dir = Some(snapshot_dir);
        self
    }
    /// Stores the `ignore_snapshot_if_db_exists` flag and returns the builder for chaining.
    ///
    /// NOTE(review): the flag is currently only referenced by the commented-out snapshot
    /// loading code in `build`.
    pub fn set_ignore_snapshot_if_db_exists(
        &mut self,
        ignore_snapshot_if_db_exists: bool,
    ) -> &mut Self {
        self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
        self
    }
    /// Stores the `ignore_missing_snapshot` flag and returns the builder for chaining.
    ///
    /// NOTE(review): the flag is currently only referenced by the commented-out snapshot
    /// loading code in `build`.
    pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
        self.ignore_missing_snapshot = ignore_missing_snapshot;
        self
    }
    /// Stores the path of the snapshot to import, replacing any previous value. Returns the
    /// builder for chaining.
    pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
        self.import_snapshot = Some(import_snapshot);
        self
    }
    /// Stores the interval between two snapshots, replacing any previous value. Returns the
    /// builder for chaining.
    pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self {
        self.snapshot_interval.replace(snapshot_interval);
        self
    }
    /// Turns the `schedule_snapshot` flag on and returns the builder for chaining.
    ///
    /// There is no way to turn the flag back off through this method.
    pub fn set_schedule_snapshot(&mut self) -> &mut Self {
        self.schedule_snapshot = true;
        self
    }
    /// Stores the path of the dump to load, replacing any previous value. Returns the
    /// builder for chaining.
    pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
        self.dump_src = Some(dump_src);
        self
    }
    /// Stores the dump destination path, replacing any previous value. Returns the builder
    /// for chaining.
    pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
        self.dump_dst = Some(dump_dst);
        self
    }
    /// Stores the `ignore_dump_if_db_exists` flag and returns the builder for chaining.
    ///
    /// NOTE(review): the flag is currently only referenced by the commented-out dump
    /// loading code in `build`.
    pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self {
        self.ignore_dump_if_db_exists = ignore_dump_if_db_exists;
        self
    }
    /// Stores the `ignore_missing_dump` flag and returns the builder for chaining.
    ///
    /// NOTE(review): the flag is currently only referenced by the commented-out dump
    /// loading code in `build`.
    pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self {
        self.ignore_missing_dump = ignore_missing_dump;
        self
    }
 }
 impl Meilisearch {
    pub fn builder() -> IndexControllerBuilder {
        IndexControllerBuilder::default()
    }
    pub async fn register_task(&self, task: KindWithContent) -> Result<TaskView> {
        let this = self.clone();
        Ok(
            tokio::task::spawn_blocking(move || this.clone().index_scheduler.register(task))
                .await??,
        )
    }
    /// Returns the tasks selected by `filter`.
    ///
    /// The scheduler is queried directly on the calling task, without `spawn_blocking`.
    pub async fn list_tasks(&self, filter: index_scheduler::Query) -> Result<Vec<TaskView>> {
        let tasks = self.index_scheduler.get_tasks(filter)?;
        Ok(tasks)
    }
    /// Returns every index known to the scheduler; the lookup runs on a blocking thread.
    pub async fn list_indexes(&self) -> Result<Vec<Index>> {
        let this = self.clone();
        let listing = spawn_blocking(move || this.index_scheduler.indexes());
        // First `?` is the join error, second `?` is the scheduler error.
        let indexes = listing.await??;
        Ok(indexes)
    }
    /// Retrieves documents from the index `uid` on a blocking thread.
    ///
    /// `offset`, `limit` and `attributes_to_retrieve` are forwarded to
    /// `Index::retrieve_documents`. The result pairs the total number of documents contained
    /// in the index with the selected documents.
    pub async fn documents(
        &self,
        uid: String,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<(u64, Vec<Document>)> {
        let handle = self.clone();
        spawn_blocking(move || -> Result<_> {
            let index = handle.index_scheduler.index(&uid)?;
            let total_and_documents =
                index.retrieve_documents(offset, limit, attributes_to_retrieve)?;
            Ok(total_and_documents)
        })
        .await?
    }
    /// Retrieves the document `doc_id` from the index `uid` on a blocking thread.
    ///
    /// `attributes_to_retrieve` is forwarded to `Index::retrieve_document`.
    pub async fn document(
        &self,
        uid: String,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Document> {
        let handle = self.clone();
        spawn_blocking(move || -> Result<_> {
            let index = handle.index_scheduler.index(&uid)?;
            let found = index.retrieve_document(doc_id, attributes_to_retrieve)?;
            Ok(found)
        })
        .await?
    }
    /// Runs `query` against the index `uid` on a blocking thread.
    pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
        let handle = self.clone();
        spawn_blocking(move || -> Result<_> {
            let index = handle.index_scheduler.index(&uid)?;
            let outcome = index.perform_search(query)?;
            Ok(outcome)
        })
        .await?
    }
    /// Looks up the index `uid` in the scheduler on a blocking thread.
    pub async fn get_index(&self, uid: String) -> Result<Index> {
        let handle = self.clone();
        let lookup = spawn_blocking(move || handle.index_scheduler.index(&uid));
        // First `?` is the join error, second `?` is the scheduler error.
        let index = lookup.await??;
        Ok(index)
    }
    /// Returns the statistics of the index `uid`, with `is_indexing` filled in.
    ///
    /// `is_indexing` is `Some(true)` only when the first task reported as `Processing`
    /// targets `uid`; it is `Some(false)` otherwise.
    pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
        let processing_query = index_scheduler::Query::default().with_status(Status::Processing);
        let processing_tasks = self.index_scheduler.get_tasks(processing_query)?;

        // Only the first processing task is compared against our index.
        let is_indexing = processing_tasks
            .first()
            .and_then(|task| task.index_uid.as_ref())
            .map_or(false, |task_uid| task_uid == &uid);

        let index = self.get_index(uid).await?;
        let mut stats = spawn_blocking(move || index.stats()).await??;
        stats.is_indexing = Some(is_indexing);
        Ok(stats)
    }
    /// Collects the statistics of every index authorized by `search_rules`.
    ///
    /// The result holds the summed `size` of those indexes, the most recent `updated_at`
    /// among them (`None` when no index is authorized) and the per-index statistics keyed by
    /// index name. Each entry has `is_indexing` set to `Some(true)` only when the first task
    /// reported as `Processing` targets that index.
    pub async fn get_all_stats(&self, search_rules: &SearchRules) -> Result<Stats> {
        let mut last_update: Option<OffsetDateTime> = None;
        let mut indexes = BTreeMap::new();
        let mut database_size = 0;

        let processing_tasks = self
            .index_scheduler
            .get_tasks(index_scheduler::Query::default().with_status(Status::Processing))?;

        for index in self.list_indexes().await? {
            if !search_rules.is_index_authorized(&index.name) {
                continue;
            }

            // `index` is moved into the blocking closure, so keep its name around.
            let index_name = index.name.clone();
            let (mut stats, meta) = spawn_blocking(move || -> Result<(IndexStats, IndexMeta)> {
                Ok((index.stats()?, index.meta()?))
            })
            .await??;

            database_size += stats.size;
            last_update = Some(match last_update {
                Some(previous) => previous.max(meta.updated_at),
                None => meta.updated_at,
            });

            // Only the first processing task is compared against this index.
            let currently_indexing = processing_tasks
                .first()
                .and_then(|task| task.index_uid.as_ref())
                .map_or(false, |task_uid| task_uid == &index_name);
            stats.is_indexing = Some(currently_indexing);

            indexes.insert(index_name, stats);
        }

        Ok(Stats {
            database_size,
            last_update,
            indexes,
        })
    }
 }
 /// Waits until `item` is the only remaining strong reference, then returns the inner value.
 ///
 /// Whenever `Arc::try_unwrap` fails, the function sleeps for 100 milliseconds before
 /// trying again; it never gives up on its own.
 pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
    let retry_delay = Duration::from_millis(100);
    loop {
        item = match Arc::try_unwrap(item) {
            Ok(inner) => return inner,
            // Still shared: take the `Arc` back and retry after the delay.
            Err(still_shared) => still_shared,
        };
        sleep(retry_delay).await;
    }
 }
 // Clamp the provided value to be a multiple of system page size.
 fn clamp_to_page_size(size: usize) -> usize {
    size / page_size::get() * page_size::get()
 }
 /*
 TODO: TAMO: uncomment this test
 #[cfg(test)]
 mod test {
    use futures::future::ok;
    use mockall::predicate::eq;
    use nelson::Mocker;
    use crate::index::error::Result as IndexResult;
    use crate::index::{HitsInfo, Index};
    use crate::index::{
        DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
    };
    use crate::index_resolver::index_store::MockIndexStore;
    use crate::index_resolver::meta_store::MockIndexMetaStore;
    use crate::index_resolver::IndexResolver;
    use super::*;
    #[actix_rt::test]
    async fn test_search_simple() {
        let index_uid = "test";
        let index_uuid = Uuid::new_v4();
        let query = SearchQuery {
            q: Some(String::from("hello world")),
            offset: 10,
            limit: 0,
            page: Some(1),
            hits_per_page: Some(10),
            attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()),
            attributes_to_crop: None,
            crop_length: 18,
            attributes_to_highlight: None,
            show_matches_position: true,
            filter: None,
            sort: None,
            facets: None,
            highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
            highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
            crop_marker: DEFAULT_CROP_MARKER(),
            matching_strategy: Default::default(),
        };
        let result = SearchResult {
            hits: vec![],
            query: "hello world".to_string(),
            hits_info: HitsInfo::OffsetLimit {
                limit: 24,
                offset: 0,
                estimated_total_hits: 29,
            },
            processing_time_ms: 50,
            facet_distribution: None,
        };
        let mut uuid_store = MockIndexMetaStore::new();
        uuid_store
            .expect_get()
            .with(eq(index_uid.to_owned()))
            .returning(move |s| {
                Box::pin(ok((
                    s,
                    Some(crate::index_resolver::meta_store::IndexMeta {
                        uuid: index_uuid,
                        creation_task_id: 0,
                    }),
                )))
            });
        let mut index_store = MockIndexStore::new();
        let result_clone = result.clone();
        let query_clone = query.clone();
        index_store
            .expect_get()
            .with(eq(index_uuid))
            .returning(move |_uuid| {
                let result = result_clone.clone();
                let query = query_clone.clone();
                let mocker = Mocker::default();
                mocker
                    .when::<SearchQuery, IndexResult<SearchResult>>("perform_search")
                    .once()
                    .then(move |q| {
                        assert_eq!(&q, &query);
                        Ok(result.clone())
                    });
                let index = Index::mock(mocker);
                Box::pin(ok(Some(index)))
            });
        let task_store_mocker = nelson::Mocker::default();
        let mocker = Mocker::default();
        let update_file_store = UpdateFileStore::mock(mocker);
        let index_resolver = Arc::new(IndexResolver::new(
            uuid_store,
            index_store,
            update_file_store.clone(),
        ));
        let task_store = TaskStore::mock(task_store_mocker);
        let scheduler = Scheduler::new(
            task_store.clone(),
            vec![index_resolver.clone()],
            SchedulerConfig::default(),
        )
        .unwrap();
        let index_controller =
            IndexController::mock(index_resolver, task_store, update_file_store, scheduler);
        let r = index_controller
            .search(index_uid.to_owned(), query.clone())
            .await
            .unwrap();
        assert_eq!(r, result);
    }
 }
 */
--- a/meilisearch-lib/src/tasks/task.rs
+++ b/meilisearch-lib/src/tasks/task.rs
@ -1,195 +0,0 @@
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
 use uuid::Uuid;
 use super::batch::BatchId;
 use crate::index::{Settings, Unchecked};
 /// Identifier of a `Task`.
 pub type TaskId = u32;
 /// Outcome attached to a `TaskEvent::Succeeded` event.
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub enum TaskResult {
    /// Number of documents that were indexed.
    DocumentAddition { indexed_documents: u64 },
    /// Number of documents that were deleted.
    DocumentDeletion { deleted_documents: u64 },
    /// Number of documents that were deleted.
    ClearAll { deleted_documents: u64 },
    /// Result carrying no additional data.
    Other,
 }
 impl From<DocumentAdditionResult> for TaskResult {
    /// Keeps only the `indexed_documents` count of the milli result.
    fn from(other: DocumentAdditionResult) -> Self {
        let indexed_documents = other.indexed_documents;
        Self::DocumentAddition { indexed_documents }
    }
 }
 /// One entry in the history of a `Task`.
 ///
 /// Every variant carries a timestamp that is (de)serialized as RFC 3339. In test builds
 /// the timestamps are generated by `test::datetime_strategy()`.
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub enum TaskEvent {
    /// The task was created.
    Created(
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        OffsetDateTime,
    ),
    /// The task was put in the batch `batch_id`.
    Batched {
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
        batch_id: BatchId,
    },
    /// Processing of the task started.
    Processing(
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        OffsetDateTime,
    ),
    /// The task finished successfully with `result`.
    Succeeded {
        result: TaskResult,
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
    /// The task finished with `error`.
    Failed {
        error: ResponseError,
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
 }
 impl TaskEvent {
    /// Builds a `Succeeded` event holding `result`, timestamped with the current UTC time.
    pub fn succeeded(result: TaskResult) -> Self {
        let timestamp = OffsetDateTime::now_utc();
        Self::Succeeded { result, timestamp }
    }

    /// Builds a `Failed` event from anything convertible into a `ResponseError`,
    /// timestamped with the current UTC time.
    pub fn failed(error: impl Into<ResponseError>) -> Self {
        // Convert first, then read the clock: same evaluation order as the field list.
        let error = error.into();
        let timestamp = OffsetDateTime::now_utc();
        Self::Failed { error, timestamp }
    }
 }
 /// A task represents an operation that Meilisearch must do.
 /// It's stored on disk and executed from the lowest to highest Task id.
 /// Every time a new task is created it has a higher Task id than the previous one.
 /// See also `Job`.
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub struct Task {
    pub id: TaskId,
    /// What the task does. The targeted index, when there is one, is stored inside the
    /// `TaskContent` variant; a `Dump` task targets no index (see `Task::index_uid`).
    // NOTE(review): the previous comment here described a standalone index-name field that
    // is no longer part of this struct.
    pub content: TaskContent,
    /// History of the task. `is_finished` only looks at the last entry.
    pub events: Vec<TaskEvent>,
 }
 impl Task {
    /// Tells whether the task is finished, i.e. whether its most recent event is
    /// `Succeeded` or `Failed`. A task without any event is not finished.
    pub fn is_finished(&self) -> bool {
        matches!(
            self.events.last(),
            Some(TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. })
        )
    }

    /// Returns the `content_uuid` of a `DocumentAddition` task, `None` for any other task.
    pub fn get_content_uuid(&self) -> Option<Uuid> {
        if let TaskContent::DocumentAddition { content_uuid, .. } = &self.content {
            Some(*content_uuid)
        } else {
            None
        }
    }

    /// Returns the uid of the index targeted by the task, or `None` for a `Dump` task.
    pub fn index_uid(&self) -> Option<&str> {
        let targeted = match &self.content {
            TaskContent::Dump { .. } => return None,
            TaskContent::DocumentAddition { index_uid, .. }
            | TaskContent::DocumentDeletion { index_uid, .. }
            | TaskContent::SettingsUpdate { index_uid, .. }
            | TaskContent::IndexDeletion { index_uid }
            | TaskContent::IndexCreation { index_uid, .. }
            | TaskContent::IndexUpdate { index_uid, .. } => index_uid,
        };
        Some(targeted.as_str())
    }
 }
 /// Which documents a `TaskContent::DocumentDeletion` removes.
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub enum DocumentDeletion {
    /// Every document.
    Clear,
    /// The documents identified by these strings.
    Ids(Vec<String>),
 }
 /// The operation carried by a `Task`.
 ///
 /// Every variant except `Dump` names the index it targets through `index_uid`.
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 // The variants differ a lot in size; the clippy lint about it is silenced on purpose.
 #[allow(clippy::large_enum_variant)]
 pub enum TaskContent {
    /// Add documents to an index.
    DocumentAddition {
        index_uid: IndexUid,
        /// Returned by `Task::get_content_uuid`. In test builds a fresh v4 uuid is used.
        #[cfg_attr(test, proptest(value = "Uuid::new_v4()"))]
        content_uuid: Uuid,
        #[cfg_attr(test, proptest(strategy = "test::index_document_method_strategy()"))]
        merge_strategy: IndexDocumentsMethod,
        primary_key: Option<String>,
        documents_count: usize,
        allow_index_creation: bool,
    },
    /// Delete some or all documents of an index.
    DocumentDeletion {
        index_uid: IndexUid,
        deletion: DocumentDeletion,
    },
    /// Apply (not yet checked) settings to an index.
    SettingsUpdate {
        index_uid: IndexUid,
        settings: Settings<Unchecked>,
        /// Indicates whether the task was a deletion
        is_deletion: bool,
        allow_index_creation: bool,
    },
    /// Delete an index.
    IndexDeletion {
        index_uid: IndexUid,
    },
    /// Create an index, optionally with a primary key.
    IndexCreation {
        index_uid: IndexUid,
        primary_key: Option<String>,
    },
    /// Update an index, optionally with a new primary key.
    IndexUpdate {
        index_uid: IndexUid,
        primary_key: Option<String>,
    },
    /// Dump task; it targets no index.
    Dump {
        uid: String,
    },
 }
 /// Proptest strategies referenced by the `proptest(strategy = "...")` attributes of this file.
 #[cfg(test)]
 mod test {
    use proptest::prelude::*;
    use super::*;
    /// Yields either `ReplaceDocuments` or `UpdateDocuments`.
    pub(super) fn index_document_method_strategy() -> impl Strategy<Value = IndexDocumentsMethod> {
        prop_oneof![
            Just(IndexDocumentsMethod::ReplaceDocuments),
            Just(IndexDocumentsMethod::UpdateDocuments),
        ]
    }
    /// Always yields the same timestamp: the UTC time read once, when the strategy is built.
    pub(super) fn datetime_strategy() -> impl Strategy<Value = OffsetDateTime> {
        Just(OffsetDateTime::now_utc())
    }
 }