Mirror of https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00

Reapply #2601

parent a99ddf85f7
commit 0dd8e00929
@@ -15,7 +15,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
     perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
     DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET
+    DEFAULT_SEARCH_OFFSET,
 };

 pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -72,9 +72,7 @@ impl From<SearchQueryGet> for SearchQuery {
             limit: other.limit,
             page: other.page,
             hits_per_page: other.hits_per_page,
-            attributes_to_retrieve: other
-                .attributes_to_retrieve
-                .map(|o| o.into_iter().collect()),
+            attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
             attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
             crop_length: other.crop_length,
             attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
@@ -19,6 +19,7 @@ use crate::error::MeilisearchHttpError;

 type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;

+pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
 pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
 pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
@@ -29,9 +30,12 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
     pub q: Option<String>,
-    pub offset: Option<usize>,
+    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
+    pub offset: usize,
     #[serde(default = "DEFAULT_SEARCH_LIMIT")]
     pub limit: usize,
+    pub page: Option<usize>,
+    pub hits_per_page: Option<usize>,
     pub attributes_to_retrieve: Option<BTreeSet<String>>,
     pub attributes_to_crop: Option<Vec<String>>,
     #[serde(default = "DEFAULT_CROP_LENGTH")]
@@ -53,6 +57,12 @@ pub struct SearchQuery {
     pub matching_strategy: MatchingStrategy,
 }

+impl SearchQuery {
+    pub fn is_finite_pagination(&self) -> bool {
+        self.page.or(self.hits_per_page).is_some()
+    }
+}
+
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase")]
 pub enum MatchingStrategy {
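The new `is_finite_pagination` helper above only looks at whether `page` or `hits_per_page` was supplied. A minimal, self-contained sketch of that predicate, illustrative only and not part of this diff:

// Sketch of the finite-pagination predicate on a stripped-down struct.
struct Pagination {
    page: Option<usize>,
    hits_per_page: Option<usize>,
}

impl Pagination {
    fn is_finite(&self) -> bool {
        // Finite pagination is requested as soon as either field is present.
        self.page.or(self.hits_per_page).is_some()
    }
}

fn main() {
    assert!(Pagination { page: Some(1), hits_per_page: None }.is_finite());
    assert!(Pagination { page: None, hits_per_page: Some(50) }.is_finite());
    assert!(!Pagination { page: None, hits_per_page: None }.is_finite());
}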
@@ -91,15 +101,23 @@ pub struct SearchHit {
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
     pub hits: Vec<SearchHit>,
-    pub estimated_total_hits: u64,
     pub query: String,
-    pub limit: usize,
-    pub offset: usize,
     pub processing_time_ms: u128,
+    #[serde(flatten)]
+    pub hits_info: HitsInfo,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
 }

+#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum HitsInfo {
+    #[serde(rename_all = "camelCase")]
+    Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
+    #[serde(rename_all = "camelCase")]
+    OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
+}
+
 pub fn perform_search(
     index: &Index,
     query: SearchQuery,
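The untagged `HitsInfo` enum added above is flattened into `SearchResult`, so its fields land at the top level of the response in camelCase. A small sketch, not from this commit, of the two JSON shapes it serializes to, assuming the serde and serde_json crates:

// Sketch of the two response shapes produced by the untagged enum.
use serde::Serialize;

#[derive(Serialize)]
#[serde(untagged)]
enum HitsInfo {
    #[serde(rename_all = "camelCase")]
    Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
    #[serde(rename_all = "camelCase")]
    OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}

fn main() {
    let finite = HitsInfo::Pagination { hits_per_page: 20, page: 1, total_pages: 2, total_hits: 21 };
    let infinite = HitsInfo::OffsetLimit { limit: 20, offset: 0, estimated_total_hits: 42 };
    // {"hitsPerPage":20,"page":1,"totalPages":2,"totalHits":21}
    println!("{}", serde_json::to_string(&finite).unwrap());
    // {"limit":20,"offset":0,"estimatedTotalHits":42}
    println!("{}", serde_json::to_string(&infinite).unwrap());
}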
@@ -113,6 +131,7 @@ pub fn perform_search(
         search.query(query);
     }

+    let is_finite_pagination = query.is_finite_pagination();
     search.terms_matching_strategy(query.matching_strategy.into());

     let max_total_hits = index
@@ -120,10 +139,23 @@ pub fn perform_search(
         .map_err(milli::Error::from)?
         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

+    search.exhaustive_number_hits(is_finite_pagination);
+
+    // compute the offset on the limit depending on the pagination mode.
+    let (offset, limit) = if is_finite_pagination {
+        let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+        let page = query.page.unwrap_or(1);
+
+        // page 0 gives a limit of 0 forcing Meilisearch to return no document.
+        page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
+    } else {
+        (query.offset, query.limit)
+    };
+
     // Make sure that a user can't get more documents than the hard limit,
     // we align that on the offset too.
-    let offset = min(query.offset.unwrap_or(0), max_total_hits);
-    let limit = min(query.limit, max_total_hits.saturating_sub(offset));
+    let offset = min(offset, max_total_hits);
+    let limit = min(limit, max_total_hits.saturating_sub(offset));

     search.offset(offset);
     search.limit(limit);
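The hunk above derives `offset` and `limit` from `page` and `hits_per_page` when finite pagination is requested, then clamps both against the hard limit. A standalone sketch of that arithmetic, illustrative only; the literal 20 mirrors the DEFAULT_SEARCH_LIMIT default:

// Sketch of the offset/limit derivation plus the hard-limit clamp.
use std::cmp::min;

fn offset_and_limit(page: Option<usize>, hits_per_page: Option<usize>,
                    offset: usize, limit: usize, max_total_hits: usize) -> (usize, usize) {
    let finite = page.or(hits_per_page).is_some();
    let (offset, limit) = if finite {
        let limit = hits_per_page.unwrap_or(20);
        let page = page.unwrap_or(1);
        // page 0 forces an empty result set
        page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
    } else {
        (offset, limit)
    };
    let offset = min(offset, max_total_hits);
    (offset, min(limit, max_total_hits.saturating_sub(offset)))
}

fn main() {
    assert_eq!(offset_and_limit(Some(3), Some(25), 0, 20, 1000), (50, 25)); // page 3 of 25 hits
    assert_eq!(offset_and_limit(Some(0), None, 0, 20, 1000), (0, 0));       // page 0 returns nothing
    assert_eq!(offset_and_limit(None, None, 990, 20, 1000), (990, 10));     // clamped by max_total_hits
}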
@@ -239,7 +271,23 @@ pub fn perform_search(
         documents.push(hit);
     }

-    let estimated_total_hits = candidates.len();
+    let number_of_hits = min(candidates.len() as usize, max_total_hits);
+    let hits_info = if is_finite_pagination {
+        let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+        // If hit_per_page is 0, then pages can't be computed and so we respond 0.
+        let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
+            .checked_div(hits_per_page)
+            .unwrap_or(0);
+
+        HitsInfo::Pagination {
+            hits_per_page,
+            page: query.page.unwrap_or(1),
+            total_pages,
+            total_hits: number_of_hits,
+        }
+    } else {
+        HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
+    };

     let facet_distribution = match query.facets {
         Some(ref fields) => {
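The `total_pages` computed above is a ceiling division that falls back to 0 when `hits_per_page` is 0. A tiny sketch of that computation, not part of this diff:

// Sketch of the total_pages ceiling division with a zero-divisor fallback.
fn total_pages(number_of_hits: usize, hits_per_page: usize) -> usize {
    (number_of_hits + hits_per_page.saturating_sub(1))
        .checked_div(hits_per_page)
        .unwrap_or(0)
}

fn main() {
    assert_eq!(total_pages(21, 20), 2); // 21 hits at 20 per page -> 2 pages
    assert_eq!(total_pages(40, 20), 2);
    assert_eq!(total_pages(0, 20), 0);
    assert_eq!(total_pages(5, 0), 0);   // hits_per_page == 0 -> 0 pages
}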
@@ -263,10 +311,8 @@ pub fn perform_search(

     let result = SearchResult {
         hits: documents,
-        estimated_total_hits,
+        hits_info,
         query: query.q.clone().unwrap_or_default(),
-        limit: query.limit,
-        offset: query.offset.unwrap_or_default(),
         processing_time_ms: before_search.elapsed().as_millis(),
         facet_distribution,
     };

@@ -1,85 +0,0 @@
[package]
name = "meilisearch-lib"
version = "0.29.1"
edition = "2021"

[dependencies]
actix-web = { version = "4.2.1", default-features = false }
anyhow = { version = "1.0.65", features = ["backtrace"] }
async-stream = "0.3.3"
async-trait = "0.1.57"
atomic_refcell = "0.1.8"
byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
csv = "1.1.6"
derivative = "2.2.0"
either = { version = "1.8.0", features = ["serde"] }
flate2 = "1.0.24"
fs_extra = "1.2.0"
fst = "0.4.7"
futures = "0.3.24"
futures-util = "0.3.24"
http = "0.2.8"
indexmap = { version = "1.9.1", features = ["serde-1"] }
itertools = "0.10.5"
lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"
once_cell = "1.15.0"
page_size = "0.4.2"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
rand = "0.8.5"
rayon = "1.5.3"
regex = "1.6.0"
reqwest = { version = "0.11.12", features = ["json", "rustls-tls"], default-features = false, optional = true }
roaring = "0.10.1"
rustls = "0.20.6"
serde = { version = "1.0.145", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
siphasher = "0.3.10"
slice-group-by = "0.3.0"
sysinfo = "0.26.4"
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.37"
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.21.2", features = ["full"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
walkdir = "2.3.2"
whoami = { version = "1.2.3", optional = true }
index-scheduler = { path = "../index-scheduler" }
index = { path = "../index" }
file-store = { path = "../file-store" }

[dev-dependencies]
actix-rt = "2.7.0"
meilisearch-types = { path = "../meilisearch-types", features = ["test-traits"] }
mockall = "0.11.2"
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
paste = "1.0.9"
proptest = "1.0.0"
proptest-derive = "0.3.0"

[features]
# all specialized tokenizations
default = ["milli/default"]

# chinese specialized tokenization
chinese = ["milli/chinese"]

# hebrew specialized tokenization
hebrew = ["milli/hebrew"]

# japanese specialized tokenization
japanese = ["milli/japanese"]

# thai specialized tokenization
thai = ["milli/thai"]

@@ -1,145 +0,0 @@
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;

use crate::index::{Settings, Unchecked};
use crate::tasks::batch::BatchId;
use crate::tasks::task::{
    DocumentDeletion, TaskContent as NewTaskContent, TaskEvent as NewTaskEvent, TaskId, TaskResult,
};

#[derive(Debug, Serialize, Deserialize)]
pub struct Task {
    pub id: TaskId,
    pub index_uid: IndexUid,
    pub content: TaskContent,
    pub events: Vec<TaskEvent>,
}

impl From<Task> for crate::tasks::task::Task {
    fn from(other: Task) -> Self {
        Self {
            id: other.id,
            content: NewTaskContent::from((other.index_uid, other.content)),
            events: other.events.into_iter().map(Into::into).collect(),
        }
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub enum TaskEvent {
    Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
    Batched {
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
        batch_id: BatchId,
    },
    Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
    Succeded {
        result: TaskResult,
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
    Failed {
        error: ResponseError,
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
}

impl From<TaskEvent> for NewTaskEvent {
    fn from(other: TaskEvent) -> Self {
        match other {
            TaskEvent::Created(x) => NewTaskEvent::Created(x),
            TaskEvent::Batched {
                timestamp,
                batch_id,
            } => NewTaskEvent::Batched {
                timestamp,
                batch_id,
            },
            TaskEvent::Processing(x) => NewTaskEvent::Processing(x),
            TaskEvent::Succeded { result, timestamp } => {
                NewTaskEvent::Succeeded { result, timestamp }
            }
            TaskEvent::Failed { error, timestamp } => NewTaskEvent::Failed { error, timestamp },
        }
    }
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[allow(clippy::large_enum_variant)]
pub enum TaskContent {
    DocumentAddition {
        content_uuid: Uuid,
        merge_strategy: IndexDocumentsMethod,
        primary_key: Option<String>,
        documents_count: usize,
        allow_index_creation: bool,
    },
    DocumentDeletion(DocumentDeletion),
    SettingsUpdate {
        settings: Settings<Unchecked>,
        /// Indicates whether the task was a deletion
        is_deletion: bool,
        allow_index_creation: bool,
    },
    IndexDeletion,
    IndexCreation {
        primary_key: Option<String>,
    },
    IndexUpdate {
        primary_key: Option<String>,
    },
    Dump {
        uid: String,
    },
}

impl From<(IndexUid, TaskContent)> for NewTaskContent {
    fn from((index_uid, content): (IndexUid, TaskContent)) -> Self {
        match content {
            TaskContent::DocumentAddition {
                content_uuid,
                merge_strategy,
                primary_key,
                documents_count,
                allow_index_creation,
            } => NewTaskContent::DocumentAddition {
                index_uid,
                content_uuid,
                merge_strategy,
                primary_key,
                documents_count,
                allow_index_creation,
            },
            TaskContent::DocumentDeletion(deletion) => NewTaskContent::DocumentDeletion {
                index_uid,
                deletion,
            },
            TaskContent::SettingsUpdate {
                settings,
                is_deletion,
                allow_index_creation,
            } => NewTaskContent::SettingsUpdate {
                index_uid,
                settings,
                is_deletion,
                allow_index_creation,
            },
            TaskContent::IndexDeletion => NewTaskContent::IndexDeletion { index_uid },
            TaskContent::IndexCreation { primary_key } => NewTaskContent::IndexCreation {
                index_uid,
                primary_key,
            },
            TaskContent::IndexUpdate { primary_key } => NewTaskContent::IndexUpdate {
                index_uid,
                primary_key,
            },
            TaskContent::Dump { uid } => NewTaskContent::Dump { uid },
        }
    }
}

@@ -1,250 +0,0 @@
pub use search::{
    HitsInfo, MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH,
    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};

mod dump;
pub mod error;
mod search;
pub mod updates;

#[allow(clippy::module_inception)]
mod index;

pub use index::{Document, IndexMeta, IndexStats};

#[cfg(not(test))]
pub use index::Index;

#[cfg(test)]
pub use test::MockIndex as Index;

/// The index::test module provides means of mocking an index instance. I can be used throughout the
/// code for unit testing, in places where an index would normally be used.
#[cfg(test)]
pub mod test {
    use std::path::{Path, PathBuf};
    use std::sync::Arc;

    use milli::update::{
        DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig,
    };
    use nelson::Mocker;
    use uuid::Uuid;

    use super::error::Result;
    use super::index::Index;
    use super::Document;
    use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
    use crate::update_file_store::UpdateFileStore;

    #[derive(Clone)]
    pub enum MockIndex {
        Real(Index),
        Mock(Arc<Mocker>),
    }

    impl MockIndex {
        pub fn mock(mocker: Mocker) -> Self {
            Self::Mock(Arc::new(mocker))
        }

        pub fn open(
            path: impl AsRef<Path>,
            size: usize,
            uuid: Uuid,
            update_handler: Arc<IndexerConfig>,
        ) -> Result<Self> {
            let index = Index::open(path, size, uuid, update_handler)?;
            Ok(Self::Real(index))
        }

        pub fn load_dump(
            src: impl AsRef<Path>,
            dst: impl AsRef<Path>,
            size: usize,
            update_handler: &IndexerConfig,
        ) -> anyhow::Result<()> {
            Index::load_dump(src, dst, size, update_handler)
        }

        pub fn uuid(&self) -> Uuid {
            match self {
                MockIndex::Real(index) => index.uuid(),
                MockIndex::Mock(m) => unsafe { m.get("uuid").call(()) },
            }
        }

        pub fn stats(&self) -> Result<IndexStats> {
            match self {
                MockIndex::Real(index) => index.stats(),
                MockIndex::Mock(m) => unsafe { m.get("stats").call(()) },
            }
        }

        pub fn meta(&self) -> Result<IndexMeta> {
            match self {
                MockIndex::Real(index) => index.meta(),
                MockIndex::Mock(_) => todo!(),
            }
        }
        pub fn settings(&self) -> Result<Settings<Checked>> {
            match self {
                MockIndex::Real(index) => index.settings(),
                MockIndex::Mock(_) => todo!(),
            }
        }

        pub fn retrieve_documents<S: AsRef<str>>(
            &self,
            offset: usize,
            limit: usize,
            attributes_to_retrieve: Option<Vec<S>>,
        ) -> Result<(u64, Vec<Document>)> {
            match self {
                MockIndex::Real(index) => {
                    index.retrieve_documents(offset, limit, attributes_to_retrieve)
                }
                MockIndex::Mock(_) => todo!(),
            }
        }

        pub fn retrieve_document<S: AsRef<str>>(
            &self,
            doc_id: String,
            attributes_to_retrieve: Option<Vec<S>>,
        ) -> Result<Document> {
            match self {
                MockIndex::Real(index) => index.retrieve_document(doc_id, attributes_to_retrieve),
                MockIndex::Mock(_) => todo!(),
            }
        }

        pub fn size(&self) -> u64 {
            match self {
                MockIndex::Real(index) => index.size(),
                MockIndex::Mock(_) => todo!(),
            }
        }

        pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.snapshot(path),
                MockIndex::Mock(m) => unsafe { m.get("snapshot").call(path.as_ref()) },
            }
        }

        pub fn close(self) {
            match self {
                MockIndex::Real(index) => index.close(),
                MockIndex::Mock(m) => unsafe { m.get("close").call(()) },
            }
        }

        pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
            match self {
                MockIndex::Real(index) => index.perform_search(query),
                MockIndex::Mock(m) => unsafe { m.get("perform_search").call(query) },
            }
        }

        pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.dump(path),
                MockIndex::Mock(m) => unsafe { m.get("dump").call(path.as_ref()) },
            }
        }

        pub fn update_documents(
            &self,
            method: IndexDocumentsMethod,
            primary_key: Option<String>,
            file_store: UpdateFileStore,
            contents: impl Iterator<Item = Uuid>,
        ) -> Result<Vec<Result<DocumentAdditionResult>>> {
            match self {
                MockIndex::Real(index) => {
                    index.update_documents(method, primary_key, file_store, contents)
                }
                MockIndex::Mock(mocker) => unsafe {
                    mocker
                        .get("update_documents")
                        .call((method, primary_key, file_store, contents))
                },
            }
        }

        pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.update_settings(settings),
                MockIndex::Mock(m) => unsafe { m.get("update_settings").call(settings) },
            }
        }

        pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
            match self {
                MockIndex::Real(index) => index.update_primary_key(primary_key),
                MockIndex::Mock(m) => unsafe { m.get("update_primary_key").call(primary_key) },
            }
        }

        pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
            match self {
                MockIndex::Real(index) => index.delete_documents(ids),
                MockIndex::Mock(m) => unsafe { m.get("delete_documents").call(ids) },
            }
        }

        pub fn clear_documents(&self) -> Result<()> {
            match self {
                MockIndex::Real(index) => index.clear_documents(),
                MockIndex::Mock(m) => unsafe { m.get("clear_documents").call(()) },
            }
        }
    }

    #[test]
    fn test_faux_index() {
        let faux = Mocker::default();
        faux.when("snapshot")
            .times(2)
            .then(|_: &Path| -> Result<()> { Ok(()) });

        let index = MockIndex::mock(faux);

        let path = PathBuf::from("hello");
        index.snapshot(&path).unwrap();
        index.snapshot(&path).unwrap();
    }

    #[test]
    #[should_panic]
    fn test_faux_unexisting_method_stub() {
        let faux = Mocker::default();

        let index = MockIndex::mock(faux);

        let path = PathBuf::from("hello");
        index.snapshot(&path).unwrap();
        index.snapshot(&path).unwrap();
    }

    #[test]
    #[should_panic]
    fn test_faux_panic() {
        let faux = Mocker::default();
        faux.when("snapshot")
            .times(2)
            .then(|_: &Path| -> Result<()> {
                panic!();
            });

        let index = MockIndex::mock(faux);

        let path = PathBuf::from("hello");
        index.snapshot(&path).unwrap();
        index.snapshot(&path).unwrap();
    }
}

@@ -1,747 +0,0 @@
use std::cmp::min;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant;

use either::Either;
use milli::tokenizer::TokenizerBuilder;
use milli::{
    AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
    TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};

use crate::index::error::FacetError;

use super::error::{IndexError, Result};
use super::index::Index;

pub type Document = serde_json::Map<String, Value>;
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;

pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();

/// The maximum number of results that the engine
/// will be able to return in one search call.
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;

#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQuery {
    pub q: Option<String>,
    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
    pub offset: usize,
    #[serde(default = "DEFAULT_SEARCH_LIMIT")]
    pub limit: usize,
    pub page: Option<usize>,
    pub hits_per_page: Option<usize>,
    pub attributes_to_retrieve: Option<BTreeSet<String>>,
    pub attributes_to_crop: Option<Vec<String>>,
    #[serde(default = "DEFAULT_CROP_LENGTH")]
    pub crop_length: usize,
    pub attributes_to_highlight: Option<HashSet<String>>,
    // Default to false
    #[serde(default = "Default::default")]
    pub show_matches_position: bool,
    pub filter: Option<Value>,
    pub sort: Option<Vec<String>>,
    pub facets: Option<Vec<String>>,
    #[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
    pub highlight_pre_tag: String,
    #[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
    pub highlight_post_tag: String,
    #[serde(default = "DEFAULT_CROP_MARKER")]
    pub crop_marker: String,
    #[serde(default)]
    pub matching_strategy: MatchingStrategy,
}

impl SearchQuery {
    pub fn is_finite_pagination(&self) -> bool {
        self.page.or(self.hits_per_page).is_some()
    }
}

#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum MatchingStrategy {
    /// Remove query words from last to first
    Last,
    /// All query words are mandatory
    All,
}

impl Default for MatchingStrategy {
    fn default() -> Self {
        Self::Last
    }
}

impl From<MatchingStrategy> for TermsMatchingStrategy {
    fn from(other: MatchingStrategy) -> Self {
        match other {
            MatchingStrategy::Last => Self::Last,
            MatchingStrategy::All => Self::All,
        }
    }
}

#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct SearchHit {
    #[serde(flatten)]
    pub document: Document,
    #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
    pub formatted: Document,
    #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
    pub matches_position: Option<MatchesPosition>,
}

#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
    pub hits: Vec<SearchHit>,
    pub query: String,
    pub processing_time_ms: u128,
    #[serde(flatten)]
    pub hits_info: HitsInfo,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
}

#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum HitsInfo {
    #[serde(rename_all = "camelCase")]
    Pagination {
        hits_per_page: usize,
        page: usize,
        total_pages: usize,
        total_hits: usize,
    },
    #[serde(rename_all = "camelCase")]
    OffsetLimit {
        limit: usize,
        offset: usize,
        estimated_total_hits: usize,
    },
}

impl Index {
    pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
        let before_search = Instant::now();
        let rtxn = self.read_txn()?;

        let mut search = self.search(&rtxn);

        if let Some(ref query) = query.q {
            search.query(query);
        }

        let is_finite_pagination = query.is_finite_pagination();
        search.terms_matching_strategy(query.matching_strategy.into());

        let max_total_hits = self
            .pagination_max_total_hits(&rtxn)?
            .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

        search.exhaustive_number_hits(is_finite_pagination);

        // compute the offset on the limit depending on the pagination mode.
        let (offset, limit) = if is_finite_pagination {
            let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
            let page = query.page.unwrap_or(1);

            // page 0 gives a limit of 0 forcing Meilisearch to return no document.
            page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
        } else {
            (query.offset, query.limit)
        };

        // Make sure that a user can't get more documents than the hard limit,
        // we align that on the offset too.
        let offset = min(offset, max_total_hits);
        let limit = min(limit, max_total_hits.saturating_sub(offset));

        search.offset(offset);
        search.limit(limit);

        if let Some(ref filter) = query.filter {
            if let Some(facets) = parse_filter(filter)? {
                search.filter(facets);
            }
        }

        if let Some(ref sort) = query.sort {
            let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
                Ok(sorts) => sorts,
                Err(asc_desc_error) => {
                    return Err(IndexError::Milli(SortError::from(asc_desc_error).into()))
                }
            };

            search.sort_criteria(sort);
        }

        let milli::SearchResult {
            documents_ids,
            matching_words,
            candidates,
            ..
        } = search.execute()?;

        let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();

        let displayed_ids = self
            .displayed_fields_ids(&rtxn)?
            .map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
            .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());

        let fids = |attrs: &BTreeSet<String>| {
            let mut ids = BTreeSet::new();
            for attr in attrs {
                if attr == "*" {
                    ids = displayed_ids.clone();
                    break;
                }

                if let Some(id) = fields_ids_map.id(attr) {
                    ids.insert(id);
                }
            }
            ids
        };

        // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
        // but these attributes must be also be present
        // - in the fields_ids_map
        // - in the the displayed attributes
        let to_retrieve_ids: BTreeSet<_> = query
            .attributes_to_retrieve
            .as_ref()
            .map(fids)
            .unwrap_or_else(|| displayed_ids.clone())
            .intersection(&displayed_ids)
            .cloned()
            .collect();

        let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();

        let attr_to_crop = query.attributes_to_crop.unwrap_or_default();

        // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
        // These attributes are:
        // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
        // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
        // But these attributes must be also present in displayed attributes
        let formatted_options = compute_formatted_options(
            &attr_to_highlight,
            &attr_to_crop,
            query.crop_length,
            &to_retrieve_ids,
            &fields_ids_map,
            &displayed_ids,
        );

        let tokenizer = TokenizerBuilder::default().build();

        let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
        formatter_builder.crop_marker(query.crop_marker);
        formatter_builder.highlight_prefix(query.highlight_pre_tag);
        formatter_builder.highlight_suffix(query.highlight_post_tag);

        let mut documents = Vec::new();

        let documents_iter = self.documents(&rtxn, documents_ids)?;

        for (_id, obkv) in documents_iter {
            // First generate a document with all the displayed fields
            let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

            // select the attributes to retrieve
            let attributes_to_retrieve = to_retrieve_ids
                .iter()
                .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
            let mut document =
                permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);

            let (matches_position, formatted) = format_fields(
                &displayed_document,
                &fields_ids_map,
                &formatter_builder,
                &formatted_options,
                query.show_matches_position,
                &displayed_ids,
            )?;

            if let Some(sort) = query.sort.as_ref() {
                insert_geo_distance(sort, &mut document);
            }

            let hit = SearchHit {
                document,
                formatted,
                matches_position,
            };
            documents.push(hit);
        }

        let number_of_hits = min(candidates.len() as usize, max_total_hits);
        let hits_info = if is_finite_pagination {
            let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
            // If hit_per_page is 0, then pages can't be computed and so we respond 0.
            let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
                .checked_div(hits_per_page)
                .unwrap_or(0);

            HitsInfo::Pagination {
                hits_per_page,
                page: query.page.unwrap_or(1),
                total_pages,
                total_hits: number_of_hits,
            }
        } else {
            HitsInfo::OffsetLimit {
                limit: query.limit,
                offset,
                estimated_total_hits: number_of_hits,
            }
        };

        let facet_distribution = match query.facets {
            Some(ref fields) => {
                let mut facet_distribution = self.facets_distribution(&rtxn);

                let max_values_by_facet = self
                    .max_values_per_facet(&rtxn)?
                    .unwrap_or(DEFAULT_VALUES_PER_FACET);
                facet_distribution.max_values_per_facet(max_values_by_facet);

                if fields.iter().all(|f| f != "*") {
                    facet_distribution.facets(fields);
                }
                let distribution = facet_distribution.candidates(candidates).execute()?;

                Some(distribution)
            }
            None => None,
        };

        let result = SearchResult {
            hits: documents,
            hits_info,
            query: query.q.clone().unwrap_or_default(),
            processing_time_ms: before_search.elapsed().as_millis(),
            facet_distribution,
        };
        Ok(result)
    }
}

fn insert_geo_distance(sorts: &[String], document: &mut Document) {
    lazy_static::lazy_static! {
        static ref GEO_REGEX: Regex =
            Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap();
    };
    if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) {
        // TODO: TAMO: milli encountered an internal error, what do we want to do?
        let base = [
            capture_group[1].parse().unwrap(),
            capture_group[2].parse().unwrap(),
        ];
        let geo_point = &document.get("_geo").unwrap_or(&json!(null));
        if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
            let distance = milli::distance_between_two_points(&base, &[lat, lng]);
            document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
        }
    }
}

fn compute_formatted_options(
    attr_to_highlight: &HashSet<String>,
    attr_to_crop: &[String],
    query_crop_length: usize,
    to_retrieve_ids: &BTreeSet<FieldId>,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
) -> BTreeMap<FieldId, FormatOptions> {
    let mut formatted_options = BTreeMap::new();

    add_highlight_to_formatted_options(
        &mut formatted_options,
        attr_to_highlight,
        fields_ids_map,
        displayed_ids,
    );

    add_crop_to_formatted_options(
        &mut formatted_options,
        attr_to_crop,
        query_crop_length,
        fields_ids_map,
        displayed_ids,
    );

    // Should not return `_formatted` if no valid attributes to highlight/crop
    if !formatted_options.is_empty() {
        add_non_formatted_ids_to_formatted_options(&mut formatted_options, to_retrieve_ids);
    }

    formatted_options
}

fn add_highlight_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    attr_to_highlight: &HashSet<String>,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
) {
    for attr in attr_to_highlight {
        let new_format = FormatOptions {
            highlight: true,
            crop: None,
        };

        if attr == "*" {
            for id in displayed_ids {
                formatted_options.insert(*id, new_format);
            }
            break;
        }

        if let Some(id) = fields_ids_map.id(attr) {
            if displayed_ids.contains(&id) {
                formatted_options.insert(id, new_format);
            }
        }
    }
}

fn add_crop_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    attr_to_crop: &[String],
    crop_length: usize,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
) {
    for attr in attr_to_crop {
        let mut split = attr.rsplitn(2, ':');
        let (attr_name, attr_len) = match split.next().zip(split.next()) {
            Some((len, name)) => {
                let crop_len = len.parse::<usize>().unwrap_or(crop_length);
                (name, crop_len)
            }
            None => (attr.as_str(), crop_length),
        };

        if attr_name == "*" {
            for id in displayed_ids {
                formatted_options
                    .entry(*id)
                    .and_modify(|f| f.crop = Some(attr_len))
                    .or_insert(FormatOptions {
                        highlight: false,
                        crop: Some(attr_len),
                    });
            }
        }

        if let Some(id) = fields_ids_map.id(attr_name) {
            if displayed_ids.contains(&id) {
                formatted_options
                    .entry(id)
                    .and_modify(|f| f.crop = Some(attr_len))
                    .or_insert(FormatOptions {
                        highlight: false,
                        crop: Some(attr_len),
                    });
            }
        }
    }
}

fn add_non_formatted_ids_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    to_retrieve_ids: &BTreeSet<FieldId>,
) {
    for id in to_retrieve_ids {
        formatted_options.entry(*id).or_insert(FormatOptions {
            highlight: false,
            crop: None,
        });
    }
}

fn make_document(
    displayed_attributes: &BTreeSet<FieldId>,
    field_ids_map: &FieldsIdsMap,
    obkv: obkv::KvReaderU16,
) -> Result<Document> {
    let mut document = serde_json::Map::new();

    // recreate the original json
    for (key, value) in obkv.iter() {
        let value = serde_json::from_slice(value)?;
        let key = field_ids_map
            .name(key)
            .expect("Missing field name")
            .to_string();

        document.insert(key, value);
    }

    // select the attributes to retrieve
    let displayed_attributes = displayed_attributes
        .iter()
        .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));

    let document = permissive_json_pointer::select_values(&document, displayed_attributes);
    Ok(document)
}

fn format_fields<'a, A: AsRef<[u8]>>(
    document: &Document,
    field_ids_map: &FieldsIdsMap,
    builder: &MatcherBuilder<'a, A>,
    formatted_options: &BTreeMap<FieldId, FormatOptions>,
    compute_matches: bool,
    displayable_ids: &BTreeSet<FieldId>,
) -> Result<(Option<MatchesPosition>, Document)> {
    let mut matches_position = compute_matches.then(BTreeMap::new);
    let mut document = document.clone();

    // select the attributes to retrieve
    let displayable_names = displayable_ids
        .iter()
        .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
    permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
        // To get the formatting option of each key we need to see all the rules that applies
        // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
        // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
        // highlighted.
        let format = formatted_options
            .iter()
            .filter(|(field, _option)| {
                let name = field_ids_map.name(**field).unwrap();
                milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
            })
            .map(|(_, option)| *option)
            .reduce(|acc, option| acc.merge(option));
        let mut infos = Vec::new();

        *value = format_value(
            std::mem::take(value),
            builder,
            format,
            &mut infos,
            compute_matches,
        );

        if let Some(matches) = matches_position.as_mut() {
            if !infos.is_empty() {
                matches.insert(key.to_owned(), infos);
            }
        }
    });

    let selectors = formatted_options
        .keys()
        // This unwrap must be safe since we got the ids from the fields_ids_map just
        // before.
        .map(|&fid| field_ids_map.name(fid).unwrap());
    let document = permissive_json_pointer::select_values(&document, selectors);

    Ok((matches_position, document))
}

fn format_value<'a, A: AsRef<[u8]>>(
    value: Value,
    builder: &MatcherBuilder<'a, A>,
    format_options: Option<FormatOptions>,
    infos: &mut Vec<MatchBounds>,
    compute_matches: bool,
) -> Value {
    match value {
        Value::String(old_string) => {
            let mut matcher = builder.build(&old_string);
            if compute_matches {
                let matches = matcher.matches();
                infos.extend_from_slice(&matches[..]);
            }

            match format_options {
                Some(format_options) => {
                    let value = matcher.format(format_options);
                    Value::String(value.into_owned())
                }
                None => Value::String(old_string),
            }
        }
        Value::Array(values) => Value::Array(
            values
                .into_iter()
                .map(|v| {
                    format_value(
                        v,
                        builder,
                        format_options.map(|format_options| FormatOptions {
                            highlight: format_options.highlight,
                            crop: None,
                        }),
                        infos,
                        compute_matches,
                    )
                })
                .collect(),
        ),
        Value::Object(object) => Value::Object(
            object
                .into_iter()
                .map(|(k, v)| {
                    (
                        k,
                        format_value(
                            v,
                            builder,
                            format_options.map(|format_options| FormatOptions {
                                highlight: format_options.highlight,
                                crop: None,
                            }),
                            infos,
                            compute_matches,
                        ),
                    )
                })
                .collect(),
        ),
        Value::Number(number) => {
            let s = number.to_string();

            let mut matcher = builder.build(&s);
            if compute_matches {
                let matches = matcher.matches();
                infos.extend_from_slice(&matches[..]);
            }

            match format_options {
                Some(format_options) => {
                    let value = matcher.format(format_options);
                    Value::String(value.into_owned())
                }
                None => Value::Number(number),
            }
        }
        value => value,
    }
}

fn parse_filter(facets: &Value) -> Result<Option<Filter>> {
    match facets {
        Value::String(expr) => {
            let condition = Filter::from_str(expr)?;
            Ok(condition)
        }
        Value::Array(arr) => parse_filter_array(arr),
        v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()),
    }
}

fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>> {
    let mut ands = Vec::new();
    for value in arr {
        match value {
            Value::String(s) => ands.push(Either::Right(s.as_str())),
            Value::Array(arr) => {
                let mut ors = Vec::new();
                for value in arr {
                    match value {
                        Value::String(s) => ors.push(s.as_str()),
                        v => {
                            return Err(FacetError::InvalidExpression(&["String"], v.clone()).into())
                        }
                    }
                }
                ands.push(Either::Left(ors));
            }
            v => {
                return Err(
                    FacetError::InvalidExpression(&["String", "[String]"], v.clone()).into(),
                )
            }
        }
    }

    Ok(Filter::from_array(ands)?)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_insert_geo_distance() {
        let value: Document = serde_json::from_str(
            r#"{
            "_geo": {
                "lat": 50.629973371633746,
                "lng": 3.0569447399419567
            },
            "city": "Lille",
            "id": "1"
            }"#,
        )
        .unwrap();

        let sorters = &["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()];
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        let sorters = &["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()];
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        let sorters =
            &["_geoPoint( 50.629973371633746 , 3.0569447399419567 ):desc".to_string()];
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        let sorters = &[
            "prix:asc",
            "villeneuve:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "ubu:asc",
        ]
        .map(|s| s.to_string());
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        // only the first geoPoint is used to compute the distance
        let sorters = &[
            "chien:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "pangolin:desc",
            "_geoPoint(100.0, -80.0):asc",
            "chat:asc",
        ]
        .map(|s| s.to_string());
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        // there was no _geoPoint so nothing is inserted in the document
        let sorters = &["chien:asc".to_string()];
        let mut document = value;
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), None);
    }
}
@ -1,559 +0,0 @@
|
|||||||
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::num::NonZeroUsize;

use log::{debug, info, trace};
use milli::documents::DocumentsBatchReader;
use milli::update::{
    DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
    Setting,
};
use serde::{Deserialize, Serialize, Serializer};
use uuid::Uuid;

use super::error::{IndexError, Result};
use super::index::{Index, IndexMeta};
use crate::update_file_store::UpdateFileStore;

fn serialize_with_wildcard<S>(
    field: &Setting<Vec<String>>,
    s: S,
) -> std::result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let wildcard = vec!["*".to_string()];
    match field {
        Setting::Set(value) => Some(value),
        Setting::Reset => Some(&wildcard),
        Setting::NotSet => None,
    }
    .serialize(s)
}

#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
pub struct Checked;

#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Unchecked;

#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct MinWordSizeTyposSetting {
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub one_typo: Setting<u8>,
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub two_typos: Setting<u8>,
}

#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct TypoSettings {
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub enabled: Setting<bool>,
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub disable_on_words: Setting<BTreeSet<String>>,
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub disable_on_attributes: Setting<BTreeSet<String>>,
}

#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct FacetingSettings {
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub max_values_per_facet: Setting<usize>,
}

#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct PaginationSettings {
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub max_total_hits: Setting<usize>,
}

/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub struct Settings<T> {
    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub displayed_attributes: Setting<Vec<String>>,

    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub searchable_attributes: Setting<Vec<String>>,

    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub filterable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub sortable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub ranking_rules: Setting<Vec<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub distinct_attribute: Setting<String>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub typo_tolerance: Setting<TypoSettings>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub faceting: Setting<FacetingSettings>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
    pub pagination: Setting<PaginationSettings>,

    #[serde(skip)]
    pub _kind: PhantomData<T>,
}

impl Settings<Checked> {
    pub fn cleared() -> Settings<Checked> {
        Settings {
            displayed_attributes: Setting::Reset,
            searchable_attributes: Setting::Reset,
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            distinct_attribute: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
            pagination: Setting::Reset,
            _kind: PhantomData,
        }
    }

    pub fn into_unchecked(self) -> Settings<Unchecked> {
        let Self {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            typo_tolerance,
            faceting,
            pagination,
            ..
        } = self;

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            typo_tolerance,
            faceting,
            pagination,
            _kind: PhantomData,
        }
    }
}

impl Settings<Unchecked> {
    pub fn check(self) -> Settings<Checked> {
        let displayed_attributes = match self.displayed_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        let searchable_attributes = match self.searchable_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
            pagination: self.pagination,
            _kind: PhantomData,
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
    pub level_group_size: Option<NonZeroUsize>,
    pub min_level_size: Option<NonZeroUsize>,
}

impl Index {
    fn update_primary_key_txn<'a, 'b>(
        &'a self,
        txn: &mut milli::heed::RwTxn<'a, 'b>,
        primary_key: String,
    ) -> Result<IndexMeta> {
        let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref());
        builder.set_primary_key(primary_key);
        builder.execute(|_| ())?;
        let meta = IndexMeta::new_txn(self, txn)?;

        Ok(meta)
    }

    pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
        let mut txn = self.write_txn()?;
        let res = self.update_primary_key_txn(&mut txn, primary_key)?;
        txn.commit()?;

        Ok(res)
    }

    /// Deletes `ids` from the index, and returns how many documents were deleted.
    pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
        let mut txn = self.write_txn()?;
        let mut builder = milli::update::DeleteDocuments::new(&mut txn, self)?;

        // We ignore unexisting document ids
        ids.iter().for_each(|id| {
            builder.delete_external_id(id);
        });

        let deleted = builder.execute()?;

        txn.commit()?;

        Ok(deleted)
    }

    pub fn clear_documents(&self) -> Result<()> {
        let mut txn = self.write_txn()?;
        milli::update::ClearDocuments::new(&mut txn, self).execute()?;
        txn.commit()?;

        Ok(())
    }

    pub fn update_documents(
        &self,
        method: IndexDocumentsMethod,
        primary_key: Option<String>,
        file_store: UpdateFileStore,
        contents: impl IntoIterator<Item = Uuid>,
    ) -> Result<Vec<Result<DocumentAdditionResult>>> {
        trace!("performing document addition");
        let mut txn = self.write_txn()?;

        if let Some(primary_key) = primary_key {
            if self.primary_key(&txn)?.is_none() {
                self.update_primary_key_txn(&mut txn, primary_key)?;
            }
        }

        let config = IndexDocumentsConfig {
            update_method: method,
            ..Default::default()
        };

        let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step);
        let mut builder = milli::update::IndexDocuments::new(
            &mut txn,
            self,
            self.indexer_config.as_ref(),
            config,
            indexing_callback,
        )?;

        let mut results = Vec::new();
        for content_uuid in contents.into_iter() {
            let content_file = file_store.get_update(content_uuid)?;
            let reader = DocumentsBatchReader::from_reader(content_file)?;
            let (new_builder, user_result) = builder.add_documents(reader)?;
            builder = new_builder;

            let user_result = match user_result {
                Ok(count) => Ok(DocumentAdditionResult {
                    indexed_documents: count,
                    number_of_documents: count,
                }),
                Err(e) => Err(IndexError::from(e)),
            };

            results.push(user_result);
        }

        if results.iter().any(Result::is_ok) {
            let addition = builder.execute()?;
            txn.commit()?;
            info!("document addition done: {:?}", addition);
        }

        Ok(results)
    }

    pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
        // We must use the write transaction of the update here.
        let mut txn = self.write_txn()?;
        let mut builder =
            milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref());

        apply_settings_to_builder(settings, &mut builder);

        builder.execute(|indexing_step| debug!("update: {:?}", indexing_step))?;

        txn.commit()?;

        Ok(())
    }
}

pub fn apply_settings_to_builder(
    settings: &Settings<Checked>,
    builder: &mut milli::update::Settings,
) {
    match settings.searchable_attributes {
        Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
        Setting::Reset => builder.reset_searchable_fields(),
        Setting::NotSet => (),
    }

    match settings.displayed_attributes {
        Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
        Setting::Reset => builder.reset_displayed_fields(),
        Setting::NotSet => (),
    }

    match settings.filterable_attributes {
        Setting::Set(ref facets) => {
            builder.set_filterable_fields(facets.clone().into_iter().collect())
        }
        Setting::Reset => builder.reset_filterable_fields(),
        Setting::NotSet => (),
    }

    match settings.sortable_attributes {
        Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
        Setting::Reset => builder.reset_sortable_fields(),
        Setting::NotSet => (),
    }

    match settings.ranking_rules {
        Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
        Setting::Reset => builder.reset_criteria(),
        Setting::NotSet => (),
    }

    match settings.stop_words {
        Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
        Setting::Reset => builder.reset_stop_words(),
        Setting::NotSet => (),
    }

    match settings.synonyms {
        Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
        Setting::Reset => builder.reset_synonyms(),
        Setting::NotSet => (),
    }

    match settings.distinct_attribute {
        Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
        Setting::Reset => builder.reset_distinct_field(),
        Setting::NotSet => (),
    }

    match settings.typo_tolerance {
        Setting::Set(ref value) => {
            match value.enabled {
                Setting::Set(val) => builder.set_autorize_typos(val),
                Setting::Reset => builder.reset_authorize_typos(),
                Setting::NotSet => (),
            }

            match value.min_word_size_for_typos {
                Setting::Set(ref setting) => {
                    match setting.one_typo {
                        Setting::Set(val) => builder.set_min_word_len_one_typo(val),
                        Setting::Reset => builder.reset_min_word_len_one_typo(),
                        Setting::NotSet => (),
                    }
                    match setting.two_typos {
                        Setting::Set(val) => builder.set_min_word_len_two_typos(val),
                        Setting::Reset => builder.reset_min_word_len_two_typos(),
                        Setting::NotSet => (),
                    }
                }
                Setting::Reset => {
                    builder.reset_min_word_len_one_typo();
                    builder.reset_min_word_len_two_typos();
                }
                Setting::NotSet => (),
            }

            match value.disable_on_words {
                Setting::Set(ref words) => {
                    builder.set_exact_words(words.clone());
                }
                Setting::Reset => builder.reset_exact_words(),
                Setting::NotSet => (),
            }

            match value.disable_on_attributes {
                Setting::Set(ref words) => {
                    builder.set_exact_attributes(words.iter().cloned().collect())
                }
                Setting::Reset => builder.reset_exact_attributes(),
                Setting::NotSet => (),
            }
        }
        Setting::Reset => {
            // all typo settings need to be reset here.
            builder.reset_authorize_typos();
            builder.reset_min_word_len_one_typo();
            builder.reset_min_word_len_two_typos();
            builder.reset_exact_words();
            builder.reset_exact_attributes();
        }
        Setting::NotSet => (),
    }

    match settings.faceting {
        Setting::Set(ref value) => match value.max_values_per_facet {
            Setting::Set(val) => builder.set_max_values_per_facet(val),
            Setting::Reset => builder.reset_max_values_per_facet(),
            Setting::NotSet => (),
        },
        Setting::Reset => builder.reset_max_values_per_facet(),
        Setting::NotSet => (),
    }

    match settings.pagination {
        Setting::Set(ref value) => match value.max_total_hits {
            Setting::Set(val) => builder.set_pagination_max_total_hits(val),
            Setting::Reset => builder.reset_pagination_max_total_hits(),
            Setting::NotSet => (),
        },
        Setting::Reset => builder.reset_pagination_max_total_hits(),
        Setting::NotSet => (),
    }
}

#[cfg(test)]
pub(crate) mod test {
    use proptest::prelude::*;

    use super::*;

    pub(super) fn setting_strategy<T: Arbitrary + Clone>() -> impl Strategy<Value = Setting<T>> {
        prop_oneof![
            Just(Setting::NotSet),
            Just(Setting::Reset),
            any::<T>().prop_map(Setting::Set)
        ]
    }

    #[test]
    fn test_setting_check() {
        // test no changes
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("hello")]),
            searchable_attributes: Setting::Set(vec![String::from("hello")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(
            settings.searchable_attributes,
            checked.searchable_attributes
        );

        // test wildcard
        // test no changes
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("*")]),
            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
}
@ -1,574 +0,0 @@
use std::collections::BTreeMap;
use std::fmt;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;

use actix_web::error::PayloadError;
use bytes::Bytes;
use futures::Stream;
use index_scheduler::task::{Status, Task};
use index_scheduler::{IndexScheduler, KindWithContent, TaskId, TaskView};
use meilisearch_auth::SearchRules;
use milli::update::{IndexDocumentsMethod, IndexerConfig};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use uuid::Uuid;

// use crate::dump::{self, load_dump, DumpHandler};
use crate::options::{IndexerOpts, SchedulerConfig};
// use crate::snapshot::{load_snapshot, SnapshotService};
use error::Result;
use index::{
    Checked, Document, Index, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
};

pub mod error;
pub mod versioning;

pub type Payload = Box<
    dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
>;

pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result<milli::heed::Env> {
    let mut options = milli::heed::EnvOpenOptions::new();
    options.map_size(size);
    options.max_dbs(20);
    options.open(path)
}

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
    #[serde(skip)]
    pub uuid: Uuid,
    pub uid: String,
    #[serde(flatten)]
    pub meta: IndexMeta,
}

#[derive(Clone, Debug)]
pub struct IndexSettings {
    pub uid: Option<String>,
    pub primary_key: Option<String>,
}

#[derive(Clone)]
pub struct Meilisearch {
    index_scheduler: IndexScheduler,
}

impl std::ops::Deref for Meilisearch {
    type Target = IndexScheduler;

    fn deref(&self) -> &Self::Target {
        &self.index_scheduler
    }
}

#[derive(Debug)]
pub enum DocumentAdditionFormat {
    Json,
    Csv,
    Ndjson,
}

impl fmt::Display for DocumentAdditionFormat {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            DocumentAdditionFormat::Json => write!(f, "json"),
            DocumentAdditionFormat::Ndjson => write!(f, "ndjson"),
            DocumentAdditionFormat::Csv => write!(f, "csv"),
        }
    }
}

#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
    pub database_size: u64,
    #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
    pub last_update: Option<OffsetDateTime>,
    pub indexes: BTreeMap<String, IndexStats>,
}

#[allow(clippy::large_enum_variant)]
#[derive(derivative::Derivative)]
#[derivative(Debug)]
pub enum Update {
    DeleteDocuments(Vec<String>),
    ClearDocuments,
    Settings {
        settings: Settings<Unchecked>,
        /// Indicates whether the update was a deletion
        is_deletion: bool,
        allow_index_creation: bool,
    },
    DocumentAddition {
        #[derivative(Debug = "ignore")]
        payload: Payload,
        primary_key: Option<String>,
        method: IndexDocumentsMethod,
        format: DocumentAdditionFormat,
        allow_index_creation: bool,
    },
    DeleteIndex,
    CreateIndex {
        primary_key: Option<String>,
    },
    UpdateIndex {
        primary_key: Option<String>,
    },
}

#[derive(Default, Debug)]
pub struct IndexControllerBuilder {
    max_index_size: Option<usize>,
    max_task_store_size: Option<usize>,
    snapshot_dir: Option<PathBuf>,
    import_snapshot: Option<PathBuf>,
    snapshot_interval: Option<Duration>,
    ignore_snapshot_if_db_exists: bool,
    ignore_missing_snapshot: bool,
    schedule_snapshot: bool,
    dump_src: Option<PathBuf>,
    dump_dst: Option<PathBuf>,
    ignore_dump_if_db_exists: bool,
    ignore_missing_dump: bool,
}

impl IndexControllerBuilder {
    pub fn build(
        self,
        db_path: impl AsRef<Path>,
        indexer_options: IndexerOpts,
        scheduler_config: SchedulerConfig,
    ) -> anyhow::Result<Meilisearch> {
        let index_size = self
            .max_index_size
            .ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
        let task_store_size = self
            .max_task_store_size
            .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;

        /*
        TODO: TAMO: enable dumps and snapshots to happens
        if let Some(ref path) = self.import_snapshot {
            log::info!("Loading from snapshot {:?}", path);
            load_snapshot(
                db_path.as_ref(),
                path,
                self.ignore_snapshot_if_db_exists,
                self.ignore_missing_snapshot,
            )?;
        } else if let Some(ref src_path) = self.dump_src {
            load_dump(
                db_path.as_ref(),
                src_path,
                self.ignore_dump_if_db_exists,
                self.ignore_missing_dump,
                index_size,
                task_store_size,
                &indexer_options,
            )?;
        } else if db_path.as_ref().exists() {
            // Directory could be pre-created without any database in.
            let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
            if !db_is_empty {
                versioning::check_version_file(db_path.as_ref())?;
            }
        }
        */

        std::fs::create_dir_all(db_path.as_ref())?;

        let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);

        // Create or overwrite the version file for this DB
        versioning::create_version_file(db_path.as_ref())?;

        let indexer_config = IndexerConfig {
            log_every_n: Some(indexer_options.log_every_n),
            max_nb_chunks: indexer_options.max_nb_chunks,
            documents_chunk_size: None,
            // TODO: TAMO: Fix this thing
            max_memory: None, // Some(indexer_options.max_indexing_memory.into()),
            chunk_compression_type: milli::CompressionType::None,
            chunk_compression_level: None,
            // TODO: TAMO: do something with the indexing_config.max_indexing_threads
            thread_pool: None,
            max_positions_per_attributes: None,
        };

        let index_scheduler = IndexScheduler::new(
            db_path.as_ref().join("tasks"),
            db_path.as_ref().join("update_files"),
            db_path.as_ref().join("indexes"),
            index_size,
            indexer_config,
        )?;

        /*
        if self.schedule_snapshot {
            let snapshot_period = self
                .snapshot_interval
                .ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?;
            let snapshot_path = self
                .snapshot_dir
                .ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?;

            let snapshot_service = SnapshotService {
                db_path: db_path.as_ref().to_path_buf(),
                snapshot_period,
                snapshot_path,
                index_size,
                meta_env_size: task_store_size,
                scheduler: scheduler.clone(),
            };

            tokio::task::spawn_local(snapshot_service.run());
        }
        */

        Ok(Meilisearch { index_scheduler })
    }

    /// Set the index controller builder's max update store size.
    pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
        let max_update_store_size = clamp_to_page_size(max_update_store_size);
        self.max_task_store_size.replace(max_update_store_size);
        self
    }

    pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
        let size = clamp_to_page_size(size);
        self.max_index_size.replace(size);
        self
    }

    /// Set the index controller builder's snapshot path.
    pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
        self.snapshot_dir.replace(snapshot_dir);
        self
    }

    /// Set the index controller builder's ignore snapshot if db exists.
    pub fn set_ignore_snapshot_if_db_exists(
        &mut self,
        ignore_snapshot_if_db_exists: bool,
    ) -> &mut Self {
        self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
        self
    }

    /// Set the index controller builder's ignore missing snapshot.
    pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
        self.ignore_missing_snapshot = ignore_missing_snapshot;
        self
    }

    /// Set the index controller builder's import snapshot.
    pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
        self.import_snapshot.replace(import_snapshot);
        self
    }

    /// Set the index controller builder's snapshot interval sec.
    pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self {
        self.snapshot_interval = Some(snapshot_interval);
        self
    }

    /// Set the index controller builder's schedule snapshot.
    pub fn set_schedule_snapshot(&mut self) -> &mut Self {
        self.schedule_snapshot = true;
        self
    }

    /// Set the index controller builder's dump src.
    pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
        self.dump_src.replace(dump_src);
        self
    }

    /// Set the index controller builder's dump dst.
    pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
        self.dump_dst.replace(dump_dst);
        self
    }

    /// Set the index controller builder's ignore dump if db exists.
    pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self {
        self.ignore_dump_if_db_exists = ignore_dump_if_db_exists;
        self
    }

    /// Set the index controller builder's ignore missing dump.
    pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self {
        self.ignore_missing_dump = ignore_missing_dump;
        self
    }
}

impl Meilisearch {
    pub fn builder() -> IndexControllerBuilder {
        IndexControllerBuilder::default()
    }

    pub async fn register_task(&self, task: KindWithContent) -> Result<TaskView> {
        let this = self.clone();
        Ok(
            tokio::task::spawn_blocking(move || this.clone().index_scheduler.register(task))
                .await??,
        )
    }

    pub async fn list_tasks(&self, filter: index_scheduler::Query) -> Result<Vec<TaskView>> {
        Ok(self.index_scheduler.get_tasks(filter)?)
    }

    pub async fn list_indexes(&self) -> Result<Vec<Index>> {
        let this = self.clone();
        Ok(spawn_blocking(move || this.index_scheduler.indexes()).await??)
    }

    /// Return the total number of documents contained in the index + the selected documents.
    pub async fn documents(
        &self,
        uid: String,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<(u64, Vec<Document>)> {
        let this = self.clone();
        spawn_blocking(move || -> Result<_> {
            let index = this.index_scheduler.index(&uid)?;
            Ok(index.retrieve_documents(offset, limit, attributes_to_retrieve)?)
        })
        .await?
    }

    pub async fn document(
        &self,
        uid: String,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Document> {
        let this = self.clone();
        spawn_blocking(move || -> Result<_> {
            let index = this.index_scheduler.index(&uid)?;
            Ok(index.retrieve_document(doc_id, attributes_to_retrieve)?)
        })
        .await?
    }

    pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
        let this = self.clone();
        spawn_blocking(move || -> Result<_> {
            let index = this.index_scheduler.index(&uid)?;
            Ok(index.perform_search(query)?)
        })
        .await?
    }

    pub async fn get_index(&self, uid: String) -> Result<Index> {
        let this = self.clone();
        Ok(spawn_blocking(move || this.index_scheduler.index(&uid)).await??)
    }

    pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
        let processing_tasks = self
            .index_scheduler
            .get_tasks(index_scheduler::Query::default().with_status(Status::Processing))?;
        // Check if the currently indexing update is from our index.
        let is_indexing = processing_tasks.first().map_or(false, |task| {
            task.index_uid.as_ref().map_or(false, |u| u == &uid)
        });

        let index = self.get_index(uid).await?;
        let mut stats = spawn_blocking(move || index.stats()).await??;
        stats.is_indexing = Some(is_indexing);

        Ok(stats)
    }

    pub async fn get_all_stats(&self, search_rules: &SearchRules) -> Result<Stats> {
        let mut last_task: Option<OffsetDateTime> = None;
        let mut indexes = BTreeMap::new();
        let mut database_size = 0;
        let processing_tasks = self
            .index_scheduler
            .get_tasks(index_scheduler::Query::default().with_status(Status::Processing))?;

        for index in self.list_indexes().await? {
            if !search_rules.is_index_authorized(&index.name) {
                continue;
            }
            let index_name = index.name.clone();

            let (mut stats, meta) =
                spawn_blocking::<_, Result<(IndexStats, IndexMeta)>>(move || {
                    Ok((index.stats()?, index.meta()?))
                })
                .await??;

            database_size += stats.size;

            last_task = last_task.map_or(Some(meta.updated_at), |last| {
                Some(last.max(meta.updated_at))
            });

            // Check if the currently indexing update is from our index.
            stats.is_indexing = processing_tasks
                .first()
                .and_then(|p| p.index_uid.as_ref().map(|u| u == &index_name))
                .or(Some(false));

            indexes.insert(index_name, stats);
        }

        Ok(Stats {
            database_size,
            last_update: last_task,
            indexes,
        })
    }
}

pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
    loop {
        match Arc::try_unwrap(item) {
            Ok(item) => return item,
            Err(item_arc) => {
                item = item_arc;
                sleep(Duration::from_millis(100)).await;
                continue;
            }
        }
    }
}

// Clamp the provided value to be a multiple of system page size.
fn clamp_to_page_size(size: usize) -> usize {
    size / page_size::get() * page_size::get()
}

/*
TODO: TAMO: uncomment this test

#[cfg(test)]
mod test {
    use futures::future::ok;
    use mockall::predicate::eq;
    use nelson::Mocker;

    use crate::index::error::Result as IndexResult;
    use crate::index::{HitsInfo, Index};
    use crate::index::{
        DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
    };
    use crate::index_resolver::index_store::MockIndexStore;
    use crate::index_resolver::meta_store::MockIndexMetaStore;
    use crate::index_resolver::IndexResolver;

    use super::*;

    #[actix_rt::test]
    async fn test_search_simple() {
        let index_uid = "test";
        let index_uuid = Uuid::new_v4();
        let query = SearchQuery {
            q: Some(String::from("hello world")),
            offset: 10,
            limit: 0,
            page: Some(1),
            hits_per_page: Some(10),
            attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()),
            attributes_to_crop: None,
            crop_length: 18,
            attributes_to_highlight: None,
            show_matches_position: true,
            filter: None,
            sort: None,
            facets: None,
            highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
            highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
            crop_marker: DEFAULT_CROP_MARKER(),
            matching_strategy: Default::default(),
        };

        let result = SearchResult {
            hits: vec![],
            query: "hello world".to_string(),
            hits_info: HitsInfo::OffsetLimit {
                limit: 24,
                offset: 0,
                estimated_total_hits: 29,
            },
            processing_time_ms: 50,
            facet_distribution: None,
        };

        let mut uuid_store = MockIndexMetaStore::new();
        uuid_store
            .expect_get()
            .with(eq(index_uid.to_owned()))
            .returning(move |s| {
                Box::pin(ok((
                    s,
                    Some(crate::index_resolver::meta_store::IndexMeta {
                        uuid: index_uuid,
                        creation_task_id: 0,
                    }),
                )))
            });

        let mut index_store = MockIndexStore::new();
        let result_clone = result.clone();
        let query_clone = query.clone();
        index_store
            .expect_get()
            .with(eq(index_uuid))
            .returning(move |_uuid| {
                let result = result_clone.clone();
                let query = query_clone.clone();
                let mocker = Mocker::default();
                mocker
                    .when::<SearchQuery, IndexResult<SearchResult>>("perform_search")
                    .once()
                    .then(move |q| {
                        assert_eq!(&q, &query);
                        Ok(result.clone())
                    });
                let index = Index::mock(mocker);
                Box::pin(ok(Some(index)))
            });

        let task_store_mocker = nelson::Mocker::default();
        let mocker = Mocker::default();
        let update_file_store = UpdateFileStore::mock(mocker);
        let index_resolver = Arc::new(IndexResolver::new(
            uuid_store,
            index_store,
            update_file_store.clone(),
        ));
        let task_store = TaskStore::mock(task_store_mocker);
        let scheduler = Scheduler::new(
            task_store.clone(),
            vec![index_resolver.clone()],
            SchedulerConfig::default(),
        )
        .unwrap();
        let index_controller =
            IndexController::mock(index_resolver, task_store, update_file_store, scheduler);

        let r = index_controller
            .search(index_uid.to_owned(), query.clone())
            .await
            .unwrap();
        assert_eq!(r, result);
    }
}
*/
@ -1,195 +0,0 @@
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;

use super::batch::BatchId;
use crate::index::{Settings, Unchecked};

pub type TaskId = u32;

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub enum TaskResult {
    DocumentAddition { indexed_documents: u64 },
    DocumentDeletion { deleted_documents: u64 },
    ClearAll { deleted_documents: u64 },
    Other,
}

impl From<DocumentAdditionResult> for TaskResult {
    fn from(other: DocumentAdditionResult) -> Self {
        Self::DocumentAddition {
            indexed_documents: other.indexed_documents,
        }
    }
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub enum TaskEvent {
    Created(
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        OffsetDateTime,
    ),
    Batched {
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
        batch_id: BatchId,
    },
    Processing(
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        OffsetDateTime,
    ),
    Succeeded {
        result: TaskResult,
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
    Failed {
        error: ResponseError,
        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
}

impl TaskEvent {
    pub fn succeeded(result: TaskResult) -> Self {
        Self::Succeeded {
            result,
            timestamp: OffsetDateTime::now_utc(),
        }
    }

    pub fn failed(error: impl Into<ResponseError>) -> Self {
        Self::Failed {
            error: error.into(),
            timestamp: OffsetDateTime::now_utc(),
        }
    }
}

/// A task represents an operation that Meilisearch must do.
/// It's stored on disk and executed from the lowest to highest Task id.
/// Every time a new task is created it has a higher Task id than the previous one.
/// See also `Job`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub struct Task {
    pub id: TaskId,
    /// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task)
    /// then this is None
    // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of
    // the TaskContent.
    pub content: TaskContent,
    pub events: Vec<TaskEvent>,
}

impl Task {
    /// Return true when a task is finished.
    /// A task is finished when its last state is either `Succeeded` or `Failed`.
    pub fn is_finished(&self) -> bool {
        self.events.last().map_or(false, |event| {
            matches!(
                event,
                TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }
            )
        })
    }

    /// Return the content_uuid of the `Task` if there is one.
    pub fn get_content_uuid(&self) -> Option<Uuid> {
        match self {
            Task {
                content: TaskContent::DocumentAddition { content_uuid, .. },
                ..
            } => Some(*content_uuid),
            _ => None,
        }
    }

    pub fn index_uid(&self) -> Option<&str> {
        match &self.content {
            TaskContent::DocumentAddition { index_uid, .. }
            | TaskContent::DocumentDeletion { index_uid, .. }
            | TaskContent::SettingsUpdate { index_uid, .. }
            | TaskContent::IndexDeletion { index_uid }
            | TaskContent::IndexCreation { index_uid, .. }
            | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()),
            TaskContent::Dump { .. } => None,
        }
    }
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub enum DocumentDeletion {
    Clear,
    Ids(Vec<String>),
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[allow(clippy::large_enum_variant)]
pub enum TaskContent {
    DocumentAddition {
        index_uid: IndexUid,
        #[cfg_attr(test, proptest(value = "Uuid::new_v4()"))]
        content_uuid: Uuid,
        #[cfg_attr(test, proptest(strategy = "test::index_document_method_strategy()"))]
        merge_strategy: IndexDocumentsMethod,
        primary_key: Option<String>,
        documents_count: usize,
        allow_index_creation: bool,
    },
    DocumentDeletion {
        index_uid: IndexUid,
        deletion: DocumentDeletion,
    },
    SettingsUpdate {
        index_uid: IndexUid,
        settings: Settings<Unchecked>,
        /// Indicates whether the task was a deletion
        is_deletion: bool,
        allow_index_creation: bool,
    },
    IndexDeletion {
        index_uid: IndexUid,
    },
    IndexCreation {
        index_uid: IndexUid,
        primary_key: Option<String>,
    },
    IndexUpdate {
        index_uid: IndexUid,
        primary_key: Option<String>,
    },
    Dump {
        uid: String,
    },
}

#[cfg(test)]
mod test {
    use proptest::prelude::*;

    use super::*;

    pub(super) fn index_document_method_strategy() -> impl Strategy<Value = IndexDocumentsMethod> {
        prop_oneof![
            Just(IndexDocumentsMethod::ReplaceDocuments),
            Just(IndexDocumentsMethod::UpdateDocuments),
        ]
    }

    pub(super) fn datetime_strategy() -> impl Strategy<Value = OffsetDateTime> {
        Just(OffsetDateTime::now_utc())
    }
}