Mirror of https://github.com/meilisearch/meilisearch.git, synced 2024-11-23 10:37:41 +08:00
Merge #2601
2601: Ease search result pagination r=Kerollmops a=ManyTheFish

# Summary
This PR is a prototype enhancing search results pagination (#2577)

# Todo
- [x] Update the API to return the number of pages and allow users to directly choose a page instead of computing an offset
- [x] Change computation of `total_pages` in order to have an exact count
- [x] compute query tree exhaustively
- [x] compute distinct exhaustively

# Small Documentation

## Default search query
**request**:
```sh
curl \
  -X POST 'http://localhost:7700/indexes/movies/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "botman" }'
```
**result**:
```json
{
  "hits": [...],
  "query": "botman",
  "processingTimeMs": 5,
  "hitsPerPage": 20,
  "page": 1,
  "totalPages": 4,
  "totalHits": 66
}
```

## Search query with `offset` parameter
**request**:
```sh
curl \
  -X POST 'http://localhost:7700/indexes/movies/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "botman", "offset": 0 }'
```
**result**:
```json
{
  "hits": [...],
  "query": "botman",
  "processingTimeMs": 3,
  "limit": 20,
  "offset": 0,
  "estimatedTotalHits": 66
}
```

## Search query selecting a page with the `page` parameter
**request**:
```sh
curl \
  -X POST 'http://localhost:7700/indexes/movies/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "botman", "page": 2 }'
```
**result**:
```json
{
  "hits": [...],
  "query": "botman",
  "processingTimeMs": 5,
  "hitsPerPage": 20,
  "page": 2,
  "totalPages": 4,
  "totalHits": 66
}
```

# Related
fixes #2577

## In charge of the feature
Core: `@ManyTheFish`
Docs: `@guimachiavelli`
Integration: `@bidoubiwa`

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in: commit 25ec51e783

Cargo.lock (generated): 34 changes
```diff
@@ -1183,8 +1183,8 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "0.33.4"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
+version = "0.34.0"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70"
 dependencies = [
  "nom",
  "nom_locate",
@@ -1202,8 +1202,8 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "0.33.4"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
+version = "0.34.0"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70"
 dependencies = [
  "serde_json",
 ]
@@ -1360,9 +1360,9 @@ dependencies = [
 
 [[package]]
 name = "geoutils"
-version = "0.4.1"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e006f616a407d396ace1d2ebb3f43ed73189db8b098079bd129928d7645dd1e"
+checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad"
 
 [[package]]
 name = "getrandom"
@@ -1713,8 +1713,8 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "0.33.4"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
+version = "0.34.0"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70"
 dependencies = [
  "serde_json",
 ]
@@ -2249,12 +2249,12 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "0.33.4"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd"
+version = "0.34.0"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70"
 dependencies = [
  "bimap",
  "bincode",
- "bstr 0.2.17",
+ "bstr 1.0.1",
  "byteorder",
  "charabia",
  "concat-arrays",
@@ -2506,9 +2506,9 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
 
 [[package]]
 name = "ordered-float"
-version = "2.10.0"
+version = "3.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87"
+checksum = "1f74e330193f90ec45e2b257fa3ef6df087784157ac1ad2c1e71c62837b03aa7"
 dependencies = [
  "num-traits",
 ]
@@ -3015,9 +3015,9 @@ dependencies = [
 
 [[package]]
 name = "retain_mut"
-version = "0.1.9"
+version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0"
+checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
 
 [[package]]
 name = "ring"
@@ -3057,9 +3057,9 @@ dependencies = [
 
 [[package]]
 name = "roaring"
-version = "0.9.0"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd539cab4e32019956fe7e0cf160bb6d4802f4be2b52c4253d76d3bb0f85a5f7"
+checksum = "ef0fb5e826a8bde011ecae6a8539dd333884335c57ff0f003fbe27c25bbe8f71"
 dependencies = [
  "bytemuck",
  "byteorder",
```
```diff
@@ -7,7 +7,7 @@ edition = "2021"
 enum-iterator = "1.1.2"
 hmac = "0.12.1"
 meilisearch-types = { path = "../meilisearch-types" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false }
 rand = "0.8.5"
 serde = { version = "1.0.145", features = ["derive"] }
 serde_json = { version = "1.0.85", features = ["preserve_order"] }
```
```diff
@@ -10,7 +10,7 @@ use http::header::CONTENT_TYPE;
 use meilisearch_auth::SearchRules;
 use meilisearch_lib::index::{
     SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
 };
 use meilisearch_lib::index_controller::Stats;
 use meilisearch_lib::MeiliSearch;
@@ -373,6 +373,7 @@ pub struct SearchAggregator {
     // pagination
     max_limit: usize,
     max_offset: usize,
+    finite_pagination: usize,
 
     // formatting
     highlight_pre_tag: bool,
@@ -427,12 +428,20 @@ impl SearchAggregator {
             ret.max_terms_number = q.split_whitespace().count();
         }
 
+        if query.is_finite_pagination() {
+            let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+            ret.max_limit = limit;
+            ret.max_offset = query.page.unwrap_or(1).saturating_sub(1) * limit;
+            ret.finite_pagination = 1;
+        } else {
+            ret.max_limit = query.limit;
+            ret.max_offset = query.offset;
+            ret.finite_pagination = 0;
+        }
+
         ret.matching_strategy
             .insert(format!("{:?}", query.matching_strategy), 1);
 
-        ret.max_limit = query.limit;
-        ret.max_offset = query.offset.unwrap_or_default();
-
         ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
         ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
         ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER();
@@ -491,6 +500,7 @@ impl SearchAggregator {
         // pagination
         self.max_limit = self.max_limit.max(other.max_limit);
         self.max_offset = self.max_offset.max(other.max_offset);
+        self.finite_pagination += other.finite_pagination;
 
         self.highlight_pre_tag |= other.highlight_pre_tag;
         self.highlight_post_tag |= other.highlight_post_tag;
@@ -534,6 +544,7 @@ impl SearchAggregator {
             "pagination": {
                 "max_limit": self.max_limit,
                 "max_offset": self.max_offset,
+                "finite_pagination": self.finite_pagination > self.total_received / 2,
             },
             "formatting": {
                 "highlight_pre_tag": self.highlight_pre_tag,
```
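Side note on the analytics change above: a finitely paginated query carries no literal offset, so the aggregator derives one from `page` and `hitsPerPage` to keep the existing `max_limit`/`max_offset` metrics meaningful, and `finite_pagination` counts how many queries in the batch used the new mode (reported as a boolean once a majority does). A minimal sketch of the offset derivation, with a hypothetical free-function name and the default limit of 20 taken from `DEFAULT_SEARCH_LIMIT`:

```rust
/// Hypothetical standalone version of the aggregator's offset derivation:
/// page 1 maps to offset 0, and page 0 saturates to 0 instead of underflowing.
fn derived_offset(page: Option<usize>, hits_per_page: Option<usize>) -> usize {
    let limit = hits_per_page.unwrap_or(20); // DEFAULT_SEARCH_LIMIT() == 20
    page.unwrap_or(1).saturating_sub(1) * limit
}

fn main() {
    assert_eq!(derived_offset(Some(1), Some(10)), 0); // first page starts at offset 0
    assert_eq!(derived_offset(Some(3), Some(10)), 20); // two full pages skipped
    assert_eq!(derived_offset(Some(0), None), 0); // saturating_sub avoids underflow
}
```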
```diff
@@ -4,6 +4,7 @@ use meilisearch_auth::IndexSearchRules;
 use meilisearch_lib::index::{
     MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
     DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+    DEFAULT_SEARCH_OFFSET,
 };
 use meilisearch_lib::MeiliSearch;
 use meilisearch_types::error::ResponseError;
@@ -27,8 +28,12 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQueryGet {
     q: Option<String>,
-    offset: Option<usize>,
-    limit: Option<usize>,
+    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
+    offset: usize,
+    #[serde(default = "DEFAULT_SEARCH_LIMIT")]
+    limit: usize,
+    page: Option<usize>,
+    hits_per_page: Option<usize>,
     attributes_to_retrieve: Option<CS<String>>,
     attributes_to_crop: Option<CS<String>>,
     #[serde(default = "DEFAULT_CROP_LENGTH")]
@@ -62,7 +67,9 @@ impl From<SearchQueryGet> for SearchQuery {
         Self {
             q: other.q,
             offset: other.offset,
-            limit: other.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT),
+            limit: other.limit,
+            page: other.page,
+            hits_per_page: other.hits_per_page,
             attributes_to_retrieve: other
                 .attributes_to_retrieve
                 .map(|o| o.into_iter().collect()),
```
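The GET route gains the same `page` and `hitsPerPage` parameters, while `offset` and `limit` keep working through serde defaults (the `DEFAULT_SEARCH_*` constants are function pointers precisely so they can be named in `#[serde(default = "...")]`). A sketch of how such a query string deserializes, assuming `serde_urlencoded` as the decoding layer (actix-web's `web::Query` uses it internally) and a stripped-down stand-in for `SearchQueryGet`:

```rust
use serde::Deserialize;

const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;

// Hypothetical stand-in for SearchQueryGet, keeping only the pagination fields.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct PaginationParams {
    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
    offset: usize,
    #[serde(default = "DEFAULT_SEARCH_LIMIT")]
    limit: usize,
    page: Option<usize>,
    hits_per_page: Option<usize>,
}

fn main() {
    // `?page=2` alone: offset and limit fall back to their defaults.
    let p: PaginationParams = serde_urlencoded::from_str("page=2").unwrap();
    assert_eq!((p.offset, p.limit), (0, 20));
    assert_eq!((p.page, p.hits_per_page), (Some(2), None));

    // camelCase renaming maps `hitsPerPage` onto `hits_per_page`.
    let p: PaginationParams = serde_urlencoded::from_str("hitsPerPage=3").unwrap();
    assert_eq!(p.hits_per_page, Some(3));
}
```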
```diff
@@ -3,6 +3,7 @@
 
 mod errors;
 mod formatted;
+mod pagination;
 
 use crate::common::Server;
 use once_cell::sync::Lazy;
```
meilisearch-http/tests/search/pagination.rs (new file, 112 lines):

```rust
use crate::common::Server;
use crate::search::DOCUMENTS;
use serde_json::json;

#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {
    let server = Server::new().await;
    let index = server.index("basic");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    index
        .search(json!({}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert!(response.get("estimatedTotalHits").is_some());
            assert!(response.get("limit").is_some());
            assert!(response.get("offset").is_some());

            // these fields shouldn't be present
            assert!(response.get("totalHits").is_none());
            assert!(response.get("page").is_none());
            assert!(response.get("totalPages").is_none());
        })
        .await;
}

#[actix_rt::test]
async fn simple_search() {
    let server = Server::new().await;
    let index = server.index("basic");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    index
        .search(json!({"page": 1}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(response["hits"].as_array().unwrap().len(), 5);
            assert!(response.get("totalHits").is_some());
            assert_eq!(response["page"], 1);
            assert_eq!(response["totalPages"], 1);

            // these fields shouldn't be present
            assert!(response.get("estimatedTotalHits").is_none());
            assert!(response.get("limit").is_none());
            assert!(response.get("offset").is_none());
        })
        .await;
}

#[actix_rt::test]
async fn page_zero_should_not_return_any_result() {
    let server = Server::new().await;
    let index = server.index("basic");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    index
        .search(json!({"page": 0}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(response["hits"].as_array().unwrap().len(), 0);
            assert!(response.get("totalHits").is_some());
            assert_eq!(response["page"], 0);
            assert_eq!(response["totalPages"], 1);
        })
        .await;
}

#[actix_rt::test]
async fn hits_per_page_1() {
    let server = Server::new().await;
    let index = server.index("basic");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    index
        .search(json!({"hitsPerPage": 1}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(response["hits"].as_array().unwrap().len(), 1);
            assert_eq!(response["totalHits"], 5);
            assert_eq!(response["page"], 1);
            assert_eq!(response["totalPages"], 5);
        })
        .await;
}

#[actix_rt::test]
async fn hits_per_page_0_should_not_return_any_result() {
    let server = Server::new().await;
    let index = server.index("basic");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(0).await;

    index
        .search(json!({"hitsPerPage": 0}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(response["hits"].as_array().unwrap().len(), 0);
            assert_eq!(response["totalHits"], 5);
            assert_eq!(response["page"], 1);
            assert_eq!(response["totalPages"], 0);
        })
        .await;
}
```
```diff
@@ -28,7 +28,7 @@ lazy_static = "1.4.0"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false }
 mime = "0.3.16"
 num_cpus = "1.13.1"
 obkv = "0.2.0"
@@ -40,7 +40,7 @@ rand = "0.8.5"
 rayon = "1.5.3"
 regex = "1.6.0"
 reqwest = { version = "0.11.12", features = ["json", "rustls-tls"], default-features = false, optional = true }
-roaring = "0.9.0"
+roaring = "0.10.1"
 rustls = "0.20.6"
 serde = { version = "1.0.145", features = ["derive"] }
 serde_json = { version = "1.0.85", features = ["preserve_order"] }
```
```diff
@@ -70,7 +70,7 @@ impl From<TaskEvent> for NewTaskEvent {
     }
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[allow(clippy::large_enum_variant)]
 pub enum TaskContent {
     DocumentAddition {
```
```diff
@@ -1,6 +1,7 @@
 pub use search::{
-    MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+    HitsInfo, MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };
 pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
```
```diff
@@ -21,6 +21,7 @@ use super::index::Index;
 pub type Document = serde_json::Map<String, Value>;
 type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 
+pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
 pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
 pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
```
```diff
@@ -35,9 +36,12 @@ pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
     pub q: Option<String>,
-    pub offset: Option<usize>,
+    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
+    pub offset: usize,
     #[serde(default = "DEFAULT_SEARCH_LIMIT")]
     pub limit: usize,
+    pub page: Option<usize>,
+    pub hits_per_page: Option<usize>,
     pub attributes_to_retrieve: Option<BTreeSet<String>>,
     pub attributes_to_crop: Option<Vec<String>>,
     #[serde(default = "DEFAULT_CROP_LENGTH")]
```
```diff
@@ -59,6 +63,12 @@ pub struct SearchQuery {
     pub matching_strategy: MatchingStrategy,
 }
 
+impl SearchQuery {
+    pub fn is_finite_pagination(&self) -> bool {
+        self.page.or(self.hits_per_page).is_some()
+    }
+}
+
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase")]
 pub enum MatchingStrategy {
```
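The predicate reads naturally with `Option::or`: the query is considered finitely paginated as soon as either `page` or `hitsPerPage` is present. Illustrated on its own (hypothetical free function, same logic as the method above):

```rust
fn is_finite_pagination(page: Option<usize>, hits_per_page: Option<usize>) -> bool {
    page.or(hits_per_page).is_some()
}

fn main() {
    assert!(!is_finite_pagination(None, None)); // classic offset/limit mode
    assert!(is_finite_pagination(Some(2), None)); // page alone switches modes
    assert!(is_finite_pagination(None, Some(25))); // so does hitsPerPage alone
}
```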
```diff
@@ -97,15 +107,32 @@ pub struct SearchHit {
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
     pub hits: Vec<SearchHit>,
-    pub estimated_total_hits: u64,
     pub query: String,
-    pub limit: usize,
-    pub offset: usize,
     pub processing_time_ms: u128,
+    #[serde(flatten)]
+    pub hits_info: HitsInfo,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
 }
 
+#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum HitsInfo {
+    #[serde(rename_all = "camelCase")]
+    Pagination {
+        hits_per_page: usize,
+        page: usize,
+        total_pages: usize,
+        total_hits: usize,
+    },
+    #[serde(rename_all = "camelCase")]
+    OffsetLimit {
+        limit: usize,
+        offset: usize,
+        estimated_total_hits: usize,
+    },
+}
+
 impl Index {
     pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
         let before_search = Instant::now();
```
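`HitsInfo` is what produces the two response shapes shown in the PR description: `#[serde(untagged)]` emits the variant's fields without any wrapper key, and `#[serde(flatten)]` on `SearchResult::hits_info` merges them into the top-level response object. A self-contained sketch of that serde combination with a simplified response type (not the real `SearchResult`):

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Response {
    query: String,
    #[serde(flatten)] // hits_info fields land at the top level of the JSON
    hits_info: HitsInfo,
}

#[derive(Serialize)]
#[serde(untagged)] // no variant name in the output, only the fields
enum HitsInfo {
    #[serde(rename_all = "camelCase")]
    Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
    #[serde(rename_all = "camelCase")]
    OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}

fn main() {
    let finite = Response {
        query: "botman".into(),
        hits_info: HitsInfo::Pagination { hits_per_page: 20, page: 2, total_pages: 4, total_hits: 66 },
    };
    // {"query":"botman","hitsPerPage":20,"page":2,"totalPages":4,"totalHits":66}
    println!("{}", serde_json::to_string(&finite).unwrap());

    let infinite = Response {
        query: "botman".into(),
        hits_info: HitsInfo::OffsetLimit { limit: 20, offset: 0, estimated_total_hits: 66 },
    };
    // {"query":"botman","limit":20,"offset":0,"estimatedTotalHits":66}
    println!("{}", serde_json::to_string(&infinite).unwrap());
}
```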
```diff
@@ -117,16 +144,30 @@ impl Index {
             search.query(query);
         }
 
+        let is_finite_pagination = query.is_finite_pagination();
         search.terms_matching_strategy(query.matching_strategy.into());
 
         let max_total_hits = self
             .pagination_max_total_hits(&rtxn)?
             .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
+
+        search.exhaustive_number_hits(is_finite_pagination);
+
+        // compute the offset on the limit depending on the pagination mode.
+        let (offset, limit) = if is_finite_pagination {
+            let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+            let page = query.page.unwrap_or(1);
+
+            // page 0 gives a limit of 0 forcing Meilisearch to return no document.
+            page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
+        } else {
+            (query.offset, query.limit)
+        };
 
         // Make sure that a user can't get more documents than the hard limit,
         // we align that on the offset too.
-        let offset = min(query.offset.unwrap_or(0), max_total_hits);
-        let limit = min(query.limit, max_total_hits.saturating_sub(offset));
+        let offset = min(offset, max_total_hits);
+        let limit = min(limit, max_total_hits.saturating_sub(offset));
 
         search.offset(offset);
         search.limit(limit);
```
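This is where page 0 gets its special treatment: `checked_sub(1)` returns `None` for page 0, which `map_or` turns into an `(offset, limit)` of `(0, 0)`, so the engine fetches no documents at all (matching the `page_zero_should_not_return_any_result` test). The same computation in isolation:

```rust
// Mirrors the (offset, limit) selection above for the finite-pagination branch.
fn finite_window(page: usize, hits_per_page: usize) -> (usize, usize) {
    page.checked_sub(1)
        .map_or((0, 0), |p| (hits_per_page * p, hits_per_page))
}

fn main() {
    assert_eq!(finite_window(0, 20), (0, 0)); // page 0: limit 0, nothing returned
    assert_eq!(finite_window(1, 20), (0, 20)); // page 1 starts at offset 0
    assert_eq!(finite_window(4, 20), (60, 20)); // page 4 skips three full pages
}
```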
```diff
@@ -251,7 +292,27 @@ impl Index {
             documents.push(hit);
         }
 
-        let estimated_total_hits = candidates.len();
+        let number_of_hits = min(candidates.len() as usize, max_total_hits);
+        let hits_info = if is_finite_pagination {
+            let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+            // If hit_per_page is 0, then pages can't be computed and so we respond 0.
+            let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
+                .checked_div(hits_per_page)
+                .unwrap_or(0);
+
+            HitsInfo::Pagination {
+                hits_per_page,
+                page: query.page.unwrap_or(1),
+                total_pages,
+                total_hits: number_of_hits,
+            }
+        } else {
+            HitsInfo::OffsetLimit {
+                limit: query.limit,
+                offset,
+                estimated_total_hits: number_of_hits,
+            }
+        };
 
         let facet_distribution = match query.facets {
             Some(ref fields) => {
```
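`total_pages` is a ceiling division written with `checked_div` so that `hitsPerPage: 0` produces 0 pages instead of panicking on a division by zero, which is exactly what the `hits_per_page_0_should_not_return_any_result` test asserts. The arithmetic in isolation:

```rust
// Ceiling division mirroring the total_pages computation above;
// hits_per_page == 0 yields 0 pages rather than dividing by zero.
fn total_pages(total_hits: usize, hits_per_page: usize) -> usize {
    (total_hits + hits_per_page.saturating_sub(1))
        .checked_div(hits_per_page)
        .unwrap_or(0)
}

fn main() {
    assert_eq!(total_pages(66, 20), 4); // 66 hits at 20 per page
    assert_eq!(total_pages(5, 1), 5);
    assert_eq!(total_pages(5, 0), 0); // the zero-hitsPerPage edge case
    assert_eq!(total_pages(0, 20), 0);
}
```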
```diff
@@ -274,10 +335,8 @@ impl Index {
 
         let result = SearchResult {
             hits: documents,
-            estimated_total_hits,
+            hits_info,
             query: query.q.clone().unwrap_or_default(),
-            limit: query.limit,
-            offset: query.offset.unwrap_or_default(),
             processing_time_ms: before_search.elapsed().as_millis(),
             facet_distribution,
         };
```
```diff
@@ -38,7 +38,7 @@ pub struct Checked;
 pub struct Unchecked;
 
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct MinWordSizeTyposSetting {
@@ -51,7 +51,7 @@ pub struct MinWordSizeTyposSetting {
 }
 
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct TypoSettings {
@@ -70,7 +70,7 @@ pub struct TypoSettings {
 }
 
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct FacetingSettings {
@@ -80,7 +80,7 @@ pub struct FacetingSettings {
 }
 
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 pub struct PaginationSettings {
@@ -92,7 +92,7 @@ pub struct PaginationSettings {
 /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
 /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
 /// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 #[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
```
```diff
@@ -659,7 +659,7 @@ mod test {
     use nelson::Mocker;
 
     use crate::index::error::Result as IndexResult;
-    use crate::index::Index;
+    use crate::index::{HitsInfo, Index};
     use crate::index::{
         DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
     };
@@ -691,8 +691,10 @@ mod test {
         let index_uuid = Uuid::new_v4();
         let query = SearchQuery {
             q: Some(String::from("hello world")),
-            offset: Some(10),
+            offset: 10,
             limit: 0,
+            page: Some(1),
+            hits_per_page: Some(10),
             attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()),
             attributes_to_crop: None,
             crop_length: 18,
@@ -709,10 +711,12 @@ mod test {
 
         let result = SearchResult {
             hits: vec![],
-            estimated_total_hits: 29,
             query: "hello world".to_string(),
+            hits_info: HitsInfo::OffsetLimit {
                 limit: 24,
                 offset: 0,
+                estimated_total_hits: 29,
+            },
             processing_time_ms: 50,
             facet_distribution: None,
         };
```
```diff
@@ -80,7 +80,7 @@ impl TaskEvent {
 /// It's stored on disk and executed from the lowest to highest Task id.
 /// Every time a new task is created it has a higher Task id than the previous one.
 /// See also `Job`.
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub struct Task {
     pub id: TaskId,
@@ -135,7 +135,7 @@ pub enum DocumentDeletion {
     Ids(Vec<String>),
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 #[allow(clippy::large_enum_variant)]
 pub enum TaskContent {
```