mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
Merge #1615
1615: Integrate the query time sort feature r=Kerollmops a=Kerollmops This pull request integrates the sort at query time feature that was implemented on the milli side https://github.com/meilisearch/milli/pull/320. It follows the specification file https://github.com/meilisearch/specifications/blob/develop/text/0055-sort.md. A bunch of tests has been added to ensure that the search works correctly and that the settings are fine too! Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
96f72f009a
17
Cargo.lock
generated
17
Cargo.lock
generated
@ -1585,7 +1585,6 @@ dependencies = [
|
||||
"fst",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"grenad",
|
||||
"heed",
|
||||
"hex",
|
||||
"http",
|
||||
@ -1676,8 +1675,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "0.10.2"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.10.2#879d5e8799836d93f8995810965b6797be4f69d1"
|
||||
version = "0.11.0"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.11.0#c51bb6789cb3fbb6511138374b3443f9116a445c"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"byteorder",
|
||||
@ -1704,7 +1703,6 @@ dependencies = [
|
||||
"pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)",
|
||||
"pest_derive",
|
||||
"rayon",
|
||||
"regex",
|
||||
"roaring",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@ -1712,7 +1710,6 @@ dependencies = [
|
||||
"smallstr",
|
||||
"smallvec",
|
||||
"tempfile",
|
||||
"tinytemplate",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@ -2937,16 +2934,6 @@ dependencies = [
|
||||
"syn 1.0.73",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinytemplate"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d3dc76004a03cec1c5932bca4cdc2e39aaa798e3f82363dd94f9adf6098c12f"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.2.0"
|
||||
|
@ -63,6 +63,7 @@ pub enum Code {
|
||||
|
||||
Facet,
|
||||
Filter,
|
||||
Sort,
|
||||
|
||||
BadParameter,
|
||||
BadRequest,
|
||||
@ -116,6 +117,8 @@ impl Code {
|
||||
Facet => ErrCode::invalid("invalid_facet", StatusCode::BAD_REQUEST),
|
||||
// error related to filters
|
||||
Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST),
|
||||
// error related to sorts
|
||||
Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST),
|
||||
|
||||
BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST),
|
||||
BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),
|
||||
|
@ -41,7 +41,6 @@ flate2 = "1.0.19"
|
||||
fst = "0.4.5"
|
||||
futures = "0.3.7"
|
||||
futures-util = "0.3.8"
|
||||
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
|
||||
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
|
||||
http = "0.2.1"
|
||||
indexmap = { version = "1.3.2", features = ["serde-1"] }
|
||||
@ -51,7 +50,7 @@ main_error = "0.1.0"
|
||||
meilisearch-error = { path = "../meilisearch-error" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
|
||||
memmap = "0.7.0"
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.10.2" }
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.11.0" }
|
||||
mime = "0.3.16"
|
||||
num_cpus = "1.13.0"
|
||||
once_cell = "1.5.2"
|
||||
|
@ -103,6 +103,7 @@ impl ErrorCode for MilliError<'_> {
|
||||
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
|
||||
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
|
||||
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
|
||||
UserError::InvalidSortableAttribute { .. } => Code::Sort,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -63,6 +63,8 @@ impl Index {
|
||||
|
||||
let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();
|
||||
|
||||
let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();
|
||||
|
||||
let criteria = self
|
||||
.criteria(txn)?
|
||||
.into_iter()
|
||||
@ -101,6 +103,7 @@ impl Index {
|
||||
None => Setting::Reset,
|
||||
},
|
||||
filterable_attributes: Setting::Set(filterable_attributes),
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
distinct_attribute: match distinct_field {
|
||||
|
@ -1,15 +1,17 @@
|
||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
use either::Either;
|
||||
use heed::RoTxn;
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
|
||||
use milli::{FieldId, FieldsIdsMap, FilterCondition, MatchingWords};
|
||||
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::index::error::FacetError;
|
||||
use crate::index::IndexError;
|
||||
|
||||
use super::error::Result;
|
||||
use super::Index;
|
||||
@ -49,6 +51,7 @@ pub struct SearchQuery {
|
||||
#[serde(default = "Default::default")]
|
||||
pub matches: bool,
|
||||
pub filter: Option<Value>,
|
||||
pub sort: Option<Vec<String>>,
|
||||
pub facets_distribution: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
@ -104,6 +107,15 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref sort) = query.sort {
|
||||
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
|
||||
Ok(sorts) => sorts,
|
||||
Err(err) => return Err(IndexError::Milli(err.into())),
|
||||
};
|
||||
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
|
||||
let milli::SearchResult {
|
||||
documents_ids,
|
||||
matching_words,
|
||||
|
@ -1,8 +1,8 @@
|
||||
use std::fs::File;
|
||||
|
||||
use crate::index::Index;
|
||||
use grenad::CompressionType;
|
||||
use milli::update::UpdateBuilder;
|
||||
use milli::CompressionType;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
use crate::index_controller::UpdateMeta;
|
||||
|
@ -55,7 +55,9 @@ pub struct Settings<T> {
|
||||
pub searchable_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub filterable_attributes: Setting<HashSet<String>>,
|
||||
pub filterable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub sortable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub ranking_rules: Setting<Vec<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
@ -75,6 +77,7 @@ impl Settings<Checked> {
|
||||
displayed_attributes: Setting::Reset,
|
||||
searchable_attributes: Setting::Reset,
|
||||
filterable_attributes: Setting::Reset,
|
||||
sortable_attributes: Setting::Reset,
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
@ -88,6 +91,7 @@ impl Settings<Checked> {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
@ -99,6 +103,7 @@ impl Settings<Checked> {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
@ -136,6 +141,7 @@ impl Settings<Unchecked> {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes: self.filterable_attributes,
|
||||
sortable_attributes: self.sortable_attributes,
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
@ -248,8 +254,21 @@ impl Index {
|
||||
}
|
||||
|
||||
match settings.filterable_attributes {
|
||||
Setting::Set(ref facet_types) => builder.set_filterable_fields(facet_types.clone()),
|
||||
Setting::Reset => builder.set_filterable_fields(HashSet::new()),
|
||||
Setting::Set(ref facets) => {
|
||||
builder.set_filterable_fields(facets.clone().into_iter().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_filterable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.sortable_attributes {
|
||||
Setting::Set(ref fields) => {
|
||||
builder.set_sortable_fields(fields.iter().cloned().collect())
|
||||
}
|
||||
Setting::Reset => {
|
||||
// TODO we must use the reset_sortable_fields in a futur PR.
|
||||
builder.set_sortable_fields(HashSet::new())
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
@ -328,6 +347,7 @@ mod test {
|
||||
displayed_attributes: Setting::Set(vec![String::from("hello")]),
|
||||
searchable_attributes: Setting::Set(vec![String::from("hello")]),
|
||||
filterable_attributes: Setting::NotSet,
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
@ -348,6 +368,7 @@ mod test {
|
||||
displayed_attributes: Setting::Set(vec![String::from("*")]),
|
||||
searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
|
||||
filterable_attributes: Setting::NotSet,
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
|
@ -158,15 +158,12 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
// we previously had a `Vec<String>` but now we have a `HashMap<String, String>`
|
||||
// representing the name of the faceted field + the type of the field. Since the type
|
||||
// was not known in the V1 of the dump we are just going to assume everything is a
|
||||
// String
|
||||
filterable_attributes: match settings.attributes_for_faceting {
|
||||
Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
sortable_attributes: Setting::NotSet,
|
||||
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||
ranking_rules: match settings.ranking_rules {
|
||||
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| {
|
||||
|
@ -4,7 +4,7 @@ use std::sync::Arc;
|
||||
use std::{error, fs};
|
||||
|
||||
use byte_unit::Byte;
|
||||
use grenad::CompressionType;
|
||||
use milli::CompressionType;
|
||||
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||
use rustls::{
|
||||
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
|
||||
|
@ -1,5 +1,3 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use actix_web::{web, HttpResponse};
|
||||
use log::debug;
|
||||
use serde::Deserialize;
|
||||
@ -31,6 +29,7 @@ pub struct SearchQueryGet {
|
||||
crop_length: usize,
|
||||
attributes_to_highlight: Option<String>,
|
||||
filter: Option<String>,
|
||||
sort: Option<String>,
|
||||
#[serde(default = "Default::default")]
|
||||
matches: bool,
|
||||
facets_distribution: Option<String>,
|
||||
@ -40,19 +39,19 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
fn from(other: SearchQueryGet) -> Self {
|
||||
let attributes_to_retrieve = other
|
||||
.attributes_to_retrieve
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect::<BTreeSet<_>>());
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect());
|
||||
|
||||
let attributes_to_crop = other
|
||||
.attributes_to_crop
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect());
|
||||
|
||||
let attributes_to_highlight = other
|
||||
.attributes_to_highlight
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect::<HashSet<_>>());
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect());
|
||||
|
||||
let facets_distribution = other
|
||||
.facets_distribution
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect());
|
||||
|
||||
let filter = match other.filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
@ -62,6 +61,10 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let sort = other
|
||||
.sort
|
||||
.map(|attrs| attrs.split(',').map(String::from).collect());
|
||||
|
||||
Self {
|
||||
q: other.q,
|
||||
offset: other.offset,
|
||||
@ -71,6 +74,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
crop_length: other.crop_length,
|
||||
attributes_to_highlight,
|
||||
filter,
|
||||
sort,
|
||||
matches: other.matches,
|
||||
facets_distribution,
|
||||
}
|
||||
|
@ -75,11 +75,18 @@ macro_rules! make_setting_route {
|
||||
|
||||
make_setting_route!(
|
||||
"/filterable-attributes",
|
||||
std::collections::HashSet<String>,
|
||||
std::collections::BTreeSet<String>,
|
||||
filterable_attributes,
|
||||
"filterableAttributes"
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/sortable-attributes",
|
||||
std::collections::BTreeSet<String>,
|
||||
sortable_attributes,
|
||||
"sortableAttributes"
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/displayed-attributes",
|
||||
Vec<String>,
|
||||
@ -132,6 +139,7 @@ macro_rules! generate_configure {
|
||||
|
||||
generate_configure!(
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
distinct_attribute,
|
||||
|
@ -146,6 +146,80 @@ async fn search_with_filter_array_notation() {
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_sort_on_numbers() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index
|
||||
.update_settings(json!({"sortableAttributes": ["id"]}))
|
||||
.await;
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"sort": ["id:asc"]
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_sort_on_strings() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index
|
||||
.update_settings(json!({"sortableAttributes": ["title"]}))
|
||||
.await;
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"sort": ["title:desc"]
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_multiple_sort() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index
|
||||
.update_settings(json!({"sortableAttributes": ["id", "title"]}))
|
||||
.await;
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"sort": ["id:asc", "title:desc"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_facet_distribution() {
|
||||
let server = Server::new().await;
|
||||
|
@ -13,7 +13,14 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
|
||||
map.insert("distinct_attribute", json!(Value::Null));
|
||||
map.insert(
|
||||
"ranking_rules",
|
||||
json!(["words", "typo", "proximity", "attribute", "exactness"]),
|
||||
json!([
|
||||
"words",
|
||||
"typo",
|
||||
"sort",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
]),
|
||||
);
|
||||
map.insert("stop_words", json!([]));
|
||||
map.insert("synonyms", json!({}));
|
||||
@ -35,14 +42,22 @@ async fn get_settings() {
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
let settings = response.as_object().unwrap();
|
||||
assert_eq!(settings.keys().len(), 7);
|
||||
assert_eq!(settings.keys().len(), 8);
|
||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||
assert_eq!(settings["sortableAttributes"], json!([]));
|
||||
assert_eq!(settings["distinctAttribute"], json!(null));
|
||||
assert_eq!(
|
||||
settings["rankingRules"],
|
||||
json!(["words", "typo", "proximity", "attribute", "exactness"])
|
||||
json!([
|
||||
"words",
|
||||
"typo",
|
||||
"sort",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
])
|
||||
);
|
||||
assert_eq!(settings["stopWords"], json!([]));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user