diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs new file mode 100644 index 000000000..be3af4f3e --- /dev/null +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -0,0 +1,178 @@ +use std::collections::{BTreeSet, HashSet}; + +use actix_web::web::Data; +use actix_web::{web, HttpRequest, HttpResponse}; +use deserr::actix_web::{AwebJson, AwebQueryParameter}; +use index_scheduler::IndexScheduler; +use log::debug; +use meilisearch_types::deserr::query_params::Param; +use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; +use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::facet; +use meilisearch_types::serde_cs::vec::CS; +use serde_json::Value; + +use crate::analytics::{Analytics, SearchAggregator}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::search::{ + add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, + DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, + DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, +}; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service(web::resource("").route(web::post().to(search))); +} + +// #[derive(Debug, deserr::Deserr)] +// #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +// pub struct FacetSearchQuery { +// #[deserr(default, error = DeserrQueryParamError)] +// facetQuery: Option, +// #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError)] +// offset: Param, +// #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError)] +// limit: Param, +// #[deserr(default, error = DeserrQueryParamError)] +// page: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// hits_per_page: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// attributes_to_retrieve: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// attributes_to_crop: Option>, +// #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError)] +// crop_length: Param, +// #[deserr(default, error = DeserrQueryParamError)] +// attributes_to_highlight: Option>, +// #[deserr(default, error = DeserrQueryParamError)] +// filter: Option, +// #[deserr(default, error = DeserrQueryParamError)] +// sort: Option, +// #[deserr(default, error = DeserrQueryParamError)] +// show_matches_position: Param, +// #[deserr(default, error = DeserrQueryParamError)] +// facets: Option>, +// #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] +// highlight_pre_tag: String, +// #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] +// highlight_post_tag: String, +// #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError)] +// crop_marker: String, +// #[deserr(default, error = DeserrQueryParamError)] +// matching_strategy: MatchingStrategy, +// } + +// TODO improve the error messages +#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct FacetSearchQuery { + #[deserr(default, error = DeserrJsonError)] + pub facet_query: Option, + #[deserr(default, error = DeserrJsonError)] + pub facet_name: String, + #[deserr(default, error = DeserrJsonError)] + pub q: Option, + #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] + pub offset: usize, + #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] + pub limit: usize, + #[deserr(default, error = DeserrJsonError)] + pub page: Option, + #[deserr(default, error = DeserrJsonError)] + pub hits_per_page: Option, + #[deserr(default, error = DeserrJsonError)] + pub attributes_to_retrieve: Option>, + #[deserr(default, error = DeserrJsonError)] + pub attributes_to_crop: Option>, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_LENGTH())] + pub crop_length: usize, + #[deserr(default, error = DeserrJsonError)] + pub attributes_to_highlight: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub show_matches_position: bool, + #[deserr(default, error = DeserrJsonError)] + pub filter: Option, + #[deserr(default, error = DeserrJsonError)] + pub sort: Option>, + #[deserr(default, error = DeserrJsonError)] + pub facets: Option>, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] + pub highlight_pre_tag: String, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] + pub highlight_post_tag: String, + #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_MARKER())] + pub crop_marker: String, + #[deserr(default, error = DeserrJsonError, default)] + pub matching_strategy: MatchingStrategy, +} + +pub async fn search( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, + params: AwebJson, + req: HttpRequest, + analytics: web::Data, +) -> Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let mut query = params.into_inner(); + debug!("facet search called with params: {:?}", query); + + let facet_query = query.facet_query.clone(); + let facet_name = query.facet_name.clone(); + let mut search_query = SearchQuery::from(query); + + // Tenant token search_rules. + if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { + add_search_rules(&mut search_query, search_rules); + } + + // TODO log stuff + // let mut aggregate = SearchAggregator::from_query(&query, &req); + + let index = index_scheduler.index(&index_uid)?; + let search_result = tokio::task::spawn_blocking(move || { + perform_facet_search(&index, search_query, facet_query, facet_name) + }) + .await?; + + // TODO log stuff + // if let Ok(ref search_result) = search_result { + // aggregate.succeed(search_result); + // } + // TODO analytics + // analytics.post_search(aggregate); + + let search_result = search_result?; + + debug!("returns: {:?}", search_result); + Ok(HttpResponse::Ok().json(search_result)) +} + +impl From for SearchQuery { + fn from(value: FacetSearchQuery) -> Self { + SearchQuery { + q: value.q, + offset: value.offset, + limit: value.limit, + page: value.page, + hits_per_page: value.hits_per_page, + attributes_to_retrieve: value.attributes_to_retrieve, + attributes_to_crop: value.attributes_to_crop, + crop_length: value.crop_length, + attributes_to_highlight: value.attributes_to_highlight, + show_matches_position: value.show_matches_position, + filter: value.filter, + sort: value.sort, + facets: value.facets, + highlight_pre_tag: value.highlight_pre_tag, + highlight_post_tag: value.highlight_post_tag, + crop_marker: value.crop_marker, + matching_strategy: value.matching_strategy, + } + } +} diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index ba925b3d5..81b5c3f2e 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; pub mod documents; +pub mod facet_search; pub mod search; pub mod settings; @@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats)))) .service(web::scope("/documents").configure(documents::configure)) .service(web::scope("/search").configure(search::configure)) + .service(web::scope("/facet-search").configure(facet_search::configure)) .service(web::scope("/settings").configure(settings::configure)), ); } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 87cfdadb3..2093a27ef 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -3,6 +3,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::str::FromStr; use std::time::Instant; +use actix_http::header::q; use deserr::Deserr; use either::Either; use index_scheduler::RoFeatures; @@ -10,9 +11,10 @@ use log::warn; use meilisearch_auth::IndexSearchRules; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; -use meilisearch_types::milli::{dot_product_similarity, InternalError}; +use meilisearch_types::milli::{dot_product_similarity, FacetSearchResult, InternalError}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; @@ -26,6 +28,7 @@ use serde::Serialize; use serde_json::{json, Value}; use crate::error::MeilisearchHttpError; +use crate::routes::indexes::facet_search::FacetSearchQuery; type MatchesPosition = BTreeMap>; @@ -199,7 +202,7 @@ impl SearchQueryWithIndex { } } -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)] #[deserr(rename_all = camelCase)] pub enum MatchingStrategy { /// Remove query words from last to first @@ -298,14 +301,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) { } } -pub fn perform_search( - index: &Index, - query: SearchQuery, +fn prepare_search<'t>( + index: &'t Index, + rtxn: &'t RoTxn, + query: &'t SearchQuery, features: RoFeatures, -) -> Result { - let before_search = Instant::now(); - let rtxn = index.read_txn()?; - +) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { let mut search = index.search(&rtxn); if query.vector.is_some() && query.q.is_some() { @@ -383,6 +384,20 @@ pub fn perform_search( search.sort_criteria(sort); } + Ok((search, is_finite_pagination, max_total_hits, offset)) +} + +pub fn perform_search( + index: &Index, + query: SearchQuery, + features: RoFeatures, +) -> Result { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; + + let (search, is_finite_pagination, max_total_hits, offset) = + prepare_search(index, &rtxn, &query, features)?; + let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } = search.execute()?; @@ -557,6 +572,21 @@ pub fn perform_search( Ok(result) } +pub fn perform_facet_search( + index: &Index, + search_query: SearchQuery, + facet_query: Option, + facet_name: String, +) -> Result, MeilisearchHttpError> { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; + + let (search, is_finite_pagination, max_total_hits, offset) = + prepare_search(index, &rtxn, &search_query)?; + + todo!("Execute the search") +} + fn insert_geo_distance(sorts: &[String], document: &mut Document) { lazy_static::lazy_static! { static ref GEO_REGEX: Regex = diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 184ba409e..85ba3df3a 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -341,6 +341,7 @@ impl<'a> SearchForFacetValue<'a> { } } +#[derive(Debug, serde::Serialize)] pub struct FacetSearchResult { /// The original facet value pub value: String,