diff --git a/meilisearch-types/src/deserr/mod.rs b/meilisearch-types/src/deserr/mod.rs index bbaa42dc0..df304cc2f 100644 --- a/meilisearch-types/src/deserr/mod.rs +++ b/meilisearch-types/src/deserr/mod.rs @@ -151,6 +151,10 @@ make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_ make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes); make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes); make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter); +make_missing_field_convenience_builder!( + MissingFacetSearchFacetName, + missing_facet_search_facet_name +); // Integrate a sub-error into a [`DeserrError`] by taking its error message but using // the default error code (C) from `Self` diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 09c8b7b8c..c25683e3e 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -233,6 +233,7 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ; InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ; InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ; @@ -242,6 +243,8 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ; InvalidSearchOffset , InvalidRequest , BAD_REQUEST ; InvalidSearchPage , InvalidRequest , BAD_REQUEST ; InvalidSearchQ , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ; InvalidSearchVector , InvalidRequest , BAD_REQUEST ; InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScore , 
InvalidRequest , BAD_REQUEST ; @@ -284,6 +287,7 @@ MissingApiKeyIndexes , InvalidRequest , BAD_REQUEST ; MissingAuthorizationHeader , Auth , UNAUTHORIZED ; MissingContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; MissingDocumentId , InvalidRequest , BAD_REQUEST ; +MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; MissingIndexUid , InvalidRequest , BAD_REQUEST ; MissingMasterKey , Auth , UNAUTHORIZED ; MissingPayload , InvalidRequest , BAD_REQUEST ; @@ -340,6 +344,9 @@ impl ErrorCode for milli::Error { UserError::InvalidSearchableAttribute { .. } => { Code::InvalidAttributesToSearchOn } + UserError::InvalidFacetSearchFacetName { .. } => { + Code::InvalidFacetSearchFacetName + } UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs index 68c3a7dff..4bd190f87 100644 --- a/meilisearch/src/analytics/mock_analytics.rs +++ b/meilisearch/src/analytics/mock_analytics.rs @@ -38,6 +38,18 @@ impl MultiSearchAggregator { pub fn succeed(&mut self) {} } +#[derive(Default)] +pub struct FacetSearchAggregator; + +#[allow(dead_code)] +impl FacetSearchAggregator { + pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { + Self::default() + } + + pub fn succeed(&mut self, _: &dyn Any) {} +} + impl MockAnalytics { #[allow(clippy::new_ret_no_self)] pub fn new(opt: &Opt) -> Arc { @@ -56,6 +68,7 @@ impl Analytics for MockAnalytics { fn get_search(&self, _aggregate: super::SearchAggregator) {} fn post_search(&self, _aggregate: super::SearchAggregator) {} fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {} + fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {} fn add_documents( &self, _documents_query: &UpdateDocumentsQuery, diff --git a/meilisearch/src/analytics/mod.rs 
b/meilisearch/src/analytics/mod.rs index c48564dff..86f9c1abe 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics; pub type SearchAggregator = mock_analytics::SearchAggregator; #[cfg(any(debug_assertions, not(feature = "analytics")))] pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; +#[cfg(any(debug_assertions, not(feature = "analytics")))] +pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator; // if we are in release mode and the feature analytics was enabled // we use the real analytics @@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; pub type SearchAggregator = segment_analytics::SearchAggregator; #[cfg(all(not(debug_assertions), feature = "analytics"))] pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; /// The Meilisearch config dir: /// `~/.config/Meilisearch` on *NIX or *BSD. 
@@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send { /// This method should be called to aggregate a post array of searches fn post_multi_search(&self, aggregate: MultiSearchAggregator); + /// This method should be called to aggregate post facet values searches + fn post_facet_search(&self, aggregate: FacetSearchAggregator); + // this method should be called to aggregate a add documents request fn add_documents( &self, diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 9a96c4650..25aa20a9a 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,5 +1,6 @@ use std::collections::{BinaryHeap, HashMap, HashSet}; use std::fs; +use std::mem::take; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -29,11 +30,13 @@ use super::{ use crate::analytics::Analytics; use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot}; use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::tasks::TasksFilterQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ - SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, }; use crate::Opt; @@ -71,6 +74,7 @@ pub enum AnalyticsMsg { AggregateGetSearch(SearchAggregator), AggregatePostSearch(SearchAggregator), AggregatePostMultiSearch(MultiSearchAggregator), + AggregatePostFacetSearch(FacetSearchAggregator), AggregateAddDocuments(DocumentsAggregator), AggregateDeleteDocuments(DocumentsDeletionAggregator), 
AggregateUpdateDocuments(DocumentsAggregator), @@ -139,6 +143,7 @@ impl SegmentAnalytics { batcher, post_search_aggregator: SearchAggregator::default(), post_multi_search_aggregator: MultiSearchAggregator::default(), + post_facet_search_aggregator: FacetSearchAggregator::default(), get_search_aggregator: SearchAggregator::default(), add_documents_aggregator: DocumentsAggregator::default(), delete_documents_aggregator: DocumentsDeletionAggregator::default(), @@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); } + fn post_facet_search(&self, aggregate: FacetSearchAggregator) { + let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate)); + } + fn post_multi_search(&self, aggregate: MultiSearchAggregator) { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate)); } @@ -354,6 +363,7 @@ pub struct Segment { get_search_aggregator: SearchAggregator, post_search_aggregator: SearchAggregator, post_multi_search_aggregator: MultiSearchAggregator, + post_facet_search_aggregator: FacetSearchAggregator, add_documents_aggregator: DocumentsAggregator, delete_documents_aggregator: DocumentsDeletionAggregator, update_documents_aggregator: DocumentsAggregator, @@ -418,6 +428,7 @@ impl Segment { Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => 
self.update_documents_aggregator.aggregate(agreg), @@ -461,55 +472,74 @@ impl Segment { }) .await; } - let get_search = std::mem::take(&mut self.get_search_aggregator) - .into_event(&self.user, "Documents Searched GET"); - let post_search = std::mem::take(&mut self.post_search_aggregator) - .into_event(&self.user, "Documents Searched POST"); - let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator) - .into_event(&self.user, "Documents Searched by Multi-Search POST"); - let add_documents = std::mem::take(&mut self.add_documents_aggregator) - .into_event(&self.user, "Documents Added"); - let delete_documents = std::mem::take(&mut self.delete_documents_aggregator) - .into_event(&self.user, "Documents Deleted"); - let update_documents = std::mem::take(&mut self.update_documents_aggregator) - .into_event(&self.user, "Documents Updated"); - let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator) - .into_event(&self.user, "Documents Fetched GET"); - let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator) - .into_event(&self.user, "Documents Fetched POST"); - let get_tasks = - std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen"); - let health = - std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen"); - if let Some(get_search) = get_search { + let Segment { + inbox: _, + opt: _, + batcher: _, + user, + get_search_aggregator, + post_search_aggregator, + post_multi_search_aggregator, + post_facet_search_aggregator, + add_documents_aggregator, + delete_documents_aggregator, + update_documents_aggregator, + get_fetch_documents_aggregator, + post_fetch_documents_aggregator, + get_tasks_aggregator, + health_aggregator, + } = self; + + if let Some(get_search) = + take(get_search_aggregator).into_event(&user, "Documents Searched GET") + { let _ = self.batcher.push(get_search).await; } - if let Some(post_search) = post_search { + if let 
Some(post_search) = + take(post_search_aggregator).into_event(&user, "Documents Searched POST") + { let _ = self.batcher.push(post_search).await; } - if let Some(post_multi_search) = post_multi_search { + if let Some(post_multi_search) = take(post_multi_search_aggregator) + .into_event(&user, "Documents Searched by Multi-Search POST") + { let _ = self.batcher.push(post_multi_search).await; } - if let Some(add_documents) = add_documents { + if let Some(post_facet_search) = + take(post_facet_search_aggregator).into_event(&user, "Facet Searched POST") + { + let _ = self.batcher.push(post_facet_search).await; + } + if let Some(add_documents) = + take(add_documents_aggregator).into_event(&user, "Documents Added") + { let _ = self.batcher.push(add_documents).await; } - if let Some(delete_documents) = delete_documents { + if let Some(delete_documents) = + take(delete_documents_aggregator).into_event(&user, "Documents Deleted") + { let _ = self.batcher.push(delete_documents).await; } - if let Some(update_documents) = update_documents { + if let Some(update_documents) = + take(update_documents_aggregator).into_event(&user, "Documents Updated") + { let _ = self.batcher.push(update_documents).await; } - if let Some(get_fetch_documents) = get_fetch_documents { + if let Some(get_fetch_documents) = + take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET") + { let _ = self.batcher.push(get_fetch_documents).await; } - if let Some(post_fetch_documents) = post_fetch_documents { + if let Some(post_fetch_documents) = + take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST") + { let _ = self.batcher.push(post_fetch_documents).await; } - if let Some(get_tasks) = get_tasks { + if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") { let _ = self.batcher.push(get_tasks).await; } - if let Some(health) = health { + if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") { let _ = 
self.batcher.push(health).await; } let _ = self.batcher.flush().await; @@ -909,6 +939,120 @@ impl MultiSearchAggregator { } } +#[derive(Default)] +pub struct FacetSearchAggregator { + timestamp: Option, + + // context + user_agents: HashSet, + + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap, + + // The set of all facetNames that were used + facet_names: HashSet, + + // Has there been any other parameter than the facetName or facetQuery ones? + additional_search_parameters_provided: bool, +} + +impl FacetSearchAggregator { + pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name, + vector, + q, + filter, + matching_strategy, + attributes_to_search_on, + } = query; + + let mut ret = Self::default(); + ret.timestamp = Some(OffsetDateTime::now_utc()); + + ret.total_received = 1; + ret.user_agents = extract_user_agents(request).into_iter().collect(); + ret.facet_names = Some(facet_name.clone()).into_iter().collect(); + + ret.additional_search_parameters_provided = q.is_some() + || vector.is_some() + || filter.is_some() + || *matching_strategy != MatchingStrategy::default() + || attributes_to_search_on.is_some(); + + ret + } + + pub fn succeed(&mut self, result: &FacetSearchResult) { + self.total_succeeded = self.total_succeeded.saturating_add(1); + self.time_spent.push(result.processing_time_ms as usize); + } + + /// Aggregate one [FacetSearchAggregator] into another. 
+ pub fn aggregate(&mut self, mut other: Self) { + if self.timestamp.is_none() { + self.timestamp = other.timestamp; + } + + // context + for user_agent in other.user_agents.into_iter() { + self.user_agents.insert(user_agent); + } + + // request + self.total_received = self.total_received.saturating_add(other.total_received); + self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded); + self.time_spent.append(&mut other.time_spent); + + // facet_names + for facet_name in other.facet_names.into_iter() { + self.facet_names.insert(facet_name); + } + + // additional_search_parameters_provided + self.additional_search_parameters_provided = self.additional_search_parameters_provided + | other.additional_search_parameters_provided; + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option { + if self.total_received == 0 { + None + } else { + // the index of the 99th percentile value + let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.; + // we get all the values in a sorted manner + let time_spent = self.time_spent.into_sorted_vec(); + // We are only interested in the slowest value of the 99% fastest results + let time_spent = time_spent.get(percentile_99th as usize); + + let properties = json!({ + "user-agent": self.user_agents, + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": self.total_succeeded, + "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panic + "total_received": self.total_received, + }, + "facets": { + "total_distinct_facet_count": self.facet_names.len(), + "additional_search_parameters_provided": self.additional_search_parameters_provided, + }, + }); + + Some(Track { + timestamp: self.timestamp, + user: user.clone(), + event: event_name.to_string(), + properties, + ..Default::default() + }) + } + } +} + #[derive(Default)] pub struct DocumentsAggregator { timestamp: Option, diff --git 
a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs new file mode 100644 index 000000000..5a5c04f99 --- /dev/null +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -0,0 +1,124 @@ +use actix_web::web::Data; +use actix_web::{web, HttpRequest, HttpResponse}; +use deserr::actix_web::AwebJson; +use index_scheduler::IndexScheduler; +use log::debug; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use serde_json::Value; + +use crate::analytics::{Analytics, FacetSearchAggregator}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::search::{ + add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, + DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, + DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, +}; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service(web::resource("").route(web::post().to(search))); +} + +/// # Important +/// +/// Intentionally don't use `deny_unknown_fields` to ignore search parameters sent by user +#[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase)] +pub struct FacetSearchQuery { + #[deserr(default, error = DeserrJsonError)] + pub facet_query: Option, + #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)] + pub facet_name: String, + #[deserr(default, error = DeserrJsonError)] + pub q: Option, + #[deserr(default, error = DeserrJsonError)] + pub vector: Option>, + #[deserr(default, error = DeserrJsonError)] + pub filter: Option, + #[deserr(default, error = DeserrJsonError, default)] + pub matching_strategy: MatchingStrategy, + #[deserr(default, error = DeserrJsonError, default)] + pub attributes_to_search_on: 
Option>, +} + +pub async fn search( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, + params: AwebJson, + req: HttpRequest, + analytics: web::Data, +) -> Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let query = params.into_inner(); + debug!("facet search called with params: {:?}", query); + + let mut aggregate = FacetSearchAggregator::from_query(&query, &req); + + let facet_query = query.facet_query.clone(); + let facet_name = query.facet_name.clone(); + let mut search_query = SearchQuery::from(query); + + // Tenant token search_rules. + if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { + add_search_rules(&mut search_query, search_rules); + } + + let index = index_scheduler.index(&index_uid)?; + let features = index_scheduler.features()?; + let search_result = tokio::task::spawn_blocking(move || { + perform_facet_search(&index, search_query, facet_query, facet_name, features) + }) + .await?; + + if let Ok(ref search_result) = search_result { + aggregate.succeed(search_result); + } + analytics.post_facet_search(aggregate); + + let search_result = search_result?; + + debug!("returns: {:?}", search_result); + Ok(HttpResponse::Ok().json(search_result)) +} + +impl From for SearchQuery { + fn from(value: FacetSearchQuery) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name: _, + q, + vector, + filter, + matching_strategy, + attributes_to_search_on, + } = value; + + SearchQuery { + q, + offset: DEFAULT_SEARCH_OFFSET(), + limit: DEFAULT_SEARCH_LIMIT(), + page: None, + hits_per_page: None, + attributes_to_retrieve: None, + attributes_to_crop: None, + crop_length: DEFAULT_CROP_LENGTH(), + attributes_to_highlight: None, + show_matches_position: false, + show_ranking_score: false, + show_ranking_score_details: false, + filter, + sort: None, + facets: None, + highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), + highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), + crop_marker: 
DEFAULT_CROP_MARKER(), + matching_strategy, + vector, + attributes_to_search_on, + } + } +} diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index ba925b3d5..81b5c3f2e 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; pub mod documents; +pub mod facet_search; pub mod search; pub mod settings; @@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats)))) .service(web::scope("/documents").configure(documents::configure)) .service(web::scope("/search").configure(search::configure)) + .service(web::scope("/facet-search").configure(facet_search::configure)) .service(web::scope("/settings").configure(settings::configure)), ); } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 87cfdadb3..bebf80084 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -10,9 +10,12 @@ use log::warn; use meilisearch_auth::IndexSearchRules; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; -use meilisearch_types::milli::{dot_product_similarity, InternalError}; +use meilisearch_types::milli::{ + dot_product_similarity, FacetValueHit, InternalError, SearchForFacetValues, +}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; @@ -199,7 +202,7 @@ impl SearchQueryWithIndex { } } -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)] #[deserr(rename_all = camelCase)] pub enum MatchingStrategy 
{ /// Remove query words from last to first @@ -278,6 +281,14 @@ pub struct FacetStats { pub max: f64, } +#[derive(Serialize, Debug, Clone, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct FacetSearchResult { + pub facet_hits: Vec, + pub facet_query: Option, + pub processing_time_ms: u128, +} + /// Incorporate search rules in search query pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) { query.filter = match (query.filter.take(), rules.filter) { @@ -298,15 +309,13 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) { } } -pub fn perform_search( - index: &Index, - query: SearchQuery, +fn prepare_search<'t>( + index: &'t Index, + rtxn: &'t RoTxn, + query: &'t SearchQuery, features: RoFeatures, -) -> Result { - let before_search = Instant::now(); - let rtxn = index.read_txn()?; - - let mut search = index.search(&rtxn); +) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { + let mut search = index.search(rtxn); if query.vector.is_some() && query.q.is_some() { warn!("Ignoring the query string `q` when used with the `vector` parameter."); @@ -328,7 +337,7 @@ pub fn perform_search( search.terms_matching_strategy(query.matching_strategy.into()); let max_total_hits = index - .pagination_max_total_hits(&rtxn) + .pagination_max_total_hits(rtxn) .map_err(milli::Error::from)? .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); @@ -383,6 +392,20 @@ pub fn perform_search( search.sort_criteria(sort); } + Ok((search, is_finite_pagination, max_total_hits, offset)) +} + +pub fn perform_search( + index: &Index, + query: SearchQuery, + features: RoFeatures, +) -> Result { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; + + let (search, is_finite_pagination, max_total_hits, offset) = + prepare_search(index, &rtxn, &query, features)?; + let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. 
} = search.execute()?; @@ -557,6 +580,29 @@ pub fn perform_search( Ok(result) } +pub fn perform_facet_search( + index: &Index, + search_query: SearchQuery, + facet_query: Option, + facet_name: String, + features: RoFeatures, +) -> Result { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; + + let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?; + let mut facet_search = SearchForFacetValues::new(facet_name, search); + if let Some(facet_query) = &facet_query { + facet_search.query(facet_query); + } + + Ok(FacetSearchResult { + facet_hits: facet_search.execute()?, + facet_query, + processing_time_ms: before_search.elapsed().as_millis(), + }) +} + fn insert_geo_distance(sorts: &[String], document: &mut Document) { lazy_static::lazy_static! { static ref GEO_REGEX: Regex = diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 517024c74..1bb4345e8 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -377,6 +377,11 @@ impl Index<'_> { self.service.get(url).await } + pub async fn facet_search(&self, query: Value) -> (Value, StatusCode) { + let url = format!("/indexes/{}/facet-search", urlencode(self.uid.as_ref())); + self.service.post_encoded(url, query, self.encoder).await + } + pub async fn update_distinct_attribute(&self, value: Value) -> (Value, StatusCode) { let url = format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute"); diff --git a/meilisearch/tests/search/facet_search.rs b/meilisearch/tests/search/facet_search.rs new file mode 100644 index 000000000..7628f2fed --- /dev/null +++ b/meilisearch/tests/search/facet_search.rs @@ -0,0 +1,92 @@ +use once_cell::sync::Lazy; +use serde_json::{json, Value}; + +use crate::common::Server; + +pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "title": "Shazam!", + "genres": ["Action", "Adventure"], + "id": "287947", + }, + { + "title": "Captain Marvel", + "genres": 
["Action", "Adventure"], + "id": "299537", + }, + { + "title": "Escape Room", + "genres": ["Horror", "Thriller", "Multiple Words"], + "id": "522681", + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "genres": ["Action", "Comedy"], + "id": "166428", + }, + { + "title": "Gläss", + "genres": ["Thriller"], + "id": "450465", + } + ]) +}); + +#[actix_rt::test] +async fn simple_facet_search() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2); + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "adventure"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(response["facetHits"].as_array().unwrap().len(), 1); +} + +#[actix_rt::test] +async fn non_filterable_facet_search_error() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + assert_eq!(code, 400, "{}", response); + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "adv"})).await; + assert_eq!(code, 400, "{}", response); +} + +#[actix_rt::test] +async fn facet_search_dont_support_words() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let 
(response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "words"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(response["facetHits"].as_array().unwrap().len(), 0); +} diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 6a55c7569..97556ae2a 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -2,6 +2,7 @@ // should be tested in its own module to isolate tests and keep the tests readable. mod errors; +mod facet_search; mod formatted; mod multi; mod pagination; diff --git a/milli/src/error.rs b/milli/src/error.rs index 61597faf3..1e8e767d3 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -128,6 +128,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco } )] InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, + #[error("Attribute `{}` is not facet-searchable. {}", + .field, + match .valid_fields.is_empty() { + true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(), + false => format!("Available facet-searchable attributes are: `{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.", + valid_fields.iter().map(AsRef::as_ref).collect::>().join(", ") + ), + } + )] + InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet }, #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.", .field, .valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), diff --git a/milli/src/heed_codec/fst_set_codec.rs b/milli/src/heed_codec/fst_set_codec.rs new file mode 100644 index 000000000..fc79acf29 --- /dev/null +++ b/milli/src/heed_codec/fst_set_codec.rs @@ -0,0 +1,23 @@ +use std::borrow::Cow; + +use fst::Set; +use heed::{BytesDecode, BytesEncode}; + +/// A codec for values of type `Set<&[u8]>`. 
+pub struct FstSetCodec; + +impl<'a> BytesEncode<'a> for FstSetCodec { + type EItem = Set>; + + fn bytes_encode(item: &'a Self::EItem) -> Option> { + Some(Cow::Borrowed(item.as_fst().as_bytes())) + } +} + +impl<'a> BytesDecode<'a> for FstSetCodec { + type DItem = Set<&'a [u8]>; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + Set::new(bytes).ok() + } +} diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index c54168a36..666f68e28 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -2,6 +2,7 @@ mod beu32_str_codec; mod byte_slice_ref; pub mod facet; mod field_id_word_count_codec; +mod fst_set_codec; mod obkv_codec; mod roaring_bitmap; mod roaring_bitmap_length; @@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec; pub use self::beu32_str_codec::BEU32StrCodec; pub use self::field_id_word_count_codec::FieldIdWordCountCodec; +pub use self::fst_set_codec::FstSetCodec; pub use self::obkv_codec::ObkvCodec; pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; pub use self::roaring_bitmap_length::{ diff --git a/milli/src/index.rs b/milli/src/index.rs index a22901993..5c32f75f5 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec, OrderedF64Codec, }; -use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec}; +use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec}; use crate::readable_slices::ReadableSlices; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, @@ -94,6 +94,7 @@ pub mod db_name { pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids"; pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids"; pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; + pub const FACET_ID_STRING_FST: &str = 
"facet-id-string-fst"; pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; pub const VECTOR_ID_DOCID: &str = "vector-id-docids"; @@ -154,6 +155,8 @@ pub struct Index { pub facet_id_f64_docids: Database, FacetGroupValueCodec>, /// Maps the facet field id and ranges of strings with the docids that corresponds to them. pub facet_id_string_docids: Database, FacetGroupValueCodec>, + /// Maps the facet field id of the string facets with an FST containing all the facets values. + pub facet_id_string_fst: Database, FstSetCodec>, /// Maps the document id, the facet field id and the numbers. pub field_id_docid_facet_f64s: Database, @@ -206,6 +209,7 @@ impl Index { let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?; let facet_id_string_docids = env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?; + let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?; let facet_id_exists_docids = env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?; let facet_id_is_null_docids = @@ -240,6 +244,7 @@ impl Index { field_id_word_count_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_string_fst, facet_id_exists_docids, facet_id_is_null_docids, facet_id_is_empty_docids, diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 99126f60e..4360eb38e 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -57,8 +57,9 @@ pub use self::heed_codec::{ }; pub use self::index::Index; pub use self::search::{ - FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search, - SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, + FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, + MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, + DEFAULT_VALUES_PER_FACET, }; pub type Result = std::result::Result; diff --git 
a/milli/src/search/mod.rs b/milli/src/search/mod.rs index dc25c0f23..e05562f8e 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,15 +1,21 @@ use std::fmt; +use fst::automaton::{Automaton, Str}; +use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; +use log::error; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords}; use self::new::PartialSearchResult; +use crate::error::UserError; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, + execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, + Result, SearchContext, BEU16, }; // Building these factories is not free. @@ -17,6 +23,9 @@ static LEVDIST0: Lazy = Lazy::new(|| LevBuilder::new(0, true)); static LEVDIST1: Lazy = Lazy::new(|| LevBuilder::new(1, true)); static LEVDIST2: Lazy = Lazy::new(|| LevBuilder::new(2, true)); +/// The maximum number of facets returned by the facet search route. 
+const MAX_NUMBER_OF_FACETS: usize = 100; + pub mod facet; mod fst_utils; pub mod new; @@ -234,6 +243,195 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { } } +pub struct SearchForFacetValues<'a> { + query: Option, + facet: String, + search_query: Search<'a>, +} + +impl<'a> SearchForFacetValues<'a> { + pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> { + SearchForFacetValues { query: None, facet, search_query } + } + + pub fn query(&mut self, query: impl Into) -> &mut Self { + self.query = Some(query.into()); + self + } + + fn one_original_value_of( + &self, + field_id: FieldId, + facet_str: &str, + any_docid: DocumentId, + ) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); + Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) + } + + pub fn execute(&self) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + + let filterable_fields = index.filterable_fields(rtxn)?; + if !filterable_fields.contains(&self.facet) { + return Err(UserError::InvalidFacetSearchFacetName { + field: self.facet.clone(), + valid_fields: filterable_fields.into_iter().collect(), + } + .into()); + } + + let fields_ids_map = index.fields_ids_map(rtxn)?; + let fid = match fields_ids_map.id(&self.facet) { + Some(fid) => fid, + // we return an empty list of results when the attribute has been + // set as filterable but no document contains this field (yet). + None => return Ok(Vec::new()), + }; + + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? 
{ + Some(fst) => fst, + None => return Ok(vec![]), + }; + + let search_candidates = self.search_query.execute()?.candidates; + + match self.query.as_ref() { + Some(query) => { + let query = normalize_facet(query); + let query = query.as_str(); + let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; + let field_authorizes_typos = + !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); + + if authorize_typos && field_authorizes_typos { + let mut results = vec![]; + + let exact_words_fst = self.search_query.index.exact_words(rtxn)?; + if exact_words_fst.map_or(false, |fst| fst.contains(query)) { + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query }; + if let Some(FacetGroupValue { bitmap, .. }) = + index.facet_id_string_docids.get(rtxn, &key)? + { + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, query, bitmap.min().unwrap())? + .unwrap_or_else(|| query.to_string()); + results.push(FacetValueHit { value, count }); + } + } + } else { + let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; + let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; + + let is_prefix = true; + let automaton = if query.len() < one_typo as usize { + build_dfa(query, 0, is_prefix) + } else if query.len() < two_typos as usize { + build_dfa(query, 1, is_prefix) + } else { + build_dfa(query, 2, is_prefix) + }; + + let mut stream = fst.search(automaton).into_stream(); + let mut length = 0; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. 
}) => bitmap, + None => { + error!( + "the facet value is missing from the facet database: {key:?}" + ); + continue; + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + let value = self + .one_original_value_of(fid, value, docids.min().unwrap())? + .unwrap_or_else(|| value.to_string()); + results.push(FacetValueHit { value, count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; + } + } + } + + Ok(results) + } else { + let automaton = Str::new(query).starts_with(); + let mut stream = fst.search(automaton).into_stream(); + let mut results = vec![]; + let mut length = 0; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!( + "the facet value is missing from the facet database: {key:?}" + ); + continue; + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + let value = self + .one_original_value_of(fid, value, docids.min().unwrap())? + .unwrap_or_else(|| value.to_string()); + results.push(FacetValueHit { value, count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; + } + } + + Ok(results) + } + } + None => { + let mut results = vec![]; + let mut length = 0; + let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
+ .unwrap_or_else(|| left_bound.to_string()); + results.push(FacetValueHit { value, count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; + } + } + Ok(results) + } + } + } +} + +#[derive(Debug, Clone, serde::Serialize, PartialEq)] +pub struct FacetValueHit { + /// The original facet value + pub value: String, + /// The number of documents associated to this facet + pub count: u64, +} + #[cfg(test)] mod test { #[allow(unused_imports)] diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index f4a2d43fe..5fdf8ef49 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -34,6 +34,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { script_language_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_string_fst, facet_id_exists_docids, facet_id_is_null_docids, facet_id_is_empty_docids, @@ -91,6 +92,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { word_prefix_fid_docids.clear(self.wtxn)?; script_language_docids.clear(self.wtxn)?; facet_id_f64_docids.clear(self.wtxn)?; + facet_id_string_fst.clear(self.wtxn)?; facet_id_exists_docids.clear(self.wtxn)?; facet_id_is_null_docids.clear(self.wtxn)?; facet_id_is_empty_docids.clear(self.wtxn)?; diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 766f0e16e..c9124e591 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -237,6 +237,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { word_prefix_fid_docids, facet_id_f64_docids: _, facet_id_string_docids: _, + facet_id_string_fst: _, field_id_docid_facet_f64s: _, field_id_docid_facet_strings: _, script_language_docids, diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 2fd748d4d..0e6fd494c 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5; use std::fs::File; +use 
heed::types::DecodeIgnore; use log::debug; use time::OffsetDateTime; use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; -use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::ByteSliceRefCodec; -use crate::{Index, Result}; +use crate::{Index, Result, BEU16}; pub mod bulk; pub mod delete; @@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> { ); incremental_update.execute(wtxn)?; } + + // We compute one FST by string facet + let mut text_fsts = vec![]; + let mut current_fst: Option<(u16, fst::SetBuilder>)> = None; + let database = self.index.facet_id_string_docids.remap_data_type::(); + for result in database.iter(wtxn)? { + let (facet_group_key, _) = result?; + if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key { + current_fst = match current_fst.take() { + Some((fid, fst_builder)) if fid != field_id => { + let fst = fst_builder.into_set(); + text_fsts.push((fid, fst)); + Some((field_id, fst::SetBuilder::memory())) + } + Some((field_id, fst_builder)) => Some((field_id, fst_builder)), + None => Some((field_id, fst::SetBuilder::memory())), + }; + + if let Some((_, fst_builder)) = current_fst.as_mut() { + fst_builder.insert(left_bound)?; + } + } + } + + if let Some((field_id, fst_builder)) = current_fst { + let fst = fst_builder.into_set(); + text_fsts.push((field_id, fst)); + } + + // We remove all of the previous FSTs that were in this database + self.index.facet_id_string_fst.clear(wtxn)?; + + // We write those FSTs in LMDB now + for (field_id, fst) in text_fsts { + self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?; + } + Ok(()) } } diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs deleted file mode 100644 index 8b1378917..000000000 --- a/milli/src/update/facets.rs +++ /dev/null @@ -1 +0,0 @@ -