mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Support a basic facet based query system
This commit is contained in:
parent
1d5795d134
commit
2341b99379
@ -28,7 +28,7 @@ use warp::{Filter, http::Response};
|
|||||||
use milli::tokenizer::{simple_tokenizer, TokenType};
|
use milli::tokenizer::{simple_tokenizer, TokenType};
|
||||||
use milli::update::UpdateIndexingStep::*;
|
use milli::update::UpdateIndexingStep::*;
|
||||||
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
|
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
|
||||||
use milli::{obkv_to_json, Index, UpdateStore, SearchResult};
|
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
|
||||||
|
|
||||||
static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
|
static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
|
||||||
|
|
||||||
@ -550,9 +550,12 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
.body(include_str!("../public/logo-black.svg"))
|
.body(include_str!("../public/logo-black.svg"))
|
||||||
);
|
);
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
|
#[serde(deny_unknown_fields)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
struct QueryBody {
|
struct QueryBody {
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
|
facet_condition: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
let disable_highlighting = opt.disable_highlighting;
|
let disable_highlighting = opt.disable_highlighting;
|
||||||
@ -569,6 +572,10 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
if let Some(query) = query.query {
|
if let Some(query) = query.query {
|
||||||
search.query(query);
|
search.query(query);
|
||||||
}
|
}
|
||||||
|
if let Some(condition) = query.facet_condition {
|
||||||
|
let condition = FacetCondition::from_str(&rtxn, &index, &condition).unwrap();
|
||||||
|
search.facet_condition(condition);
|
||||||
|
}
|
||||||
|
|
||||||
let SearchResult { found_words, documents_ids } = search.execute().unwrap();
|
let SearchResult { found_words, documents_ids } = search.execute().unwrap();
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ pub use self::criterion::{Criterion, default_criteria};
|
|||||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
pub use self::fields_ids_map::FieldsIdsMap;
|
pub use self::fields_ids_map::FieldsIdsMap;
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{Search, SearchResult};
|
pub use self::search::{Search, FacetCondition, SearchResult};
|
||||||
pub use self::heed_codec::{
|
pub use self::heed_codec::{
|
||||||
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec,
|
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec,
|
||||||
ObkvCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
ObkvCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
||||||
|
165
src/search.rs
165
src/search.rs
@ -1,6 +1,8 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
use anyhow::{bail, ensure, Context};
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use levenshtein_automata::DFA;
|
use levenshtein_automata::DFA;
|
||||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||||
@ -8,8 +10,10 @@ use log::debug;
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
|
|
||||||
use crate::query_tokens::{QueryTokens, QueryToken};
|
use crate::facet::FacetType;
|
||||||
|
use crate::heed_codec::{CboRoaringBitmapCodec, facet::FacetValueI64Codec};
|
||||||
use crate::mdfs::Mdfs;
|
use crate::mdfs::Mdfs;
|
||||||
|
use crate::query_tokens::{QueryTokens, QueryToken};
|
||||||
use crate::{Index, DocumentId};
|
use crate::{Index, DocumentId};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
@ -17,8 +21,91 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
|||||||
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
||||||
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
||||||
|
|
||||||
|
// TODO: also support floats.
/// A comparison applied to the integer value of a facet.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum FacetOperator {
    /// Matches values strictly greater than the operand.
    GreaterThan(i64),
    /// Matches values greater than or equal to the operand.
    GreaterThanOrEqual(i64),
    /// Matches values strictly lower than the operand.
    LowerThan(i64),
    /// Matches values lower than or equal to the operand.
    LowerThanOrEqual(i64),
    /// Matches values equal to the operand.
    Equal(i64),
    /// Matches values lying between the two operands, both ends included.
    Between(i64, i64),
}
|
||||||
|
|
||||||
|
// TODO also support ANDs, ORs, NOTs.
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||||
|
pub enum FacetCondition {
|
||||||
|
Operator(u8, FacetOperator),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FacetCondition {
|
||||||
|
pub fn from_str(
|
||||||
|
rtxn: &heed::RoTxn,
|
||||||
|
index: &Index,
|
||||||
|
string: &str,
|
||||||
|
) -> anyhow::Result<Option<FacetCondition>>
|
||||||
|
{
|
||||||
|
use FacetCondition::*;
|
||||||
|
use FacetOperator::*;
|
||||||
|
|
||||||
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
|
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||||
|
|
||||||
|
// TODO use a better parsing technic
|
||||||
|
let mut iter = string.split_whitespace();
|
||||||
|
|
||||||
|
let field_name = match iter.next() {
|
||||||
|
Some(field_name) => field_name,
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
let field_id = fields_ids_map.id(&field_name).with_context(|| format!("field {} not found", field_name))?;
|
||||||
|
let field_type = faceted_fields.get(&field_id).with_context(|| format!("field {} is not faceted", field_name))?;
|
||||||
|
|
||||||
|
ensure!(*field_type == FacetType::Integer, "Only conditions on integer facets");
|
||||||
|
|
||||||
|
match iter.next() {
|
||||||
|
Some(">") => {
|
||||||
|
let param = iter.next().context("missing parameter")?;
|
||||||
|
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
|
||||||
|
Ok(Some(Operator(field_id, GreaterThan(value))))
|
||||||
|
},
|
||||||
|
Some(">=") => {
|
||||||
|
let param = iter.next().context("missing parameter")?;
|
||||||
|
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
|
||||||
|
Ok(Some(Operator(field_id, GreaterThanOrEqual(value))))
|
||||||
|
},
|
||||||
|
Some("<") => {
|
||||||
|
let param = iter.next().context("missing parameter")?;
|
||||||
|
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
|
||||||
|
Ok(Some(Operator(field_id, LowerThan(value))))
|
||||||
|
},
|
||||||
|
Some("<=") => {
|
||||||
|
let param = iter.next().context("missing parameter")?;
|
||||||
|
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
|
||||||
|
Ok(Some(Operator(field_id, LowerThanOrEqual(value))))
|
||||||
|
},
|
||||||
|
Some("=") => {
|
||||||
|
let param = iter.next().context("missing parameter")?;
|
||||||
|
let value = param.parse().with_context(|| format!("invalid parameter ({:?})", param))?;
|
||||||
|
Ok(Some(Operator(field_id, Equal(value))))
|
||||||
|
},
|
||||||
|
Some(otherwise) => {
|
||||||
|
// BETWEEN or X TO Y (both inclusive)
|
||||||
|
let left_param = otherwise.parse().with_context(|| format!("invalid first TO parameter ({:?})", otherwise))?;
|
||||||
|
ensure!(iter.next().map_or(false, |s| s.eq_ignore_ascii_case("to")), "TO keyword missing or invalid");
|
||||||
|
let next = iter.next().context("missing second TO parameter")?;
|
||||||
|
let right_param = next.parse().with_context(|| format!("invalid second TO parameter ({:?})", next))?;
|
||||||
|
Ok(Some(Operator(field_id, Between(left_param, right_param))))
|
||||||
|
},
|
||||||
|
None => bail!("missing facet filter first parameter"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Search<'a> {
|
pub struct Search<'a> {
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
|
facet_condition: Option<FacetCondition>,
|
||||||
offset: usize,
|
offset: usize,
|
||||||
limit: usize,
|
limit: usize,
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
@ -27,7 +114,7 @@ pub struct Search<'a> {
|
|||||||
|
|
||||||
impl<'a> Search<'a> {
|
impl<'a> Search<'a> {
|
||||||
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
||||||
Search { query: None, offset: 0, limit: 20, rtxn, index }
|
Search { query: None, facet_condition: None, offset: 0, limit: 20, rtxn, index }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Search<'a> {
|
pub fn query(&mut self, query: impl Into<String>) -> &mut Search<'a> {
|
||||||
@ -45,6 +132,11 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sets, or clears, the facet condition restricting the search candidates.
///
/// Accepts either a `FacetCondition` or an `Option<FacetCondition>`, so the
/// value produced by `FacetCondition::from_str` (which is `None` for an empty
/// condition string) can be handed over directly. Passing `None` removes any
/// condition previously set.
pub fn facet_condition(
    &mut self,
    condition: impl Into<Option<FacetCondition>>,
) -> &mut Search<'a> {
    self.facet_condition = condition.into();
    self
}
|
||||||
|
|
||||||
/// Extracts the query words from the query string and returns the DFAs accordingly.
|
/// Extracts the query words from the query string and returns the DFAs accordingly.
|
||||||
/// TODO introduce settings for the number of typos regarding the words lengths.
|
/// TODO introduce settings for the number of typos regarding the words lengths.
|
||||||
fn generate_query_dfas(query: &str) -> Vec<(String, bool, DFA)> {
|
fn generate_query_dfas(query: &str) -> Vec<(String, bool, DFA)> {
|
||||||
@ -135,22 +227,66 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
pub fn execute(&self) -> anyhow::Result<SearchResult> {
|
pub fn execute(&self) -> anyhow::Result<SearchResult> {
|
||||||
let limit = self.limit;
|
let limit = self.limit;
|
||||||
|
|
||||||
let fst = self.index.words_fst(self.rtxn)?;
|
let fst = self.index.words_fst(self.rtxn)?;
|
||||||
|
|
||||||
// Construct the DFAs related to the query words.
|
// Construct the DFAs related to the query words.
|
||||||
let dfas = match self.query.as_deref().map(Self::generate_query_dfas) {
|
let derived_words = match self.query.as_deref().map(Self::generate_query_dfas) {
|
||||||
Some(dfas) if !dfas.is_empty() => dfas,
|
Some(dfas) if !dfas.is_empty() => Some(self.fetch_words_docids(&fst, dfas)?),
|
||||||
_ => {
|
_otherwise => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// We create the original candidates with the facet conditions results.
|
||||||
|
let facet_candidates = match self.facet_condition {
|
||||||
|
Some(FacetCondition::Operator(fid, operator)) => {
|
||||||
|
use std::ops::Bound::{Included, Excluded};
|
||||||
|
use FacetOperator::*;
|
||||||
|
// Make sure we always bound the ranges with the field id, as the facets
|
||||||
|
// values are all in the same database and prefixed by the field id.
|
||||||
|
let range = match operator {
|
||||||
|
GreaterThan(val) => (Excluded((fid, val)), Included((fid, i64::MAX))),
|
||||||
|
GreaterThanOrEqual(val) => (Included((fid, val)), Included((fid, i64::MAX))),
|
||||||
|
LowerThan(val) => (Included((fid, i64::MIN)), Excluded((fid, val))),
|
||||||
|
LowerThanOrEqual(val) => (Included((fid, i64::MIN)), Included((fid, val))),
|
||||||
|
Equal(val) => (Included((fid, val)), Included((fid, val))),
|
||||||
|
Between(left, right) => (Included((fid, left)), Included((fid, right))),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
|
||||||
|
let db = self.index.facet_field_id_value_docids;
|
||||||
|
let db = db.remap_types::<FacetValueI64Codec, CboRoaringBitmapCodec>();
|
||||||
|
for result in db.range(self.rtxn, &range)? {
|
||||||
|
let ((_fid, _value), docids) = result?;
|
||||||
|
candidates.union_with(&docids);
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(candidates)
|
||||||
|
},
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let (candidates, derived_words) = match (facet_candidates, derived_words) {
|
||||||
|
(Some(mut facet_candidates), Some(derived_words)) => {
|
||||||
|
let words_candidates = Self::compute_candidates(&derived_words);
|
||||||
|
facet_candidates.intersect_with(&words_candidates);
|
||||||
|
(facet_candidates, derived_words)
|
||||||
|
},
|
||||||
|
(None, Some(derived_words)) => {
|
||||||
|
(Self::compute_candidates(&derived_words), derived_words)
|
||||||
|
},
|
||||||
|
(Some(facet_candidates), None) => {
|
||||||
|
// If the query is not set or results in no DFAs but
|
||||||
|
// there is some facet conditions we return a placeholder.
|
||||||
|
let documents_ids = facet_candidates.iter().take(limit).collect();
|
||||||
|
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||||
|
},
|
||||||
|
(None, None) => {
|
||||||
// If the query is not set or results in no DFAs we return a placeholder.
|
// If the query is not set or results in no DFAs we return a placeholder.
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?.iter().take(limit).collect();
|
let documents_ids = self.index.documents_ids(self.rtxn)?.iter().take(limit).collect();
|
||||||
return Ok(SearchResult { documents_ids, ..Default::default() })
|
return Ok(SearchResult { documents_ids, ..Default::default() })
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let derived_words = self.fetch_words_docids(&fst, dfas)?;
|
|
||||||
let candidates = Self::compute_candidates(&derived_words);
|
|
||||||
|
|
||||||
debug!("candidates: {:?}", candidates);
|
debug!("candidates: {:?}", candidates);
|
||||||
|
|
||||||
// The mana depth first search is a revised DFS that explore
|
// The mana depth first search is a revised DFS that explore
|
||||||
@ -175,6 +311,17 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for Search<'_> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
f.debug_struct("Search")
|
||||||
|
.field("query", &self.query)
|
||||||
|
.field("facet_condition", &self.facet_condition)
|
||||||
|
.field("offset", &self.offset)
|
||||||
|
.field("limit", &self.limit)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct SearchResult {
|
pub struct SearchResult {
|
||||||
pub found_words: HashSet<String>,
|
pub found_words: HashSet<String>,
|
||||||
|
Loading…
Reference in New Issue
Block a user