Add the count to the facet distribution

This commit is contained in:
Kerollmops 2021-01-26 14:14:37 +01:00
parent 4b9e81fc89
commit 7be275b692
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 46 additions and 22 deletions

1
http-ui/Cargo.lock generated
View File

@ -1335,6 +1335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fe9037165d7023b1228bc4ae9a2fa1a2b0095eca6c2998c624723dfd01314a5" checksum = "9fe9037165d7023b1228bc4ae9a2fa1a2b0095eca6c2998c624723dfd01314a5"
dependencies = [ dependencies = [
"num-traits", "num-traits",
"serde",
] ]
[[package]] [[package]]

View File

@ -57,7 +57,7 @@ $('#query, #filters').on('input', function () {
} }
// Create the newly discovered facets // Create the newly discovered facets
let diff = diffArray(data.facets[facet_name], selected_values); let diff = diffArray(Object.keys(data.facets[facet_name]), selected_values);
for (value of diff) { for (value of diff) {
let option = $('<option></option>') let option = $('<option></option>')
.text(value) .text(value)

View File

@ -1,4 +1,4 @@
use std::collections::{HashMap, HashSet}; use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Display; use std::fmt::Display;
use std::fs::{File, create_dir_all}; use std::fs::{File, create_dir_all};
use std::net::SocketAddr; use std::net::SocketAddr;
@ -29,6 +29,7 @@ use warp::filters::ws::Message;
use warp::{Filter, http::Response}; use warp::{Filter, http::Response};
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use milli::facet::FacetValue;
use milli::update::UpdateIndexingStep::*; use milli::update::UpdateIndexingStep::*;
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat}; use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition}; use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
@ -652,7 +653,7 @@ async fn main() -> anyhow::Result<()> {
struct Answer { struct Answer {
documents: Vec<Map<String, Value>>, documents: Vec<Map<String, Value>>,
number_of_candidates: u64, number_of_candidates: u64,
facets: HashMap<String, Vec<Value>>, facets: BTreeMap<String, BTreeMap<FacetValue, u64>>,
} }
let disable_highlighting = opt.disable_highlighting; let disable_highlighting = opt.disable_highlighting;

View File

@ -1,8 +1,7 @@
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use serde::{Serialize, Deserialize}; use serde::{Serialize, Serializer};
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[derive(Serialize, Deserialize)]
pub enum FacetValue { pub enum FacetValue {
String(String), String(String),
Float(OrderedFloat<f64>), Float(OrderedFloat<f64>),
@ -38,3 +37,22 @@ impl From<i64> for FacetValue {
FacetValue::Integer(integer) FacetValue::Integer(integer)
} }
} }
/// Serializes a `FacetValue` as a plain string, whatever the variant.
///
/// `String` values are emitted as-is; `Float` and `Integer` values are first
/// rendered with `to_string()` and then emitted as strings as well.
// NOTE(review): presumably numbers are stringified so that a `FacetValue`
// can be used as a key of a serialized map — confirm against the callers.
impl Serialize for FacetValue {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Holds the textual form of numeric variants so that every arm can
        // hand back a `&str` and the serializer is called from a single place.
        let rendered: String;
        let text: &str = match self {
            FacetValue::String(string) => string.as_str(),
            FacetValue::Float(float) => {
                rendered = float.to_string();
                rendered.as_str()
            }
            FacetValue::Integer(integer) => {
                rendered = integer.to_string();
                rendered.as_str()
            }
        };

        serializer.serialize_str(text)
    }
}

View File

@ -1,4 +1,4 @@
use std::collections::{HashSet, BTreeSet, BTreeMap}; use std::collections::{HashSet, BTreeMap};
use std::ops::Bound::Unbounded; use std::ops::Bound::Unbounded;
use std::{cmp, fmt}; use std::{cmp, fmt};
@ -38,13 +38,18 @@ impl<'a> FacetDistribution<'a> {
self self
} }
fn facet_values(&self, field_id: FieldId, facet_type: FacetType) -> heed::Result<BTreeSet<FacetValue>> { fn facet_values(
&self,
field_id: FieldId,
facet_type: FacetType,
) -> heed::Result<BTreeMap<FacetValue, u64>>
{
if let Some(candidates) = self.candidates.as_ref() { if let Some(candidates) = self.candidates.as_ref() {
if candidates.len() <= 1000 { if candidates.len() <= 1000 {
let mut key_buffer = vec![field_id]; let mut key_buffer = vec![field_id];
match facet_type { match facet_type {
FacetType::Float => { FacetType::Float => {
let mut facet_values = BTreeSet::new(); let mut facet_values = BTreeMap::new();
for docid in candidates { for docid in candidates {
key_buffer.truncate(1); key_buffer.truncate(1);
key_buffer.extend_from_slice(&docid.to_be_bytes()); key_buffer.extend_from_slice(&docid.to_be_bytes());
@ -53,13 +58,13 @@ impl<'a> FacetDistribution<'a> {
.remap_key_type::<FieldDocIdFacetF64Codec>(); .remap_key_type::<FieldDocIdFacetF64Codec>();
for result in iter { for result in iter {
let ((_, _, value), ()) = result?; let ((_, _, value), ()) = result?;
facet_values.insert(FacetValue::from(value)); *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1;
} }
} }
Ok(facet_values) Ok(facet_values)
}, },
FacetType::Integer => { FacetType::Integer => {
let mut facet_values = BTreeSet::new(); let mut facet_values = BTreeMap::new();
for docid in candidates { for docid in candidates {
key_buffer.truncate(1); key_buffer.truncate(1);
key_buffer.extend_from_slice(&docid.to_be_bytes()); key_buffer.extend_from_slice(&docid.to_be_bytes());
@ -68,13 +73,13 @@ impl<'a> FacetDistribution<'a> {
.remap_key_type::<FieldDocIdFacetI64Codec>(); .remap_key_type::<FieldDocIdFacetI64Codec>();
for result in iter { for result in iter {
let ((_, _, value), ()) = result?; let ((_, _, value), ()) = result?;
facet_values.insert(FacetValue::from(value)); *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1;
} }
} }
Ok(facet_values) Ok(facet_values)
}, },
FacetType::String => { FacetType::String => {
let mut facet_values = BTreeSet::new(); let mut facet_values = BTreeMap::new();
for docid in candidates { for docid in candidates {
key_buffer.truncate(1); key_buffer.truncate(1);
key_buffer.extend_from_slice(&docid.to_be_bytes()); key_buffer.extend_from_slice(&docid.to_be_bytes());
@ -83,7 +88,7 @@ impl<'a> FacetDistribution<'a> {
.remap_key_type::<FieldDocIdFacetStringCodec>(); .remap_key_type::<FieldDocIdFacetStringCodec>();
for result in iter { for result in iter {
let ((_, _, value), ()) = result?; let ((_, _, value), ()) = result?;
facet_values.insert(FacetValue::from(value)); *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1;
} }
} }
Ok(facet_values) Ok(facet_values)
@ -113,11 +118,12 @@ impl<'a> FacetDistribution<'a> {
}, },
}; };
let mut facet_values = BTreeSet::new(); let mut facet_values = BTreeMap::new();
for result in iter { for result in iter {
let (value, docids) = result?; let (value, mut docids) = result?;
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) { docids.intersect_with(candidates);
facet_values.insert(value); if !docids.is_empty() {
facet_values.insert(value, docids.len());
} }
if facet_values.len() == self.max_values_by_facet { if facet_values.len() == self.max_values_by_facet {
break; break;
@ -152,12 +158,10 @@ impl<'a> FacetDistribution<'a> {
}, },
}; };
let mut facet_values = BTreeSet::new(); let mut facet_values = BTreeMap::new();
for result in iter { for result in iter {
let (value, docids) = result?; let (value, docids) = result?;
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) { facet_values.insert(value, docids.len());
facet_values.insert(value);
}
if facet_values.len() == self.max_values_by_facet { if facet_values.len() == self.max_values_by_facet {
break; break;
} }
@ -167,7 +171,7 @@ impl<'a> FacetDistribution<'a> {
} }
} }
pub fn execute(&self) -> heed::Result<BTreeMap<String, BTreeSet<FacetValue>>> { pub fn execute(&self) -> heed::Result<BTreeMap<String, BTreeMap<FacetValue, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let faceted_fields = self.index.faceted_fields(self.rtxn)?; let faceted_fields = self.index.faceted_fields(self.rtxn)?;
let fields_ids: Vec<_> = match &self.facets { let fields_ids: Vec<_> = match &self.facets {