Add the count to the facet distribution

This commit is contained in:
Kerollmops 2021-01-26 14:14:37 +01:00
parent 4b9e81fc89
commit 7be275b692
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 46 additions and 22 deletions

1
http-ui/Cargo.lock generated
View File

@ -1335,6 +1335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fe9037165d7023b1228bc4ae9a2fa1a2b0095eca6c2998c624723dfd01314a5"
dependencies = [
"num-traits",
"serde",
]
[[package]]

View File

@ -57,7 +57,7 @@ $('#query, #filters').on('input', function () {
}
// Create the newly discovered facets
let diff = diffArray(data.facets[facet_name], selected_values);
let diff = diffArray(Object.keys(data.facets[facet_name]), selected_values);
for (value of diff) {
let option = $('<option></option>')
.text(value)

View File

@ -1,4 +1,4 @@
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Display;
use std::fs::{File, create_dir_all};
use std::net::SocketAddr;
@ -29,6 +29,7 @@ use warp::filters::ws::Message;
use warp::{Filter, http::Response};
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use milli::facet::FacetValue;
use milli::update::UpdateIndexingStep::*;
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
@ -652,7 +653,7 @@ async fn main() -> anyhow::Result<()> {
struct Answer {
documents: Vec<Map<String, Value>>,
number_of_candidates: u64,
facets: HashMap<String, Vec<Value>>,
facets: BTreeMap<String, BTreeMap<FacetValue, u64>>,
}
let disable_highlighting = opt.disable_highlighting;

View File

@ -1,8 +1,7 @@
use ordered_float::OrderedFloat;
use serde::{Serialize, Deserialize};
use serde::{Serialize, Serializer};
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[derive(Serialize, Deserialize)]
pub enum FacetValue {
String(String),
Float(OrderedFloat<f64>),
@ -38,3 +37,22 @@ impl From<i64> for FacetValue {
FacetValue::Integer(integer)
}
}
/// Serializes every `FacetValue` variant as a string.
///
/// `String` values are emitted unchanged; `Float` and `Integer` values are
/// first rendered with `to_string()` and then emitted as a string too.
// NOTE(review): presumably numbers are stringified so that a `FacetValue`
// can be used as a map key in formats that only allow string keys (e.g. JSON
// objects) — confirm against the callers that serialize the distribution.
impl Serialize for FacetValue {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Numeric variants need an owned textual form; the string variant can
        // be handed to the serializer directly without any extra allocation.
        let rendered = match self {
            FacetValue::String(text) => return serializer.serialize_str(text),
            FacetValue::Float(number) => number.to_string(),
            FacetValue::Integer(number) => number.to_string(),
        };
        serializer.serialize_str(&rendered)
    }
}

View File

@ -1,4 +1,4 @@
use std::collections::{HashSet, BTreeSet, BTreeMap};
use std::collections::{HashSet, BTreeMap};
use std::ops::Bound::Unbounded;
use std::{cmp, fmt};
@ -38,13 +38,18 @@ impl<'a> FacetDistribution<'a> {
self
}
fn facet_values(&self, field_id: FieldId, facet_type: FacetType) -> heed::Result<BTreeSet<FacetValue>> {
fn facet_values(
&self,
field_id: FieldId,
facet_type: FacetType,
) -> heed::Result<BTreeMap<FacetValue, u64>>
{
if let Some(candidates) = self.candidates.as_ref() {
if candidates.len() <= 1000 {
let mut key_buffer = vec![field_id];
match facet_type {
FacetType::Float => {
let mut facet_values = BTreeSet::new();
let mut facet_values = BTreeMap::new();
for docid in candidates {
key_buffer.truncate(1);
key_buffer.extend_from_slice(&docid.to_be_bytes());
@ -53,13 +58,13 @@ impl<'a> FacetDistribution<'a> {
.remap_key_type::<FieldDocIdFacetF64Codec>();
for result in iter {
let ((_, _, value), ()) = result?;
facet_values.insert(FacetValue::from(value));
*facet_values.entry(FacetValue::from(value)).or_insert(0) += 1;
}
}
Ok(facet_values)
},
FacetType::Integer => {
let mut facet_values = BTreeSet::new();
let mut facet_values = BTreeMap::new();
for docid in candidates {
key_buffer.truncate(1);
key_buffer.extend_from_slice(&docid.to_be_bytes());
@ -68,13 +73,13 @@ impl<'a> FacetDistribution<'a> {
.remap_key_type::<FieldDocIdFacetI64Codec>();
for result in iter {
let ((_, _, value), ()) = result?;
facet_values.insert(FacetValue::from(value));
*facet_values.entry(FacetValue::from(value)).or_insert(0) += 1;
}
}
Ok(facet_values)
},
FacetType::String => {
let mut facet_values = BTreeSet::new();
let mut facet_values = BTreeMap::new();
for docid in candidates {
key_buffer.truncate(1);
key_buffer.extend_from_slice(&docid.to_be_bytes());
@ -83,7 +88,7 @@ impl<'a> FacetDistribution<'a> {
.remap_key_type::<FieldDocIdFacetStringCodec>();
for result in iter {
let ((_, _, value), ()) = result?;
facet_values.insert(FacetValue::from(value));
*facet_values.entry(FacetValue::from(value)).or_insert(0) += 1;
}
}
Ok(facet_values)
@ -113,11 +118,12 @@ impl<'a> FacetDistribution<'a> {
},
};
let mut facet_values = BTreeSet::new();
let mut facet_values = BTreeMap::new();
for result in iter {
let (value, docids) = result?;
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) {
facet_values.insert(value);
let (value, mut docids) = result?;
docids.intersect_with(candidates);
if !docids.is_empty() {
facet_values.insert(value, docids.len());
}
if facet_values.len() == self.max_values_by_facet {
break;
@ -152,12 +158,10 @@ impl<'a> FacetDistribution<'a> {
},
};
let mut facet_values = BTreeSet::new();
let mut facet_values = BTreeMap::new();
for result in iter {
let (value, docids) = result?;
if self.candidates.as_ref().map_or(true, |c| docids.is_disjoint(c)) {
facet_values.insert(value);
}
facet_values.insert(value, docids.len());
if facet_values.len() == self.max_values_by_facet {
break;
}
@ -167,7 +171,7 @@ impl<'a> FacetDistribution<'a> {
}
}
pub fn execute(&self) -> heed::Result<BTreeMap<String, BTreeSet<FacetValue>>> {
pub fn execute(&self) -> heed::Result<BTreeMap<String, BTreeMap<FacetValue, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
let fields_ids: Vec<_> = match &self.facets {