apply all style review comments

This commit is contained in:
Tamo 2024-05-15 15:02:26 +02:00
parent 9fffb8e83d
commit 7ec4e2a3fb
6 changed files with 45 additions and 36 deletions

View File

@ -85,11 +85,13 @@ impl SearchQueue {
}, },
search_request = receive_new_searches.recv() => { search_request = receive_new_searches.recv() => {
if search_request.is_none() { let search_request = match search_request {
continue; Some(search_request) => search_request,
} // This should never happen while actix-web is running, but it's not a reason to crash
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web // and it can generate a lot of noise in the tests.
let search_request = search_request.unwrap(); None => continue,
};
if searches_running < usize::from(parallelism) && queue.is_empty() { if searches_running < usize::from(parallelism) && queue.is_empty() {
searches_running += 1; searches_running += 1;
// if the search requests die it's not a hard error on our side // if the search requests die it's not a hard error on our side

View File

@ -32,6 +32,8 @@ pub enum InternalError {
DatabaseClosing, DatabaseClosing,
#[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))] #[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> }, DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
#[error("Missing {key} in the fieldids weights mapping.")]
FieldidsWeightsMapMissingEntry { key: FieldId },
#[error(transparent)] #[error(transparent)]
FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry), FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
#[error("Missing {key} in the field id mapping.")] #[error("Missing {key} in the field id mapping.")]

View File

@ -1,3 +1,5 @@
//! The fieldids weights map is in charge of storing the link between the searchable fields and their weights.
use std::collections::HashMap; use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -10,22 +12,29 @@ pub struct FieldidsWeightsMap {
} }
impl FieldidsWeightsMap { impl FieldidsWeightsMap {
/// Inserts a field id -> weight pair into the map.
/// If the map did not have this key present, `None` is returned.
/// If the map did have this key present, the value is updated, and the old value is returned.
pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> { pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
self.map.insert(fid, weight) self.map.insert(fid, weight)
} }
/// Removes a field id from the map, returning the associated weight previously in the map.
pub fn remove(&mut self, fid: FieldId) -> Option<Weight> { pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
self.map.remove(&fid) self.map.remove(&fid)
} }
/// Returns the weight corresponding to the given field id, if any.
pub fn weight(&self, fid: FieldId) -> Option<Weight> { pub fn weight(&self, fid: FieldId) -> Option<Weight> {
self.map.get(&fid).copied() self.map.get(&fid).copied()
} }
/// Returns the highest weight contained in the map, if any.
pub fn max_weight(&self) -> Option<Weight> { pub fn max_weight(&self) -> Option<Weight> {
self.map.values().copied().max() self.map.values().copied().max()
} }
/// Returns an iterator visiting all field ids in arbitrary order.
pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ { pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
self.map.keys().copied() self.map.keys().copied()
} }

View File

@ -26,9 +26,9 @@ use crate::proximity::ProximityPrecision;
use crate::vector::EmbeddingConfig; use crate::vector::EmbeddingConfig;
use crate::{ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Weight, BEU16, BEU32, BEU64, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
}; };
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -446,22 +446,25 @@ impl Index {
pub fn searchable_fields_and_weights<'a>( pub fn searchable_fields_and_weights<'a>(
&self, &self,
rtxn: &'a RoTxn, rtxn: &'a RoTxn,
) -> heed::Result<Vec<(Cow<'a, str>, FieldId, Weight)>> { ) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
let fid_map = self.fields_ids_map(rtxn)?; let fid_map = self.fields_ids_map(rtxn)?;
let weight_map = self.fieldids_weights_map(rtxn)?; let weight_map = self.fieldids_weights_map(rtxn)?;
let searchable = self.searchable_fields(rtxn)?; let searchable = self.searchable_fields(rtxn)?;
Ok(searchable searchable
.into_iter() .into_iter()
.map(|field| { .map(|field| -> Result<_> {
// the searchable attributes are a subset of the field id map let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
let fid = fid_map.id(&field).unwrap(); field_name: field.to_string(),
// all the searchable fields have a weight process: "searchable_fields_and_weights",
let weight = weight_map.weight(fid).unwrap(); })?;
let weight = weight_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
(field, fid, weight) Ok((field, fid, weight))
}) })
.collect()) .collect()
} }
/* geo rtree */ /* geo rtree */

View File

@ -7,7 +7,7 @@ use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
use crate::search::new::SearchContext; use crate::search::new::SearchContext;
use crate::{FieldId, Result}; use crate::{FieldId, InternalError, Result};
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
pub struct FidCondition { pub struct FidCondition {
@ -29,10 +29,9 @@ impl RankingRuleGraphTrait for FidGraph {
let docids = if let Some(fid) = condition.fid { let docids = if let Some(fid) = condition.fid {
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument // maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
let mut docids = let docids =
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?; compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
docids &= universe; docids & universe
docids
} else { } else {
RoaringBitmap::new() RoaringBitmap::new()
}; };
@ -75,7 +74,9 @@ impl RankingRuleGraphTrait for FidGraph {
let mut edges = vec![]; let mut edges = vec![];
for fid in all_fields.iter().copied() { for fid in all_fields.iter().copied() {
let weight = weights_map.weight(fid).unwrap(); let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
edges.push(( edges.push((
weight as u32 * term.term_ids.len() as u32, weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }), conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),

View File

@ -475,33 +475,25 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
return Ok(false); return Ok(false);
} }
// every time the searchable attributes are updated, we need to update the
// ids for any settings that uses the facets. (distinct_fields, filterable_fields).
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// Since we're updating the settings we can only add new fields at the end of the field id map // Since we're updating the settings we can only add new fields at the end of the field id map
let mut new_fields_ids_map = old_fields_ids_map.clone(); let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let names = fields // fields are deduplicated, only the first occurrence is taken into account
.iter() let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
// fields are deduplicated, only the first occurrence is taken into account
.unique()
.map(String::as_str)
.collect::<Vec<_>>();
// Add all the searchable attributes to the field map, and then add the // Add all the searchable attributes to the field map, and then add the
// remaining fields from the old field map to the new one // remaining fields from the old field map to the new one
for name in names.iter() { for name in names.iter() {
// The fields ids map won't change the field id of already present elements thus only the // The fields ids map won't change the field id of already present elements thus only the
// new fields will be inserted. // new fields will be inserted.
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
} }
self.index.put_all_searchable_fields_from_fields_ids_map( self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn, self.wtxn,
Some(&names), Some(&names),
&new_fields_ids_map, &fields_ids_map,
)?; )?;
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
Ok(true) Ok(true)
} }
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?), Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),