From 9e7c455a0177a829e1ff3a536951e51808e04f1f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 28 Oct 2024 14:09:48 +0100 Subject: [PATCH] GlobalFieldIdMap manages metadata --- milli/src/fields_ids_map.rs | 1 + milli/src/fields_ids_map/global.rs | 61 ++++++--- milli/src/fields_ids_map/metadata.rs | 184 +++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 18 deletions(-) create mode 100644 milli/src/fields_ids_map/metadata.rs diff --git a/milli/src/fields_ids_map.rs b/milli/src/fields_ids_map.rs index af96f6a86..9a016e7bd 100644 --- a/milli/src/fields_ids_map.rs +++ b/milli/src/fields_ids_map.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::FieldId; mod global; +pub mod metadata; pub use global::GlobalFieldsIdsMap; #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/milli/src/fields_ids_map/global.rs b/milli/src/fields_ids_map/global.rs index 40d7f389b..2b948b377 100644 --- a/milli/src/fields_ids_map/global.rs +++ b/milli/src/fields_ids_map/global.rs @@ -1,14 +1,15 @@ use std::collections::BTreeMap; use std::sync::RwLock; +use super::metadata::{FieldIdMapWithMetadata, Metadata}; use super::MutFieldIdMapper; use crate::documents::FieldIdMapper; -use crate::{FieldId, FieldsIdsMap}; +use crate::FieldId; /// A fields ids map that can be globally updated to add fields #[derive(Debug, Clone)] pub struct GlobalFieldsIdsMap<'indexing> { - global: &'indexing RwLock, + global: &'indexing RwLock, local: LocalFieldsIdsMap, } @@ -16,6 +17,7 @@ pub struct GlobalFieldsIdsMap<'indexing> { pub struct LocalFieldsIdsMap { names_ids: BTreeMap, ids_names: BTreeMap, + metadata: BTreeMap, } impl FieldIdMapper for LocalFieldsIdsMap { @@ -29,14 +31,19 @@ impl FieldIdMapper for LocalFieldsIdsMap { } impl LocalFieldsIdsMap { - fn new(global: &RwLock) -> Self { + fn new(global: &RwLock) -> Self { let global = global.read().unwrap(); - Self { names_ids: global.names_ids.clone(), ids_names: global.ids_names.clone() } + Self { + names_ids: global.as_fields_ids_map().names_ids.clone(), + ids_names: global.as_fields_ids_map().ids_names.clone(), + metadata: global.iter_id_metadata().collect(), + } } - fn insert(&mut self, name: &str, field_id: FieldId) { + fn insert(&mut self, name: &str, field_id: FieldId, metadata: Metadata) { self.names_ids.insert(name.to_owned(), field_id); self.ids_names.insert(field_id, name.to_owned()); + self.metadata.insert(field_id, metadata); } fn name(&self, id: FieldId) -> Option<&str> { @@ -46,41 +53,59 @@ impl LocalFieldsIdsMap { fn id(&self, name: &str) -> Option { self.names_ids.get(name).copied() } + + fn id_with_metadata(&self, name: &str) -> Option<(FieldId, Metadata)> { + let id = self.id(name)?; + Some((id, self.metadata(id).unwrap())) + } + + fn metadata(&self, id: FieldId) -> Option { + self.metadata.get(&id).copied() + } + + fn iter(&self) -> impl Iterator { + self.ids_names.iter().map(|(k, v)| (*k, v.as_str(), self.metadata.get(k).copied().unwrap())) + } } impl<'indexing> GlobalFieldsIdsMap<'indexing> { - pub fn new(global: &'indexing RwLock) -> Self { + pub fn new(global: &'indexing RwLock) -> Self { Self { local: LocalFieldsIdsMap::new(global), global } } /// Returns the field id related to a field name, it will create a new field id if the /// name is not already known. Returns `None` if the maximum field id as been reached. pub fn id_or_insert(&mut self, name: &str) -> Option { - if let Some(field_id) = self.local.id(name) { - return Some(field_id); + self.id_with_metadata_or_insert(name).map(|(fid, _meta)| fid) + } + + pub fn id_with_metadata_or_insert(&mut self, name: &str) -> Option<(FieldId, Metadata)> { + if let Some(entry) = self.local.id_with_metadata(name) { + return Some(entry); } { // optimistically lookup the global map let global = self.global.read().unwrap(); - if let Some(field_id) = global.id(name) { - self.local.insert(name, field_id); - return Some(field_id); + if let Some((field_id, metadata)) = global.id_with_metadata(name) { + self.local.insert(name, field_id, metadata); + return Some((field_id, metadata)); } } { let mut global = self.global.write().unwrap(); - if let Some(field_id) = global.id(name) { - self.local.insert(name, field_id); - return Some(field_id); + if let Some((field_id, metadata)) = global.id_with_metadata(name) { + self.local.insert(name, field_id, metadata); + return Some((field_id, metadata)); } let field_id = global.insert(name)?; - self.local.insert(name, field_id); - Some(field_id) + let metadata = global.metadata(field_id).unwrap(); + self.local.insert(name, field_id, metadata); + Some((field_id, metadata)) } } @@ -89,8 +114,8 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> { if self.local.name(id).is_none() { let global = self.global.read().unwrap(); - let name = global.name(id)?; - self.local.insert(name, id); + let (name, metadata) = global.name_with_metadata(id)?; + self.local.insert(name, id, metadata); } self.local.name(id) diff --git a/milli/src/fields_ids_map/metadata.rs b/milli/src/fields_ids_map/metadata.rs new file mode 100644 index 000000000..8198bd415 --- /dev/null +++ b/milli/src/fields_ids_map/metadata.rs @@ -0,0 +1,184 @@ +use std::collections::{BTreeMap, HashSet}; +use std::num::NonZeroU16; + +use charabia::Language; +use heed::RoTxn; + +use super::FieldsIdsMap; +use crate::{FieldId, Index, LocalizedAttributesRule, Result}; + +#[derive(Debug, Clone, Copy)] +pub struct Metadata { + pub searchable: bool, + pub filterable: bool, + pub sortable: bool, + localized_attributes_rule_id: Option, +} + +#[derive(Debug, Clone)] +pub struct FieldIdMapWithMetadata { + fields_ids_map: FieldsIdsMap, + builder: MetadataBuilder, + metadata: BTreeMap, +} + +impl FieldIdMapWithMetadata { + pub fn new(existing_fields_ids_map: FieldsIdsMap, builder: MetadataBuilder) -> Self { + let metadata = existing_fields_ids_map + .iter() + .map(|(id, name)| (id, builder.metadata_for_field(name))) + .collect(); + Self { fields_ids_map: existing_fields_ids_map, builder, metadata } + } + + pub fn as_fields_ids_map(&self) -> &FieldsIdsMap { + &self.fields_ids_map + } + + /// Returns the number of fields ids in the map. + pub fn len(&self) -> usize { + self.fields_ids_map.len() + } + + /// Returns `true` if the map is empty. + pub fn is_empty(&self) -> bool { + self.fields_ids_map.is_empty() + } + + /// Returns the field id related to a field name, it will create a new field id if the + /// name is not already known. Returns `None` if the maximum field id as been reached. + pub fn insert(&mut self, name: &str) -> Option { + let id = self.fields_ids_map.insert(name)?; + self.metadata.insert(id, self.builder.metadata_for_field(name)); + Some(id) + } + + /// Get the id of a field based on its name. + pub fn id(&self, name: &str) -> Option { + self.fields_ids_map.id(name) + } + + pub fn id_with_metadata(&self, name: &str) -> Option<(FieldId, Metadata)> { + let id = self.fields_ids_map.id(name)?; + Some((id, self.metadata(id).unwrap())) + } + + /// Get the name of a field based on its id. + pub fn name(&self, id: FieldId) -> Option<&str> { + self.fields_ids_map.name(id) + } + + /// Get the name of a field based on its id. + pub fn name_with_metadata(&self, id: FieldId) -> Option<(&str, Metadata)> { + let name = self.fields_ids_map.name(id)?; + Some((name, self.metadata(id).unwrap())) + } + + pub fn metadata(&self, id: FieldId) -> Option { + self.metadata.get(&id).copied() + } + + /// Iterate over the ids and names in the ids order. + pub fn iter(&self) -> impl Iterator { + self.fields_ids_map.iter().map(|(id, name)| (id, name, self.metadata(id).unwrap())) + } + + pub fn iter_id_metadata(&self) -> impl Iterator + '_ { + self.metadata.iter().map(|(k, v)| (*k, *v)) + } + + pub fn iter_metadata(&self) -> impl Iterator + '_ { + self.metadata.values().copied() + } + + pub fn metadata_builder(&self) -> &MetadataBuilder { + &self.builder + } +} + +impl Metadata { + pub fn locales<'rules>( + &self, + rules: &'rules [LocalizedAttributesRule], + ) -> Option<&'rules [Language]> { + let localized_attributes_rule_id = self.localized_attributes_rule_id?.get(); + let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap(); + Some(rule.locales()) + } +} + +#[derive(Debug, Clone)] +pub struct MetadataBuilder { + searchable_attributes: Vec, + filterable_attributes: HashSet, + sortable_attributes: HashSet, + localized_attributes: Option>, +} + +impl MetadataBuilder { + pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result { + let searchable_attributes = + index.searchable_fields(rtxn)?.into_iter().map(|s| s.to_string()).collect(); + let filterable_attributes = index.filterable_fields(rtxn)?; + let sortable_attributes = index.sortable_fields(rtxn)?; + let localized_attributes = index.localized_attributes_rules(rtxn)?; + + Ok(Self { + searchable_attributes, + filterable_attributes, + sortable_attributes, + localized_attributes, + }) + } + + pub fn new( + searchable_attributes: Vec, + filterable_attributes: HashSet, + sortable_attributes: HashSet, + localized_attributes: Option>, + ) -> Self { + Self { + searchable_attributes, + filterable_attributes, + sortable_attributes, + localized_attributes, + } + } + + pub fn metadata_for_field(&self, field: &str) -> Metadata { + let searchable = self + .searchable_attributes + .iter() + .any(|attribute| attribute == "*" || attribute == field); + + let filterable = self.filterable_attributes.contains(field); + + let sortable = self.sortable_attributes.contains(field); + + let localized_attributes_rule_id = self + .localized_attributes + .iter() + .map(|v| v.iter()) + .flatten() + .position(|rule| rule.match_str(field)) + .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); + + Metadata { searchable, filterable, sortable, localized_attributes_rule_id } + } + + pub fn searchable_attributes(&self) -> &[String] { + self.searchable_attributes.as_slice() + } + + pub fn sortable_attributes(&self) -> &HashSet { + &self.sortable_attributes + } + + pub fn filterable_attributes(&self) -> &HashSet { + &self.filterable_attributes + } + + pub fn localized_attributes_rules(&self) -> Option<&[LocalizedAttributesRule]> { + self.localized_attributes.as_deref() + } +}