From b31158528020d204e2e604b949fcb257374cb1eb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 13 Nov 2024 13:50:10 +0100 Subject: [PATCH] Fix facet strings --- crates/milli/src/update/new/channel.rs | 4 ++-- .../new/extract/faceted/extract_facets.rs | 23 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 3287a1f7f..fbf102f18 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -364,9 +364,9 @@ impl DocidsSender for FacetDocidsSender<'_> { pub struct FieldIdDocidFacetSender<'a>(&'a ExtractorSender); impl FieldIdDocidFacetSender<'_> { - pub fn write_facet_string(&self, key: &[u8]) -> StdResult<(), SendError<()>> { + pub fn write_facet_string(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { debug_assert!(FieldDocIdFacetStringCodec::bytes_decode(key).is_ok()); - let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(&key, &[])); + let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(&key, &value)); self.0 .send_db_operation(DbOperation { database: Database::FieldIdDocidFacetStrings, entry }) } diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 0e7dcc4b9..89223bc55 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -290,22 +290,22 @@ impl<'doc> DelAddFacetValue<'doc> { sender: &FieldIdDocidFacetSender, doc_alloc: &Bump, ) -> std::result::Result<(), crossbeam_channel::SendError<()>> { - println!("sending FieldIdDocidFacet data"); - let mut count = 0; let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc); for ((fid, value), deladd) in self.strings { - buffer.clear(); - buffer.extend_from_slice(&fid.to_be_bytes()); - buffer.extend_from_slice(&docid.to_be_bytes()); - buffer.extend_from_slice(&value); - match deladd { - DelAdd::Deletion => sender.delete_facet_string(&buffer)?, - DelAdd::Addition => sender.write_facet_string(&buffer)?, + if let Ok(s) = std::str::from_utf8(&value) { + buffer.clear(); + buffer.extend_from_slice(&fid.to_be_bytes()); + buffer.extend_from_slice(&docid.to_be_bytes()); + let normalized = crate::normalize_facet(s); + let truncated = truncate_str(&normalized); + buffer.extend_from_slice(truncated.as_bytes()); + match deladd { + DelAdd::Deletion => sender.delete_facet_string(&buffer)?, + DelAdd::Addition => sender.write_facet_string(&buffer, &value)?, + } } - count += 1; } - count = 0; for ((fid, value), deladd) in self.f64s { buffer.clear(); buffer.extend_from_slice(&fid.to_be_bytes()); @@ -315,7 +315,6 @@ impl<'doc> DelAddFacetValue<'doc> { DelAdd::Deletion => sender.delete_facet_f64(&buffer)?, DelAdd::Addition => sender.write_facet_f64(&buffer)?, } - count += 1; } Ok(())