From 0d4482625a974adb24bfa70f97cf78acd006880f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 22 Nov 2023 18:21:19 +0100 Subject: [PATCH] Make the changes to use heed v0.20-alpha.6 --- Cargo.lock | 11 +- fuzzers/src/bin/fuzz-indexing.rs | 2 +- index-scheduler/src/batch.rs | 20 +- index-scheduler/src/index_mapper/index_map.rs | 5 +- index-scheduler/src/lib.rs | 34 +- index-scheduler/src/utils.rs | 24 +- index-scheduler/src/uuid_codec.rs | 10 +- meilisearch-auth/src/store.rs | 16 +- meilisearch-types/src/error.rs | 6 +- meilitool/src/main.rs | 14 +- meilitool/src/uuid_codec.rs | 10 +- milli/Cargo.toml | 4 +- milli/src/error.rs | 9 +- milli/src/external_documents_ids.rs | 14 +- milli/src/heed_codec/beu16_str_codec.rs | 14 +- milli/src/heed_codec/beu32_str_codec.rs | 14 +- milli/src/heed_codec/byte_slice_ref.rs | 10 +- .../facet/field_doc_id_facet_codec.rs | 16 +- milli/src/heed_codec/facet/mod.rs | 28 +- .../src/heed_codec/facet/ordered_f64_codec.rs | 16 +- .../heed_codec/field_id_word_count_codec.rs | 14 +- milli/src/heed_codec/fst_set_codec.rs | 10 +- milli/src/heed_codec/mod.rs | 3 +- milli/src/heed_codec/obkv_codec.rs | 9 +- .../roaring_bitmap/bo_roaring_bitmap_codec.rs | 14 +- .../cbo_roaring_bitmap_codec.rs | 13 +- .../roaring_bitmap/roaring_bitmap_codec.rs | 15 +- .../bo_roaring_bitmap_len_codec.rs | 8 +- .../cbo_roaring_bitmap_len_codec.rs | 6 +- .../roaring_bitmap_len_codec.rs | 9 +- milli/src/heed_codec/script_language_codec.rs | 15 +- milli/src/heed_codec/str_beu32_codec.rs | 32 +- milli/src/heed_codec/str_ref.rs | 11 +- milli/src/heed_codec/str_str_u8_codec.rs | 30 +- milli/src/index.rs | 353 ++++++++++++------ milli/src/lib.rs | 6 +- milli/src/search/facet/facet_range_search.rs | 8 +- milli/src/search/facet/mod.rs | 23 +- milli/src/search/mod.rs | 5 +- milli/src/search/new/db_cache.rs | 28 +- milli/src/search/new/mod.rs | 8 +- milli/src/update/clear_documents.rs | 9 +- milli/src/update/facet/bulk.rs | 22 +- milli/src/update/facet/incremental.rs | 45 ++- milli/src/update/facet/mod.rs | 8 +- .../extract/extract_fid_docid_facet_values.rs | 12 +- .../extract/extract_word_docids.rs | 2 +- milli/src/update/index_documents/mod.rs | 10 +- milli/src/update/index_documents/transform.rs | 11 +- .../src/update/index_documents/typed_chunk.rs | 27 +- milli/src/update/settings.rs | 10 +- milli/src/update/word_prefix_docids.rs | 10 +- .../src/update/words_prefix_integer_docids.rs | 13 +- milli/src/update/words_prefixes_fst.rs | 12 +- 54 files changed, 611 insertions(+), 477 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a795c502..21ae2b17f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1824,7 +1824,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" version = "0.20.0-alpha.6" -source = "git+https://github.com/meilisearch/heed?branch=put-current-with-data-codec#2824585c37a6c7c0c5d11fe13c0dad3ebb1a5c4f" +source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" dependencies = [ "bitflags 2.3.3", "bytemuck", @@ -1842,16 +1842,19 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.20.0-alpha.6" -source = "git+https://github.com/meilisearch/heed?branch=put-current-with-data-codec#2824585c37a6c7c0c5d11fe13c0dad3ebb1a5c4f" +source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" [[package]] name = "heed-types" version = "0.20.0-alpha.6" -source = "git+https://github.com/meilisearch/heed?branch=put-current-with-data-codec#2824585c37a6c7c0c5d11fe13c0dad3ebb1a5c4f" +source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" dependencies = [ + "bincode", "bytemuck", "byteorder", "heed-traits", + "serde", + "serde_json", ] [[package]] @@ -2981,7 +2984,7 @@ dependencies = [ [[package]] name = "lmdb-master-sys" version = "0.1.0" -source = "git+https://github.com/meilisearch/heed?branch=put-current-with-data-codec#2824585c37a6c7c0c5d11fe13c0dad3ebb1a5c4f" +source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" dependencies = [ "cc", "doxygen-rs", diff --git a/fuzzers/src/bin/fuzz-indexing.rs b/fuzzers/src/bin/fuzz-indexing.rs index 1d53e069c..baf705709 100644 --- a/fuzzers/src/bin/fuzz-indexing.rs +++ b/fuzzers/src/bin/fuzz-indexing.rs @@ -113,7 +113,7 @@ fn main() { index.documents(&wtxn, res.documents_ids).unwrap(); progression.fetch_add(1, Ordering::Relaxed); } - wtxn.abort().unwrap(); + wtxn.abort(); }); if let err @ Err(_) = handle.join() { stop.store(true, Ordering::Relaxed); diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 661285325..d53dbf001 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -32,7 +32,7 @@ use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::update::{ IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, }; -use meilisearch_types::milli::{self, Filter, BEU32}; +use meilisearch_types::milli::{self, Filter}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; @@ -715,7 +715,7 @@ impl IndexScheduler { // 2. Snapshot the index-scheduler LMDB env // - // When we call copy_to_path, LMDB opens a read transaction by itself, + // When we call copy_to_file, LMDB opens a read transaction by itself, // we can't provide our own. It is an issue as we would like to know // the update files to copy but new ones can be enqueued between the copy // of the env and the new transaction we open to retrieve the enqueued tasks. @@ -728,7 +728,7 @@ impl IndexScheduler { // 2.1 First copy the LMDB env of the index-scheduler let dst = temp_snapshot_dir.path().join("tasks"); fs::create_dir_all(&dst)?; - self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; // 2.2 Create a read transaction on the index-scheduler let rtxn = self.env.read_txn()?; @@ -766,7 +766,7 @@ impl IndexScheduler { .map_size(1024 * 1024 * 1024) // 1 GiB .max_dbs(2) .open(&self.auth_path)?; - auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; // 5. Copy and tarball the flat snapshot // 5.1 Find the original name of the database @@ -1106,7 +1106,7 @@ impl IndexScheduler { for task_id in &index_lhs_task_ids | &index_rhs_task_ids { let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; swap_index_uid_in_task(&mut task, (lhs, rhs)); - self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?; + self.all_tasks.put(wtxn, &task_id, &task)?; } // 4. remove the task from indexuid = before_name @@ -1132,7 +1132,7 @@ impl IndexScheduler { /// The list of processed tasks. fn apply_index_operation<'i>( &self, - index_wtxn: &mut RwTxn<'i, '_>, + index_wtxn: &mut RwTxn<'i>, index: &'i Index, operation: IndexOperation, ) -> Result> { @@ -1479,10 +1479,10 @@ impl IndexScheduler { } for task in to_delete_tasks.iter() { - self.all_tasks.delete(wtxn, &BEU32::new(task))?; + self.all_tasks.delete(wtxn, &task)?; } for canceled_by in affected_canceled_by { - let canceled_by = BEU32::new(canceled_by); + let canceled_by = canceled_by; if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? { tasks -= &to_delete_tasks; if tasks.is_empty() { @@ -1530,14 +1530,14 @@ impl IndexScheduler { task.details = task.details.map(|d| d.to_failed()); self.update_task(wtxn, &task)?; } - self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?; + self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?; Ok(content_files_to_delete) } } fn delete_document_by_filter<'a>( - wtxn: &mut RwTxn<'a, '_>, + wtxn: &mut RwTxn<'a>, filter: &serde_json::Value, indexer_config: &IndexerConfig, must_stop_processing: MustStopProcessing, diff --git a/index-scheduler/src/index_mapper/index_map.rs b/index-scheduler/src/index_mapper/index_map.rs index a24213558..d250ba02a 100644 --- a/index-scheduler/src/index_mapper/index_map.rs +++ b/index-scheduler/src/index_mapper/index_map.rs @@ -5,8 +5,7 @@ use std::collections::BTreeMap; use std::path::Path; use std::time::Duration; -use meilisearch_types::heed::flags::Flags; -use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions}; +use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions}; use meilisearch_types::milli::Index; use time::OffsetDateTime; use uuid::Uuid; @@ -309,7 +308,7 @@ fn create_or_open_index( options.map_size(clamp_to_page_size(map_size)); options.max_readers(1024); if enable_mdb_writemap { - unsafe { options.flag(Flags::MdbWriteMap) }; + unsafe { options.flags(EnvFlags::WRITE_MAP) }; } if let Some((created, updated)) = date { diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 896c06c99..545409b81 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -47,8 +47,9 @@ pub use features::RoFeatures; use file_store::FileStore; use meilisearch_types::error::ResponseError; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; -use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; -use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn}; +use meilisearch_types::heed::byteorder::BE; +use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; +use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; @@ -64,8 +65,7 @@ use uuid::Uuid; use crate::index_mapper::IndexMapper; use crate::utils::{check_index_swap_validity, clamp_to_page_size}; -pub(crate) type BEI128 = - meilisearch_types::heed::zerocopy::I128; +pub(crate) type BEI128 = I128; /// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. /// @@ -278,7 +278,7 @@ pub struct IndexScheduler { pub(crate) file_store: FileStore, // The main database, it contains all the tasks accessible by their Id. - pub(crate) all_tasks: Database, SerdeJson>, + pub(crate) all_tasks: Database>, /// All the tasks ids grouped by their status. // TODO we should not be able to serialize a `Status::Processing` in this database. @@ -289,16 +289,16 @@ pub struct IndexScheduler { pub(crate) index_tasks: Database, /// Store the tasks that were canceled by a task uid - pub(crate) canceled_by: Database, RoaringBitmapCodec>, + pub(crate) canceled_by: Database, /// Store the task ids of tasks which were enqueued at a specific date - pub(crate) enqueued_at: Database, CboRoaringBitmapCodec>, + pub(crate) enqueued_at: Database, /// Store the task ids of finished tasks which started being processed at a specific date - pub(crate) started_at: Database, CboRoaringBitmapCodec>, + pub(crate) started_at: Database, /// Store the task ids of tasks which finished at a specific date - pub(crate) finished_at: Database, CboRoaringBitmapCodec>, + pub(crate) finished_at: Database, /// In charge of creating, opening, storing and returning indexes. pub(crate) index_mapper: IndexMapper, @@ -730,9 +730,7 @@ impl IndexScheduler { if let Some(canceled_by) = &query.canceled_by { let mut all_canceled_tasks = RoaringBitmap::new(); for cancel_task_uid in canceled_by { - if let Some(canceled_by_uid) = - self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))? - { + if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, &*cancel_task_uid)? { all_canceled_tasks |= canceled_by_uid; } } @@ -983,7 +981,7 @@ impl IndexScheduler { // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty()) - && (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50 + && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50 { return Err(Error::NoSpaceLeftInTaskQueue); } @@ -1009,7 +1007,7 @@ impl IndexScheduler { // Get rid of the mutability. let task = task; - self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?; + self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?; for index in task.indexes() { self.update_index(&mut wtxn, index, |bitmap| { @@ -1187,7 +1185,7 @@ impl IndexScheduler { | Err(Error::AbortedTask) => { #[cfg(test)] self.breakpoint(Breakpoint::AbortedIndexation); - wtxn.abort().map_err(Error::HeedTransaction)?; + wtxn.abort(); // We make sure that we don't call `stop_processing` on the `processing_tasks`, // this is because we want to let the next tick call `create_next_batch` and keep @@ -1208,7 +1206,7 @@ impl IndexScheduler { let index_uid = index_uid.unwrap(); // fixme: handle error more gracefully? not sure when this could happen self.index_mapper.resize_index(&wtxn, &index_uid)?; - wtxn.abort().map_err(Error::HeedTransaction)?; + wtxn.abort(); return Ok(TickOutcome::TickAgain(0)); } @@ -1354,7 +1352,7 @@ impl IndexScheduler { pub struct Dump<'a> { index_scheduler: &'a IndexScheduler, - wtxn: RwTxn<'a, 'a>, + wtxn: RwTxn<'a>, indexes: HashMap, statuses: HashMap, @@ -1469,7 +1467,7 @@ impl<'a> Dump<'a> { }, }; - self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?; + self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; for index in task.indexes() { match self.indexes.get_mut(index) { diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs index 3971d9116..f4eff7ea5 100644 --- a/index-scheduler/src/utils.rs +++ b/index-scheduler/src/utils.rs @@ -3,9 +3,9 @@ use std::collections::{BTreeSet, HashSet}; use std::ops::Bound; -use meilisearch_types::heed::types::{DecodeIgnore, OwnedType}; +use meilisearch_types::heed::types::DecodeIgnore; use meilisearch_types::heed::{Database, RoTxn, RwTxn}; -use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; +use meilisearch_types::milli::CboRoaringBitmapCodec; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status}; use roaring::{MultiOps, RoaringBitmap}; use time::OffsetDateTime; @@ -18,7 +18,7 @@ impl IndexScheduler { } pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result> { - Ok(self.all_tasks.remap_data_type::().last(rtxn)?.map(|(k, _)| k.get() + 1)) + Ok(self.all_tasks.remap_data_type::().last(rtxn)?.map(|(k, _)| k + 1)) } pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result { @@ -26,7 +26,7 @@ impl IndexScheduler { } pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result> { - Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?) + Ok(self.all_tasks.get(rtxn, &task_id)?) } /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a @@ -88,7 +88,7 @@ impl IndexScheduler { } } - self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?; + self.all_tasks.put(wtxn, &task.uid, task)?; Ok(()) } @@ -169,11 +169,11 @@ impl IndexScheduler { pub(crate) fn insert_task_datetime( wtxn: &mut RwTxn, - database: Database, CboRoaringBitmapCodec>, + database: Database, time: OffsetDateTime, task_id: TaskId, ) -> Result<()> { - let timestamp = BEI128::new(time.unix_timestamp_nanos()); + let timestamp = time.unix_timestamp_nanos(); let mut task_ids = database.get(wtxn, ×tamp)?.unwrap_or_default(); task_ids.insert(task_id); database.put(wtxn, ×tamp, &RoaringBitmap::from_iter(task_ids))?; @@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime( pub(crate) fn remove_task_datetime( wtxn: &mut RwTxn, - database: Database, CboRoaringBitmapCodec>, + database: Database, time: OffsetDateTime, task_id: TaskId, ) -> Result<()> { - let timestamp = BEI128::new(time.unix_timestamp_nanos()); + let timestamp = time.unix_timestamp_nanos(); if let Some(mut existing) = database.get(wtxn, ×tamp)? { existing.remove(task_id); if existing.is_empty() { @@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime( pub(crate) fn keep_tasks_within_datetimes( rtxn: &RoTxn, tasks: &mut RoaringBitmap, - database: Database, CboRoaringBitmapCodec>, + database: Database, after: Option, before: Option, ) -> Result<()> { @@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes( (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)), }; let mut collected_task_ids = RoaringBitmap::new(); - let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos())); - let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos())); + let start = map_bound(start, |b| b.unix_timestamp_nanos()); + let end = map_bound(end, |b| b.unix_timestamp_nanos()); let iter = database.range(rtxn, &(start, end))?; for r in iter { let (_timestamp, task_ids) = r?; diff --git a/index-scheduler/src/uuid_codec.rs b/index-scheduler/src/uuid_codec.rs index 70a92ca94..54020fa3c 100644 --- a/index-scheduler/src/uuid_codec.rs +++ b/index-scheduler/src/uuid_codec.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::convert::TryInto; -use meilisearch_types::heed::{BytesDecode, BytesEncode}; +use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode}; use uuid::Uuid; /// A heed codec for value of struct Uuid. @@ -10,15 +10,15 @@ pub struct UuidCodec; impl<'a> BytesDecode<'a> for UuidCodec { type DItem = Uuid; - fn bytes_decode(bytes: &'a [u8]) -> Option { - bytes.try_into().ok().map(Uuid::from_bytes) + fn bytes_decode(bytes: &'a [u8]) -> Result { + bytes.try_into().map(Uuid::from_bytes).map_err(Into::into) } } impl BytesEncode<'_> for UuidCodec { type EItem = Uuid; - fn bytes_encode(item: &Self::EItem) -> Option> { - Some(Cow::Borrowed(item.as_bytes())) + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + Ok(Cow::Borrowed(item.as_bytes())) } } diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 28ec8b5e4..944082285 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -4,11 +4,13 @@ use std::collections::HashSet; use std::convert::{TryFrom, TryInto}; use std::fs::create_dir_all; use std::path::Path; +use std::result::Result as StdResult; use std::str; use std::str::FromStr; use std::sync::Arc; use hmac::{Hmac, Mac}; +use meilisearch_types::heed::BoxedError; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::keys::KeyId; use meilisearch_types::milli; @@ -294,23 +296,23 @@ pub struct KeyIdActionCodec; impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec { type DItem = (KeyId, Action, Option<&'a [u8]>); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?; - let (action_bytes, index) = match try_split_array_at(action_bytes)? { + fn bytes_decode(bytes: &'a [u8]) -> StdResult { + let (key_id_bytes, action_bytes) = try_split_array_at(bytes).unwrap(); + let (action_bytes, index) = match try_split_array_at(action_bytes).unwrap() { (action, []) => (action, None), (action, index) => (action, Some(index)), }; let key_id = Uuid::from_bytes(*key_id_bytes); - let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?; + let action = Action::from_repr(u8::from_be_bytes(*action_bytes)).unwrap(); - Some((key_id, action, index)) + Ok((key_id, action, index)) } } impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); - fn bytes_encode((key_id, action, index): &Self::EItem) -> Option> { + fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult, BoxedError> { let mut bytes = Vec::new(); bytes.extend_from_slice(key_id.as_bytes()); @@ -320,7 +322,7 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { bytes.extend_from_slice(index); } - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index afe9c5189..653cb108b 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -386,11 +386,11 @@ impl ErrorCode for HeedError { HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile, HeedError::Io(e) => e.error_code(), HeedError::Mdb(_) - | HeedError::Encoding - | HeedError::Decoding + | HeedError::Encoding(_) + | HeedError::Decoding(_) | HeedError::InvalidDatabaseTyping | HeedError::DatabaseClosing - | HeedError::BadOpenOptions => Code::Internal, + | HeedError::BadOpenOptions { .. } => Code::Internal, } } } diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index 2b40e42c2..f199df216 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -7,8 +7,8 @@ use clap::{Parser, Subcommand}; use dump::{DumpWriter, IndexMetadata}; use file_store::FileStore; use meilisearch_auth::AuthController; -use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str}; -use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn}; +use meilisearch_types::heed::types::{SerdeJson, Str}; +use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::{obkv_to_json, BEU32}; use meilisearch_types::tasks::{Status, Task}; @@ -148,15 +148,17 @@ fn try_opening_poly_database( env: &Env, rtxn: &RoTxn, db_name: &str, -) -> anyhow::Result { - env.open_poly_database(rtxn, Some(db_name)) +) -> anyhow::Result> { + env.database_options() + .name(db_name) + .open(rtxn) .with_context(|| format!("While opening the {db_name:?} poly database"))? .with_context(|| format!("Missing the {db_name:?} poly database")) } fn try_clearing_poly_database( wtxn: &mut RwTxn, - database: PolyDatabase, + database: Database, db_name: &str, ) -> anyhow::Result<()> { database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database")) @@ -212,7 +214,7 @@ fn export_a_dump( eprintln!("Successfully dumped {count} keys!"); let rtxn = env.read_txn()?; - let all_tasks: Database, SerdeJson> = + let all_tasks: Database> = try_opening_database(&env, &rtxn, "all-tasks")?; let index_mapping: Database = try_opening_database(&env, &rtxn, "index-mapping")?; diff --git a/meilitool/src/uuid_codec.rs b/meilitool/src/uuid_codec.rs index 70a92ca94..54020fa3c 100644 --- a/meilitool/src/uuid_codec.rs +++ b/meilitool/src/uuid_codec.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::convert::TryInto; -use meilisearch_types::heed::{BytesDecode, BytesEncode}; +use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode}; use uuid::Uuid; /// A heed codec for value of struct Uuid. @@ -10,15 +10,15 @@ pub struct UuidCodec; impl<'a> BytesDecode<'a> for UuidCodec { type DItem = Uuid; - fn bytes_decode(bytes: &'a [u8]) -> Option { - bytes.try_into().ok().map(Uuid::from_bytes) + fn bytes_decode(bytes: &'a [u8]) -> Result { + bytes.try_into().map(Uuid::from_bytes).map_err(Into::into) } } impl BytesEncode<'_> for UuidCodec { type EItem = Uuid; - fn bytes_encode(item: &Self::EItem) -> Option> { - Some(Cow::Borrowed(item.as_bytes())) + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + Ok(Cow::Borrowed(item.as_bytes())) } } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 190c8e4e2..e563f5d82 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -29,8 +29,8 @@ geoutils = "0.5.1" grenad = { version = "0.4.5", default-features = false, features = [ "rayon", "tempfile" ] } -heed = { git = "https://github.com/meilisearch/heed", branch = "put-current-with-data-codec", default-features = false, features = [ - "read-txn-no-tls" +heed = { git = "https://github.com/meilisearch/heed", branch = "main", default-features = false, features = [ + "serde-json", "serde-bincode", "read-txn-no-tls" ] } indexmap = { version = "2.0.0", features = ["serde"] } instant-distance = { version = "0.6.1", features = ["with-serde"] } diff --git a/milli/src/error.rs b/milli/src/error.rs index b249f2977..cbbd8a3e5 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -152,7 +152,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco valid_fields: BTreeSet, hidden_fields: bool, }, - #[error("{}", HeedError::BadOpenOptions)] + #[error("an environment is already opened with different options")] InvalidLmdbOpenOptions, #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] SortRankingRuleMissing, @@ -326,11 +326,12 @@ impl From for Error { HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached), HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile), HeedError::Mdb(error) => InternalError(Store(error)), - HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })), - HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })), + // TODO use the encoding + HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), + HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::DatabaseClosing => InternalError(DatabaseClosing), - HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions), + HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions), } } } diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs index ec419446c..7e0f795a4 100644 --- a/milli/src/external_documents_ids.rs +++ b/milli/src/external_documents_ids.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use heed::types::{OwnedType, Str}; +use heed::types::Str; use heed::{Database, RoIter, RoTxn, RwTxn}; use crate::{DocumentId, BEU32}; @@ -16,10 +16,10 @@ pub struct DocumentOperation { pub kind: DocumentOperationKind, } -pub struct ExternalDocumentsIds(Database>); +pub struct ExternalDocumentsIds(Database); impl ExternalDocumentsIds { - pub fn new(db: Database>) -> ExternalDocumentsIds { + pub fn new(db: Database) -> ExternalDocumentsIds { ExternalDocumentsIds(db) } @@ -29,7 +29,7 @@ impl ExternalDocumentsIds { } pub fn get>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result> { - Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get())) + Ok(self.0.get(rtxn, external_id.as_ref())?) } /// An helper function to debug this type, returns an `HashMap` of both, @@ -38,7 +38,7 @@ impl ExternalDocumentsIds { let mut map = HashMap::default(); for result in self.0.iter(rtxn)? { let (external, internal) = result?; - map.insert(external.to_owned(), internal.get()); + map.insert(external.to_owned(), internal); } Ok(map) } @@ -55,7 +55,7 @@ impl ExternalDocumentsIds { for DocumentOperation { external_id, internal_id, kind } in operations { match kind { DocumentOperationKind::Create => { - self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?; + self.0.put(wtxn, &external_id, &internal_id)?; } DocumentOperationKind::Delete => { if !self.0.delete(wtxn, &external_id)? { @@ -69,7 +69,7 @@ impl ExternalDocumentsIds { } /// Returns an iterator over all the external ids. - pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { + pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { self.0.iter(rtxn) } } diff --git a/milli/src/heed_codec/beu16_str_codec.rs b/milli/src/heed_codec/beu16_str_codec.rs index d1b85d47f..ba04f0900 100644 --- a/milli/src/heed_codec/beu16_str_codec.rs +++ b/milli/src/heed_codec/beu16_str_codec.rs @@ -2,26 +2,28 @@ use std::borrow::Cow; use std::convert::TryInto; use std::str; +use heed::BoxedError; + pub struct BEU16StrCodec; impl<'a> heed::BytesDecode<'a> for BEU16StrCodec { type DItem = (u16, &'a str); - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { let (n_bytes, str_bytes) = bytes.split_at(2); - let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?; - let s = str::from_utf8(str_bytes).ok()?; - Some((n, s)) + let n = n_bytes.try_into().map(u16::from_be_bytes)?; + let s = str::from_utf8(str_bytes)?; + Ok((n, s)) } } impl<'a> heed::BytesEncode<'a> for BEU16StrCodec { type EItem = (u16, &'a str); - fn bytes_encode((n, s): &Self::EItem) -> Option> { + fn bytes_encode((n, s): &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(s.len() + 2); bytes.extend_from_slice(&n.to_be_bytes()); bytes.extend_from_slice(s.as_bytes()); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/beu32_str_codec.rs b/milli/src/heed_codec/beu32_str_codec.rs index c525d6b5b..762e31ca4 100644 --- a/milli/src/heed_codec/beu32_str_codec.rs +++ b/milli/src/heed_codec/beu32_str_codec.rs @@ -2,26 +2,28 @@ use std::borrow::Cow; use std::convert::TryInto; use std::str; +use heed::BoxedError; + pub struct BEU32StrCodec; impl<'a> heed::BytesDecode<'a> for BEU32StrCodec { type DItem = (u32, &'a str); - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { let (n_bytes, str_bytes) = bytes.split_at(4); - let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?; - let s = str::from_utf8(str_bytes).ok()?; - Some((n, s)) + let n = n_bytes.try_into().map(u32::from_be_bytes)?; + let s = str::from_utf8(str_bytes)?; + Ok((n, s)) } } impl<'a> heed::BytesEncode<'a> for BEU32StrCodec { type EItem = (u32, &'a str); - fn bytes_encode((n, s): &Self::EItem) -> Option> { + fn bytes_encode((n, s): &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(s.len() + 4); bytes.extend_from_slice(&n.to_be_bytes()); bytes.extend_from_slice(s.as_bytes()); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/byte_slice_ref.rs b/milli/src/heed_codec/byte_slice_ref.rs index 48eda63c5..b027c7ebc 100644 --- a/milli/src/heed_codec/byte_slice_ref.rs +++ b/milli/src/heed_codec/byte_slice_ref.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use heed::{BytesDecode, BytesEncode}; +use heed::{BoxedError, BytesDecode, BytesEncode}; /// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. @@ -9,15 +9,15 @@ pub struct ByteSliceRefCodec; impl<'a> BytesEncode<'a> for ByteSliceRefCodec { type EItem = &'a [u8]; - fn bytes_encode(item: &'a Self::EItem) -> Option> { - Some(Cow::Borrowed(item)) + fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { + Ok(Cow::Borrowed(item)) } } impl<'a> BytesDecode<'a> for ByteSliceRefCodec { type DItem = &'a [u8]; - fn bytes_decode(bytes: &'a [u8]) -> Option { - Some(bytes) + fn bytes_decode(bytes: &'a [u8]) -> Result { + Ok(bytes) } } diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs b/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs index cc9919ad2..a0bea2c42 100644 --- a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs +++ b/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::marker::PhantomData; -use heed::{BytesDecode, BytesEncode}; +use heed::{BoxedError, BytesDecode, BytesEncode}; use crate::{try_split_array_at, DocumentId, FieldId}; @@ -13,16 +13,16 @@ where { type DItem = (FieldId, DocumentId, C::DItem); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (field_id_bytes, bytes) = try_split_array_at(bytes)?; + fn bytes_decode(bytes: &'a [u8]) -> Result { + let (field_id_bytes, bytes) = try_split_array_at(bytes).unwrap(); let field_id = u16::from_be_bytes(field_id_bytes); - let (document_id_bytes, bytes) = try_split_array_at(bytes)?; + let (document_id_bytes, bytes) = try_split_array_at(bytes).unwrap(); let document_id = u32::from_be_bytes(document_id_bytes); let value = C::bytes_decode(bytes)?; - Some((field_id, document_id, value)) + Ok((field_id, document_id, value)) } } @@ -32,13 +32,15 @@ where { type EItem = (FieldId, DocumentId, C::EItem); - fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option> { + fn bytes_encode( + (field_id, document_id, value): &'a Self::EItem, + ) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(32); bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes let value_bytes = C::bytes_encode(value)?; // variable length, if f64 -> 16 bytes, if string -> large, potentially bytes.extend_from_slice(&value_bytes); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index d36ec8434..e5ff1820c 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -5,8 +5,8 @@ use std::borrow::Cow; use std::convert::TryFrom; use std::marker::PhantomData; -use heed::types::{DecodeIgnore, OwnedType}; -use heed::{BytesDecode, BytesEncode}; +use heed::types::DecodeIgnore; +use heed::{BoxedError, BytesDecode, BytesEncode}; use roaring::RoaringBitmap; pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec; @@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec; pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec; pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec; -pub type FieldIdCodec = OwnedType; +pub type FieldIdCodec = BEU16; /// Tries to split a slice in half at the given middle point, /// `None` if the slice is too short. @@ -58,15 +58,15 @@ where { type EItem = FacetGroupKey; - fn bytes_encode(value: &'a Self::EItem) -> Option> { + fn bytes_encode(value: &'a Self::EItem) -> Result, BoxedError> { let mut v = vec![]; v.extend_from_slice(&value.field_id.to_be_bytes()); v.extend_from_slice(&[value.level]); - let bound = T::bytes_encode(&value.left_bound)?; + let bound = T::bytes_encode(&value.left_bound).unwrap(); v.extend_from_slice(&bound); - Some(Cow::Owned(v)) + Ok(Cow::Owned(v)) } } impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec @@ -75,11 +75,11 @@ where { type DItem = FacetGroupKey; - fn bytes_decode(bytes: &'a [u8]) -> Option { - let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?); + fn bytes_decode(bytes: &'a [u8]) -> Result { + let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?); let level = bytes[2]; let bound = T::bytes_decode(&bytes[3..])?; - Some(FacetGroupKey { field_id: fid, level, left_bound: bound }) + Ok(FacetGroupKey { field_id: fid, level, left_bound: bound }) } } @@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec; impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { type EItem = FacetGroupValue; - fn bytes_encode(value: &'a Self::EItem) -> Option> { + fn bytes_encode(value: &'a Self::EItem) -> Result, BoxedError> { let mut v = vec![value.size]; CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v); - Some(Cow::Owned(v)) + Ok(Cow::Owned(v)) } } impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { type DItem = FacetGroupValue; - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { let size = bytes[0]; - let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?; - Some(FacetGroupValue { size, bitmap }) + let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?; + Ok(FacetGroupValue { size, bitmap }) } } diff --git a/milli/src/heed_codec/facet/ordered_f64_codec.rs b/milli/src/heed_codec/facet/ordered_f64_codec.rs index 5ac9ffcfc..cbefe499f 100644 --- a/milli/src/heed_codec/facet/ordered_f64_codec.rs +++ b/milli/src/heed_codec/facet/ordered_f64_codec.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::convert::TryInto; -use heed::BytesDecode; +use heed::{BoxedError, BytesDecode}; use crate::facet::value_encoding::f64_into_bytes; @@ -10,28 +10,28 @@ pub struct OrderedF64Codec; impl<'a> BytesDecode<'a> for OrderedF64Codec { type DItem = f64; - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { if bytes.len() < 16 { - return None; + panic!() // TODO don't panic } - let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?; - Some(f) + let f = bytes[8..].try_into().ok().map(f64::from_be_bytes).unwrap(); + Ok(f) } } impl heed::BytesEncode<'_> for OrderedF64Codec { type EItem = f64; - fn bytes_encode(f: &Self::EItem) -> Option> { + fn bytes_encode(f: &Self::EItem) -> Result, BoxedError> { let mut buffer = [0u8; 16]; // write the globally ordered float - let bytes = f64_into_bytes(*f)?; + let bytes = f64_into_bytes(*f).unwrap(); buffer[..8].copy_from_slice(&bytes[..]); // Then the f64 value just to be able to read it back let bytes = f.to_be_bytes(); buffer[8..16].copy_from_slice(&bytes[..]); - Some(Cow::Owned(buffer.to_vec())) + Ok(Cow::Owned(buffer.to_vec())) } } diff --git a/milli/src/heed_codec/field_id_word_count_codec.rs b/milli/src/heed_codec/field_id_word_count_codec.rs index aca7a80c4..e284a45a2 100644 --- a/milli/src/heed_codec/field_id_word_count_codec.rs +++ b/milli/src/heed_codec/field_id_word_count_codec.rs @@ -1,5 +1,7 @@ use std::borrow::Cow; +use heed::BoxedError; + use crate::{try_split_array_at, FieldId}; pub struct FieldIdWordCountCodec; @@ -7,21 +9,21 @@ pub struct FieldIdWordCountCodec; impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec { type DItem = (FieldId, u8); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (field_id_bytes, bytes) = try_split_array_at(bytes)?; + fn bytes_decode(bytes: &'a [u8]) -> Result { + let (field_id_bytes, bytes) = try_split_array_at(bytes).unwrap(); let field_id = u16::from_be_bytes(field_id_bytes); - let ([word_count], _nothing) = try_split_array_at(bytes)?; - Some((field_id, word_count)) + let ([word_count], _nothing) = try_split_array_at(bytes).unwrap(); + Ok((field_id, word_count)) } } impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec { type EItem = (FieldId, u8); - fn bytes_encode((field_id, word_count): &Self::EItem) -> Option> { + fn bytes_encode((field_id, word_count): &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(2 + 1); bytes.extend_from_slice(&field_id.to_be_bytes()); bytes.push(*word_count); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/fst_set_codec.rs b/milli/src/heed_codec/fst_set_codec.rs index fc79acf29..b402c8ff3 100644 --- a/milli/src/heed_codec/fst_set_codec.rs +++ b/milli/src/heed_codec/fst_set_codec.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use fst::Set; -use heed::{BytesDecode, BytesEncode}; +use heed::{BoxedError, BytesDecode, BytesEncode}; /// A codec for values of type `Set<&[u8]>`. pub struct FstSetCodec; @@ -9,15 +9,15 @@ pub struct FstSetCodec; impl<'a> BytesEncode<'a> for FstSetCodec { type EItem = Set>; - fn bytes_encode(item: &'a Self::EItem) -> Option> { - Some(Cow::Borrowed(item.as_fst().as_bytes())) + fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { + Ok(Cow::Borrowed(item.as_fst().as_bytes())) } } impl<'a> BytesDecode<'a> for FstSetCodec { type DItem = Set<&'a [u8]>; - fn bytes_decode(bytes: &'a [u8]) -> Option { - Set::new(bytes).ok() + fn bytes_decode(bytes: &'a [u8]) -> Result { + Set::new(bytes).map_err(Into::into) } } diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index d04eaa644..4b2b0fb6f 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -13,6 +13,7 @@ mod str_ref; mod str_str_u8_codec; pub use byte_slice_ref::ByteSliceRefCodec; +use heed::BoxedError; pub use str_ref::StrRefCodec; pub use self::beu16_str_codec::BEU16StrCodec; @@ -31,5 +32,5 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; pub trait BytesDecodeOwned { type DItem; - fn bytes_decode_owned(bytes: &[u8]) -> Option; + fn bytes_decode_owned(bytes: &[u8]) -> Result; } diff --git a/milli/src/heed_codec/obkv_codec.rs b/milli/src/heed_codec/obkv_codec.rs index 6dad771a8..d2408c87d 100644 --- a/milli/src/heed_codec/obkv_codec.rs +++ b/milli/src/heed_codec/obkv_codec.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; +use heed::BoxedError; use obkv::{KvReaderU16, KvWriterU16}; pub struct ObkvCodec; @@ -7,15 +8,15 @@ pub struct ObkvCodec; impl<'a> heed::BytesDecode<'a> for ObkvCodec { type DItem = KvReaderU16<'a>; - fn bytes_decode(bytes: &'a [u8]) -> Option { - Some(KvReaderU16::new(bytes)) + fn bytes_decode(bytes: &'a [u8]) -> Result { + Ok(KvReaderU16::new(bytes)) } } impl heed::BytesEncode<'_> for ObkvCodec { type EItem = KvWriterU16>; - fn bytes_encode(item: &Self::EItem) -> Option> { - item.clone().into_inner().map(Cow::Owned).ok() + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + item.clone().into_inner().map(Cow::Owned).map_err(Into::into) } } diff --git a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs index 9ad2e9707..c5e7e3e89 100644 --- a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use std::convert::TryInto; use std::mem::size_of; -use heed::BytesDecode; +use heed::{BoxedError, BytesDecode}; use roaring::RoaringBitmap; use crate::heed_codec::BytesDecodeOwned; @@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec { impl BytesDecode<'_> for BoRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode(bytes: &[u8]) -> Option { + fn bytes_decode(bytes: &[u8]) -> Result { let mut bitmap = RoaringBitmap::new(); for chunk in bytes.chunks(size_of::()) { - let bytes = chunk.try_into().ok()?; + let bytes = chunk.try_into()?; bitmap.push(u32::from_ne_bytes(bytes)); } - Some(bitmap) + Ok(bitmap) } } impl BytesDecodeOwned for BoRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode_owned(bytes: &[u8]) -> Option { + fn bytes_decode_owned(bytes: &[u8]) -> Result { Self::bytes_decode(bytes) } } @@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec { impl heed::BytesEncode<'_> for BoRoaringBitmapCodec { type EItem = RoaringBitmap; - fn bytes_encode(item: &Self::EItem) -> Option> { + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut out = Vec::new(); BoRoaringBitmapCodec::serialize_into(item, &mut out); - Some(Cow::Owned(out)) + Ok(Cow::Owned(out)) } } diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index f635e55af..dcab42c0a 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -3,6 +3,7 @@ use std::io; use std::mem::size_of; use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; +use heed::BoxedError; use roaring::RoaringBitmap; use crate::heed_codec::BytesDecodeOwned; @@ -132,26 +133,26 @@ impl CboRoaringBitmapCodec { impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode(bytes: &[u8]) -> Option { - Self::deserialize_from(bytes).ok() + fn bytes_decode(bytes: &[u8]) -> Result { + Self::deserialize_from(bytes).map_err(Into::into) } } impl BytesDecodeOwned for CboRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode_owned(bytes: &[u8]) -> Option { - Self::deserialize_from(bytes).ok() + fn bytes_decode_owned(bytes: &[u8]) -> Result { + Self::deserialize_from(bytes).map_err(Into::into) } } impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { type EItem = RoaringBitmap; - fn bytes_encode(item: &Self::EItem) -> Option> { + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut vec = Vec::with_capacity(Self::serialized_size(item)); Self::serialize_into(item, &mut vec); - Some(Cow::Owned(vec)) + Ok(Cow::Owned(vec)) } } diff --git a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs index f982cc105..aa532ffdd 100644 --- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; +use heed::BoxedError; use roaring::RoaringBitmap; use crate::heed_codec::BytesDecodeOwned; @@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec; impl heed::BytesDecode<'_> for RoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode(bytes: &[u8]) -> Option { - RoaringBitmap::deserialize_unchecked_from(bytes).ok() + fn bytes_decode(bytes: &[u8]) -> Result { + RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into) } } impl BytesDecodeOwned for RoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode_owned(bytes: &[u8]) -> Option { - RoaringBitmap::deserialize_from(bytes).ok() + fn bytes_decode_owned(bytes: &[u8]) -> Result { + RoaringBitmap::deserialize_from(bytes).map_err(Into::into) } } impl heed::BytesEncode<'_> for RoaringBitmapCodec { type EItem = RoaringBitmap; - fn bytes_encode(item: &Self::EItem) -> Option> { + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(item.serialized_size()); - item.serialize_into(&mut bytes).ok()?; - Some(Cow::Owned(bytes)) + item.serialize_into(&mut bytes)?; + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs index 8fae60df7..cf4997d26 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs @@ -1,6 +1,6 @@ use std::mem; -use heed::BytesDecode; +use heed::{BoxedError, BytesDecode}; use crate::heed_codec::BytesDecodeOwned; @@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec; impl BytesDecode<'_> for BoRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode(bytes: &[u8]) -> Option { - Some((bytes.len() / mem::size_of::()) as u64) + fn bytes_decode(bytes: &[u8]) -> Result { + Ok((bytes.len() / mem::size_of::()) as u64) } } impl BytesDecodeOwned for BoRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode_owned(bytes: &[u8]) -> Option { + fn bytes_decode_owned(bytes: &[u8]) -> Result { Self::bytes_decode(bytes) } } diff --git a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs index 5719a538a..c2565c939 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs @@ -1,6 +1,6 @@ use std::mem; -use heed::BytesDecode; +use heed::{BoxedError, BytesDecode}; use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec}; use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD; @@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec; impl BytesDecode<'_> for CboRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode(bytes: &[u8]) -> Option { + fn bytes_decode(bytes: &[u8]) -> Result { if bytes.len() <= THRESHOLD * mem::size_of::() { // If there is threshold or less than threshold integers that can fit into this array // of bytes it means that we used the ByteOrder codec serializer. @@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec { impl BytesDecodeOwned for CboRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode_owned(bytes: &[u8]) -> Option { + fn bytes_decode_owned(bytes: &[u8]) -> Result { Self::bytes_decode(bytes) } } diff --git a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs index a9b0506ff..578cb31e2 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs @@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read}; use std::mem; use byteorder::{LittleEndian, ReadBytesExt}; +use heed::BoxedError; use crate::heed_codec::BytesDecodeOwned; @@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec { impl heed::BytesDecode<'_> for RoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode(bytes: &[u8]) -> Option { - RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() + fn bytes_decode(bytes: &[u8]) -> Result { + RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into) } } impl BytesDecodeOwned for RoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode_owned(bytes: &[u8]) -> Option { - RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() + fn bytes_decode_owned(bytes: &[u8]) -> Result { + RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into) } } diff --git a/milli/src/heed_codec/script_language_codec.rs b/milli/src/heed_codec/script_language_codec.rs index 83e8a7241..83f5bf45f 100644 --- a/milli/src/heed_codec/script_language_codec.rs +++ b/milli/src/heed_codec/script_language_codec.rs @@ -2,29 +2,30 @@ use std::borrow::Cow; use std::str; use charabia::{Language, Script}; +use heed::BoxedError; pub struct ScriptLanguageCodec; impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec { type DItem = (Script, Language); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let sep = bytes.iter().position(|b| *b == 0)?; + fn bytes_decode(bytes: &'a [u8]) -> Result { + let sep = bytes.iter().position(|b| *b == 0).unwrap(); let (s_bytes, l_bytes) = bytes.split_at(sep); - let script = str::from_utf8(s_bytes).ok()?; + let script = str::from_utf8(s_bytes)?; let script_name = Script::from_name(script); - let lan = str::from_utf8(l_bytes).ok()?; + let lan = str::from_utf8(l_bytes)?; // skip '\0' byte between the two strings. let lan_name = Language::from_name(&lan[1..]); - Some((script_name, lan_name)) + Ok((script_name, lan_name)) } } impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { type EItem = (Script, Language); - fn bytes_encode((script, lan): &Self::EItem) -> Option> { + fn bytes_encode((script, lan): &Self::EItem) -> Result, BoxedError> { let script_name = script.name().as_bytes(); let lan_name = lan.name().as_bytes(); @@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { bytes.push(0); bytes.extend_from_slice(lan_name); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/str_beu32_codec.rs b/milli/src/heed_codec/str_beu32_codec.rs index cce849e37..aace89d78 100644 --- a/milli/src/heed_codec/str_beu32_codec.rs +++ b/milli/src/heed_codec/str_beu32_codec.rs @@ -3,37 +3,39 @@ use std::convert::TryInto; use std::mem::size_of; use std::str; +use heed::BoxedError; + pub struct StrBEU32Codec; impl<'a> heed::BytesDecode<'a> for StrBEU32Codec { type DItem = (&'a str, u32); - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { let footer_len = size_of::(); if bytes.len() < footer_len { - return None; + panic!() // TODO Do not panic } let (word, bytes) = bytes.split_at(bytes.len() - footer_len); - let word = str::from_utf8(word).ok()?; - let pos = bytes.try_into().map(u32::from_be_bytes).ok()?; + let word = str::from_utf8(word)?; + let pos = bytes.try_into().map(u32::from_be_bytes)?; - Some((word, pos)) + Ok((word, pos)) } } impl<'a> heed::BytesEncode<'a> for StrBEU32Codec { type EItem = (&'a str, u32); - fn bytes_encode((word, pos): &Self::EItem) -> Option> { + fn bytes_encode((word, pos): &Self::EItem) -> Result, BoxedError> { let pos = pos.to_be_bytes(); let mut bytes = Vec::with_capacity(word.len() + pos.len()); bytes.extend_from_slice(word.as_bytes()); bytes.extend_from_slice(&pos[..]); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } @@ -42,26 +44,26 @@ pub struct StrBEU16Codec; impl<'a> heed::BytesDecode<'a> for StrBEU16Codec { type DItem = (&'a str, u16); - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { let footer_len = size_of::(); if bytes.len() < footer_len + 1 { - return None; + panic!() // TODO do not panic } let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len); - let (_, word) = word_plus_nul_byte.split_last()?; - let word = str::from_utf8(word).ok()?; - let pos = bytes.try_into().map(u16::from_be_bytes).ok()?; + let (_, word) = word_plus_nul_byte.split_last().unwrap(); + let word = str::from_utf8(word).ok().unwrap(); + let pos = bytes.try_into().map(u16::from_be_bytes).ok().unwrap(); - Some((word, pos)) + Ok((word, pos)) } } impl<'a> heed::BytesEncode<'a> for StrBEU16Codec { type EItem = (&'a str, u16); - fn bytes_encode((word, pos): &Self::EItem) -> Option> { + fn bytes_encode((word, pos): &Self::EItem) -> Result, BoxedError> { let pos = pos.to_be_bytes(); let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len()); @@ -69,6 +71,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec { bytes.push(0); bytes.extend_from_slice(&pos[..]); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/str_ref.rs b/milli/src/heed_codec/str_ref.rs index ced5cc65e..bdf262a46 100644 --- a/milli/src/heed_codec/str_ref.rs +++ b/milli/src/heed_codec/str_ref.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use heed::{BytesDecode, BytesEncode}; +use heed::{BoxedError, BytesDecode, BytesEncode}; /// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated /// types are equivalent (= `&'a str`) and these values can reside within another structure. @@ -8,15 +8,14 @@ pub struct StrRefCodec; impl<'a> BytesEncode<'a> for StrRefCodec { type EItem = &'a str; - fn bytes_encode(item: &'a &'a str) -> Option> { - Some(Cow::Borrowed(item.as_bytes())) + fn bytes_encode(item: &'a &'a str) -> Result, BoxedError> { + Ok(Cow::Borrowed(item.as_bytes())) } } impl<'a> BytesDecode<'a> for StrRefCodec { type DItem = &'a str; - fn bytes_decode(bytes: &'a [u8]) -> Option { - let s = std::str::from_utf8(bytes).ok()?; - Some(s) + fn bytes_decode(bytes: &'a [u8]) -> Result { + std::str::from_utf8(bytes).map_err(Into::into) } } diff --git a/milli/src/heed_codec/str_str_u8_codec.rs b/milli/src/heed_codec/str_str_u8_codec.rs index 60be8ddc7..c6ba7ebac 100644 --- a/milli/src/heed_codec/str_str_u8_codec.rs +++ b/milli/src/heed_codec/str_str_u8_codec.rs @@ -1,32 +1,34 @@ use std::borrow::Cow; use std::str; +use heed::BoxedError; + pub struct U8StrStrCodec; impl<'a> heed::BytesDecode<'a> for U8StrStrCodec { type DItem = (u8, &'a str, &'a str); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (n, bytes) = bytes.split_first()?; - let s1_end = bytes.iter().position(|b| *b == 0)?; + fn bytes_decode(bytes: &'a [u8]) -> Result { + let (n, bytes) = bytes.split_first().unwrap(); + let s1_end = bytes.iter().position(|b| *b == 0).unwrap(); let (s1_bytes, rest) = bytes.split_at(s1_end); let s2_bytes = &rest[1..]; - let s1 = str::from_utf8(s1_bytes).ok()?; - let s2 = str::from_utf8(s2_bytes).ok()?; - Some((*n, s1, s2)) + let s1 = str::from_utf8(s1_bytes).ok().unwrap(); + let s2 = str::from_utf8(s2_bytes).ok().unwrap(); + Ok((*n, s1, s2)) } } impl<'a> heed::BytesEncode<'a> for U8StrStrCodec { type EItem = (u8, &'a str, &'a str); - fn bytes_encode((n, s1, s2): &Self::EItem) -> Option> { + fn bytes_encode((n, s1, s2): &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); bytes.push(*n); bytes.extend_from_slice(s1.as_bytes()); bytes.push(0); bytes.extend_from_slice(s2.as_bytes()); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } pub struct UncheckedU8StrStrCodec; @@ -34,24 +36,24 @@ pub struct UncheckedU8StrStrCodec; impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec { type DItem = (u8, &'a [u8], &'a [u8]); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (n, bytes) = bytes.split_first()?; - let s1_end = bytes.iter().position(|b| *b == 0)?; + fn bytes_decode(bytes: &'a [u8]) -> Result { + let (n, bytes) = bytes.split_first().unwrap(); + let s1_end = bytes.iter().position(|b| *b == 0).unwrap(); let (s1_bytes, rest) = bytes.split_at(s1_end); let s2_bytes = &rest[1..]; - Some((*n, s1_bytes, s2_bytes)) + Ok((*n, s1_bytes, s2_bytes)) } } impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec { type EItem = (u8, &'a [u8], &'a [u8]); - fn bytes_encode((n, s1, s2): &Self::EItem) -> Option> { + fn bytes_encode((n, s1, s2): &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); bytes.push(*n); bytes.extend_from_slice(s1); bytes.push(0); bytes.extend_from_slice(s2); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/index.rs b/milli/src/index.rs index f8cceb0ef..0b57c42ff 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -4,9 +4,8 @@ use std::fs::File; use std::path::Path; use charabia::{Language, Script}; -use heed::flags::Flags; use heed::types::*; -use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn}; +use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; use roaring::RoaringBitmap; use rstar::RTree; use time::OffsetDateTime; @@ -109,10 +108,10 @@ pub struct Index { pub(crate) env: heed::Env, /// Contains many different types (e.g. the fields ids map). - pub(crate) main: PolyDatabase, + pub(crate) main: Database, /// Maps the external documents ids with the internal document id. - pub external_documents_ids: Database>, + pub external_documents_ids: Database, /// A word and all the documents ids containing the word. pub word_docids: Database, @@ -158,7 +157,7 @@ pub struct Index { /// Maps the facet field id of the normalized-for-search string facets with their original versions. pub facet_id_normalized_string_strings: Database>>, /// Maps the facet field id of the string facets with an FST containing all the facets values. - pub facet_id_string_fst: Database, FstSetCodec>, + pub facet_id_string_fst: Database, /// Maps the document id, the facet field id and the numbers. pub field_id_docid_facet_f64s: Database, @@ -166,10 +165,10 @@ pub struct Index { pub field_id_docid_facet_strings: Database, /// Maps a vector id to the document id that have it. - pub vector_id_docid: Database, OwnedType>, + pub vector_id_docid: Database, /// Maps the document id to the document as an obkv store. - pub(crate) documents: Database, ObkvCodec>, + pub(crate) documents: Database, } impl Index { @@ -182,11 +181,10 @@ impl Index { use db_name::*; options.max_dbs(24); - unsafe { options.flag(Flags::MdbAlwaysFreePages) }; let env = options.open(path)?; let mut wtxn = env.write_txn()?; - let main = env.create_poly_database(&mut wtxn, Some(MAIN))?; + let main = env.database_options().name(MAIN).create(&mut wtxn)?; let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; let external_documents_ids = env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?; @@ -264,20 +262,23 @@ impl Index { fn set_creation_dates( env: &heed::Env, - main: PolyDatabase, + main: Database, created_at: OffsetDateTime, updated_at: OffsetDateTime, ) -> heed::Result<()> { let mut txn = env.write_txn()?; // The db was just created, we update its metadata with the relevant information. - if main.get::<_, Str, SerdeJson>(&txn, main_key::CREATED_AT_KEY)?.is_none() + if main + .remap_types::>() + .get(&txn, main_key::CREATED_AT_KEY)? + .is_none() { - main.put::<_, Str, SerdeJson>( + main.remap_types::>().put( &mut txn, main_key::UPDATED_AT_KEY, &updated_at, )?; - main.put::<_, Str, SerdeJson>( + main.remap_types::>().put( &mut txn, main_key::CREATED_AT_KEY, &created_at, @@ -319,11 +320,11 @@ impl Index { /// This value is the maximum between the map size passed during the opening of the index /// and the on-disk size of the index at the time of opening. pub fn map_size(&self) -> Result { - Ok(self.env.map_size()?) + Ok(self.env.info().map_size) // TODO remove Result } pub fn copy_to_path>(&self, path: P, option: CompactionOption) -> Result { - self.env.copy_to_path(path, option).map_err(Into::into) + self.env.copy_to_file(path, option).map_err(Into::into) } /// Returns an `EnvClosingEvent` that can be used to wait for the closing event, @@ -343,21 +344,28 @@ impl Index { wtxn: &mut RwTxn, docids: &RoaringBitmap, ) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids) + self.main.remap_types::().put( + wtxn, + main_key::DOCUMENTS_IDS_KEY, + docids, + ) } /// Returns the internal documents ids. pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result { Ok(self .main - .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)? + .remap_types::() + .get(rtxn, main_key::DOCUMENTS_IDS_KEY)? .unwrap_or_default()) } /// Returns the number of documents indexed in the database. pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result { - let count = - self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?; + let count = self + .main + .remap_types::() + .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?; Ok(count.unwrap_or_default()) } @@ -366,17 +374,17 @@ impl Index { /// Writes the documents primary key, this is the field name that is used to store the id. pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; - self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, primary_key) + self.main.remap_types::().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key) } /// Deletes the primary key of the documents, this can be done to reset indexes settings. pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::PRIMARY_KEY_KEY) } /// Returns the documents primary key, `None` if it hasn't been defined. pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY) + self.main.remap_types::().get(rtxn, main_key::PRIMARY_KEY_KEY) } /* external documents ids */ @@ -396,7 +404,11 @@ impl Index { wtxn: &mut RwTxn, map: &FieldsIdsMap, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map) + self.main.remap_types::>().put( + wtxn, + main_key::FIELDS_IDS_MAP_KEY, + map, + ) } /// Returns the fields ids map which associate the documents keys with an internal field id @@ -404,7 +416,8 @@ impl Index { pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::FIELDS_IDS_MAP_KEY)? + .remap_types::>() + .get(rtxn, main_key::FIELDS_IDS_MAP_KEY)? .unwrap_or_default()) } @@ -416,19 +429,24 @@ impl Index { wtxn: &mut RwTxn, rtree: &RTree, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode>>(wtxn, main_key::GEO_RTREE_KEY, rtree) + self.main.remap_types::>>().put( + wtxn, + main_key::GEO_RTREE_KEY, + rtree, + ) } /// Delete the `rtree` which associates coordinates to documents ids. pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::GEO_RTREE_KEY) } /// Returns the `rtree` which associates coordinates to documents ids. pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result>> { match self .main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::GEO_RTREE_KEY)? + .remap_types::>>() + .get(rtxn, main_key::GEO_RTREE_KEY)? { Some(rtree) => Ok(Some(rtree)), None => Ok(None), @@ -443,7 +461,7 @@ impl Index { wtxn: &mut RwTxn, docids: &RoaringBitmap, ) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>( + self.main.remap_types::().put( wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY, docids, @@ -452,14 +470,15 @@ impl Index { /// Delete the documents ids that are faceted with a _geo field. pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) } /// Retrieve all the documents ids that are faceted with a _geo field. pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result { match self .main - .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)? + .remap_types::() + .get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), @@ -474,22 +493,22 @@ impl Index { self.delete_vector_hnsw(wtxn)?; let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB - let bytes = bincode::serialize(hnsw).map_err(|_| heed::Error::Encoding)?; + let bytes = bincode::serialize(hnsw).map_err(Into::into).map_err(heed::Error::Encoding)?; for (i, chunk) in bytes.chunks(chunk_size).enumerate() { let i = i as u32; let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec(); key.extend_from_slice(&i.to_be_bytes()); - self.main.put::<_, ByteSlice, ByteSlice>(wtxn, &key, chunk)?; + self.main.remap_types::().put(wtxn, &key, chunk)?; } Ok(()) } /// Delete the `hnsw`. pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result { - let mut iter = self.main.prefix_iter_mut::<_, ByteSlice, DecodeIgnore>( - wtxn, - main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes(), - )?; + let mut iter = self + .main + .remap_types::() + .prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?; let mut deleted = false; while iter.next().transpose()?.is_some() { // We do not keep a reference to the key or the value. @@ -501,8 +520,10 @@ impl Index { /// Returns the `hnsw`. pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result> { let mut slices = Vec::new(); - for result in - self.main.prefix_iter::<_, Str, ByteSlice>(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? + for result in self + .main + .remap_types::() + .prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? { let (_, slice) = result?; slices.push(slice); @@ -512,7 +533,11 @@ impl Index { Ok(None) } else { let readable_slices: ReadableSlices<_> = slices.into_iter().collect(); - Ok(Some(bincode::deserialize_from(readable_slices).map_err(|_| heed::Error::Decoding)?)) + Ok(Some( + bincode::deserialize_from(readable_slices) + .map_err(Into::into) + .map_err(heed::Error::Decoding)?, + )) } } @@ -525,7 +550,7 @@ impl Index { wtxn: &mut RwTxn, distribution: &FieldDistribution, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>( + self.main.remap_types::>().put( wtxn, main_key::FIELD_DISTRIBUTION_KEY, distribution, @@ -537,7 +562,8 @@ impl Index { pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::FIELD_DISTRIBUTION_KEY)? + .remap_types::>() + .get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)? .unwrap_or_default()) } @@ -550,7 +576,7 @@ impl Index { wtxn: &mut RwTxn, fields: &[&str], ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>( + self.main.remap_types::>().put( wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields, @@ -560,13 +586,15 @@ impl Index { /// Deletes the displayed fields ids, this will make the engine to display /// all the documents attributes in the order of the `FieldsIdsMap`. pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY) } /// Returns the displayed fields in the order they were set by the user. If it returns /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::DISPLAYED_FIELDS_KEY) + self.main + .remap_types::>>() + .get(rtxn, main_key::DISPLAYED_FIELDS_KEY) } /// Identical to `displayed_fields`, but returns the ids instead. @@ -646,7 +674,7 @@ impl Index { /// Writes the searchable fields, when this list is specified, only these are indexed. fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>( + self.main.remap_types::>().put( wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields, @@ -655,13 +683,15 @@ impl Index { /// Deletes the searchable fields, when no fields are specified, all fields are indexed. fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Returns the searchable fields, those are the fields that are indexed, /// if the searchable fields aren't there it means that **all** the fields are indexed. pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY) + self.main + .remap_types::>>() + .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Identical to `searchable_fields`, but returns the ids instead. @@ -687,7 +717,7 @@ impl Index { wtxn: &mut RwTxn, fields: &[&str], ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>( + self.main.remap_types::>().put( wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY, &fields, @@ -699,7 +729,7 @@ impl Index { &self, wtxn: &mut RwTxn, ) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) } /// Returns the user defined searchable fields. @@ -708,7 +738,8 @@ impl Index { rtxn: &'t RoTxn, ) -> heed::Result>> { self.main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) + .remap_types::>>() + .get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) } /* filterable fields */ @@ -719,19 +750,24 @@ impl Index { wtxn: &mut RwTxn, fields: &HashSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields) + self.main.remap_types::>().put( + wtxn, + main_key::FILTERABLE_FIELDS_KEY, + fields, + ) } /// Deletes the filterable fields ids in the database. pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY) } /// Returns the filterable fields names. pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self .main - .get::<_, Str, SerdeJson<_>>(rtxn, main_key::FILTERABLE_FIELDS_KEY)? + .remap_types::>() + .get(rtxn, main_key::FILTERABLE_FIELDS_KEY)? .unwrap_or_default()) } @@ -758,19 +794,24 @@ impl Index { wtxn: &mut RwTxn, fields: &HashSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields) + self.main.remap_types::>().put( + wtxn, + main_key::SORTABLE_FIELDS_KEY, + fields, + ) } /// Deletes the sortable fields ids in the database. pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SORTABLE_FIELDS_KEY) } /// Returns the sortable fields names. pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self .main - .get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)? + .remap_types::>() + .get(rtxn, main_key::SORTABLE_FIELDS_KEY)? .unwrap_or_default()) } @@ -789,14 +830,19 @@ impl Index { wtxn: &mut RwTxn, fields: &HashSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::HIDDEN_FACETED_FIELDS_KEY, fields) + self.main.remap_types::>().put( + wtxn, + main_key::HIDDEN_FACETED_FIELDS_KEY, + fields, + ) } /// Returns the faceted fields names. pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self .main - .get::<_, Str, SerdeJson<_>>(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? + .remap_types::>() + .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? .unwrap_or_default()) } @@ -863,7 +909,7 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? { + match self.facet_id_is_null_docids.get(rtxn, &field_id)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), } @@ -875,7 +921,7 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? { + match self.facet_id_is_empty_docids.get(rtxn, &field_id)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), } @@ -887,7 +933,7 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - match self.facet_id_exists_docids.get(rtxn, &BEU16::new(field_id))? { + match self.facet_id_exists_docids.get(rtxn, &field_id)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), } @@ -900,15 +946,15 @@ impl Index { wtxn: &mut RwTxn, distinct_field: &str, ) -> heed::Result<()> { - self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) + self.main.remap_types::().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) } pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY) + self.main.remap_types::().get(rtxn, main_key::DISTINCT_FIELD_KEY) } pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::DISTINCT_FIELD_KEY) } /* criteria */ @@ -918,15 +964,23 @@ impl Index { wtxn: &mut RwTxn, criteria: &[Criterion], ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) + self.main.remap_types::>().put( + wtxn, + main_key::CRITERIA_KEY, + &criteria, + ) } pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::CRITERIA_KEY) } pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result> { - match self.main.get::<_, Str, SerdeJson>>(rtxn, main_key::CRITERIA_KEY)? { + match self + .main + .remap_types::>>() + .get(rtxn, main_key::CRITERIA_KEY)? + { Some(criteria) => Ok(criteria), None => Ok(default_criteria()), } @@ -940,12 +994,16 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes()) + self.main.remap_types::().put( + wtxn, + main_key::WORDS_FST_KEY, + fst.as_fst().as_bytes(), + ) } /// Returns the FST which is the words dictionary of the engine. pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? { + match self.main.remap_types::().get(rtxn, main_key::WORDS_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -958,15 +1016,19 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) + self.main.remap_types::().put( + wtxn, + main_key::STOP_WORDS_KEY, + fst.as_fst().as_bytes(), + ) } pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::STOP_WORDS_KEY) } pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? { + match self.main.remap_types::().get(rtxn, main_key::STOP_WORDS_KEY)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), None => Ok(None), } @@ -979,18 +1041,22 @@ impl Index { wtxn: &mut RwTxn, set: &BTreeSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set) + self.main.remap_types::>().put( + wtxn, + main_key::NON_SEPARATOR_TOKENS_KEY, + set, + ) } pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) } pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result>> { - Ok(self.main.get::<_, Str, SerdeBincode>>( - rtxn, - main_key::NON_SEPARATOR_TOKENS_KEY, - )?) + Ok(self + .main + .remap_types::>>() + .get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?) } /* separator tokens */ @@ -1000,17 +1066,22 @@ impl Index { wtxn: &mut RwTxn, set: &BTreeSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set) + self.main.remap_types::>().put( + wtxn, + main_key::SEPARATOR_TOKENS_KEY, + set, + ) } pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY) } pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result>> { Ok(self .main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) + .remap_types::>>() + .get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) } /* separators easing method */ @@ -1040,17 +1111,18 @@ impl Index { wtxn: &mut RwTxn, set: &BTreeSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set) + self.main.remap_types::>().put(wtxn, main_key::DICTIONARY_KEY, set) } pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::DICTIONARY_KEY) } pub fn dictionary(&self, rtxn: &RoTxn) -> Result>> { Ok(self .main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::DICTIONARY_KEY)?) + .remap_types::>>() + .get(rtxn, main_key::DICTIONARY_KEY)?) } /* synonyms */ @@ -1061,8 +1133,12 @@ impl Index { synonyms: &HashMap, Vec>>, user_defined_synonyms: &BTreeMap>, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?; - self.main.put::<_, Str, SerdeBincode<_>>( + self.main.remap_types::>().put( + wtxn, + main_key::SYNONYMS_KEY, + synonyms, + )?; + self.main.remap_types::>().put( wtxn, main_key::USER_DEFINED_SYNONYMS_KEY, user_defined_synonyms, @@ -1070,8 +1146,8 @@ impl Index { } pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?; - self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SYNONYMS_KEY)?; + self.main.remap_key_type::().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) } pub fn user_defined_synonyms( @@ -1080,14 +1156,16 @@ impl Index { ) -> heed::Result>> { Ok(self .main - .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? + .remap_types::>() + .get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? .unwrap_or_default()) } pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result, Vec>>> { Ok(self .main - .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)? + .remap_types::>() + .get(rtxn, main_key::SYNONYMS_KEY)? .unwrap_or_default()) } @@ -1108,7 +1186,7 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>( + self.main.remap_types::().put( wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes(), @@ -1117,7 +1195,11 @@ impl Index { /// Returns the FST which is the words prefixes dictionnary of the engine. pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { + match self + .main + .remap_types::() + .get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? + { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -1142,7 +1224,7 @@ impl Index { Ok(ids.into_iter().map(move |id| { let kv = self .documents - .get(rtxn, &BEU32::new(id))? + .get(rtxn, &id)? .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?; Ok((id, kv)) })) @@ -1207,7 +1289,8 @@ impl Index { pub fn created_at(&self, rtxn: &RoTxn) -> Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::CREATED_AT_KEY)? + .remap_types::>() + .get(rtxn, main_key::CREATED_AT_KEY)? .ok_or(InternalError::DatabaseMissingEntry { db_name: db_name::MAIN, key: Some(main_key::CREATED_AT_KEY), @@ -1218,7 +1301,8 @@ impl Index { pub fn updated_at(&self, rtxn: &RoTxn) -> Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::UPDATED_AT_KEY)? + .remap_types::>() + .get(rtxn, main_key::UPDATED_AT_KEY)? .ok_or(InternalError::DatabaseMissingEntry { db_name: db_name::MAIN, key: Some(main_key::UPDATED_AT_KEY), @@ -1230,14 +1314,18 @@ impl Index { wtxn: &mut RwTxn, time: &OffsetDateTime, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, main_key::UPDATED_AT_KEY, time) + self.main.remap_types::>().put( + wtxn, + main_key::UPDATED_AT_KEY, + time, + ) } pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - match self.main.get::<_, Str, OwnedType>(txn, main_key::AUTHORIZE_TYPOS)? { + match self.main.remap_types::>().get(txn, main_key::AUTHORIZE_TYPOS)? { Some(0) => Ok(false), _ => Ok(true), } @@ -1247,7 +1335,11 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.put::<_, Str, OwnedType>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; + self.main.remap_types::>().put( + txn, + main_key::AUTHORIZE_TYPOS, + &(flag as u8), + )?; Ok(()) } @@ -1258,7 +1350,8 @@ impl Index { // because by default, we authorize typos. Ok(self .main - .get::<_, Str, OwnedType>(txn, main_key::ONE_TYPO_WORD_LEN)? + .remap_types::>() + .get(txn, main_key::ONE_TYPO_WORD_LEN)? .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO)) } @@ -1266,7 +1359,11 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.put::<_, Str, OwnedType>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; + self.main.remap_types::>().put( + txn, + main_key::ONE_TYPO_WORD_LEN, + &val, + )?; Ok(()) } @@ -1276,7 +1373,8 @@ impl Index { // because by default, we authorize typos. Ok(self .main - .get::<_, Str, OwnedType>(txn, main_key::TWO_TYPOS_WORD_LEN)? + .remap_types::>() + .get(txn, main_key::TWO_TYPOS_WORD_LEN)? .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } @@ -1284,13 +1382,17 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.put::<_, Str, OwnedType>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; + self.main.remap_types::>().put( + txn, + main_key::TWO_TYPOS_WORD_LEN, + &val, + )?; Ok(()) } /// List the words on which typo are not allowed pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result>>> { - match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? { + match self.main.remap_types::().get(txn, main_key::EXACT_WORDS)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)), None => Ok(None), } @@ -1301,7 +1403,7 @@ impl Index { txn: &mut RwTxn, words: &fst::Set, ) -> Result<()> { - self.main.put::<_, Str, ByteSlice>( + self.main.remap_types::().put( txn, main_key::EXACT_WORDS, words.as_fst().as_bytes(), @@ -1313,7 +1415,8 @@ impl Index { pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result> { Ok(self .main - .get::<_, Str, SerdeBincode>>(txn, main_key::EXACT_ATTRIBUTES)? + .remap_types::>>() + .get(txn, main_key::EXACT_ATTRIBUTES)? .unwrap_or_default()) } @@ -1326,34 +1429,40 @@ impl Index { /// Writes the exact attributes to the database. pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>(txn, main_key::EXACT_ATTRIBUTES, &attrs)?; + self.main.remap_types::>().put( + txn, + main_key::EXACT_ATTRIBUTES, + &attrs, + )?; Ok(()) } /// Clears the exact attributes from the store. pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES) + self.main.remap_key_type::().delete(txn, main_key::EXACT_ATTRIBUTES) } pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result> { - self.main.get::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET) + self.main.remap_types::>().get(txn, main_key::MAX_VALUES_PER_FACET) } pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> { - self.main.put::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET, &val) + self.main.remap_types::>().put( + txn, + main_key::MAX_VALUES_PER_FACET, + &val, + ) } pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) + self.main.remap_key_type::().delete(txn, main_key::MAX_VALUES_PER_FACET) } pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result> { let mut orders = self .main - .get::<_, Str, SerdeJson>>( - txn, - main_key::SORT_FACET_VALUES_BY, - )? + .remap_types::>>() + .get(txn, main_key::SORT_FACET_VALUES_BY)? .unwrap_or_default(); // Insert the default ordering if it is not already overwritten by the user. orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); @@ -1365,15 +1474,17 @@ impl Index { txn: &mut RwTxn, val: &HashMap, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val) + self.main.remap_types::>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) } pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY) + self.main.remap_key_type::().delete(txn, main_key::SORT_FACET_VALUES_BY) } pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result> { - self.main.get::<_, Str, OwnedType>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) + self.main + .remap_types::>() + .get(txn, main_key::PAGINATION_MAX_TOTAL_HITS) } pub(crate) fn put_pagination_max_total_hits( @@ -1381,11 +1492,15 @@ impl Index { txn: &mut RwTxn, val: usize, ) -> heed::Result<()> { - self.main.put::<_, Str, OwnedType>(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val) + self.main.remap_types::>().put( + txn, + main_key::PAGINATION_MAX_TOTAL_HITS, + &val, + ) } pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) + self.main.remap_key_type::().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS) } /* script language docids */ diff --git a/milli/src/lib.rs b/milli/src/lib.rs index cfa438609..acea72c41 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -66,9 +66,9 @@ pub use self::search::{ pub type Result = std::result::Result; pub type Attribute = u32; -pub type BEU16 = heed::zerocopy::U16; -pub type BEU32 = heed::zerocopy::U32; -pub type BEU64 = heed::zerocopy::U64; +pub type BEU16 = heed::types::U16; +pub type BEU32 = heed::types::U32; +pub type BEU64 = heed::types::U64; pub type DocumentId = u32; pub type FastMap4 = HashMap>; pub type FastMap8 = HashMap>; diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index 26854bc1a..f90c331d7 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -25,11 +25,11 @@ where let inner; let left = match left { Bound::Included(left) => { - inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?; Bound::Included(inner.as_ref()) } Bound::Excluded(left) => { - inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?; Bound::Excluded(inner.as_ref()) } Bound::Unbounded => Bound::Unbounded, @@ -37,11 +37,11 @@ where let inner; let right = match right { Bound::Included(right) => { - inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?; Bound::Included(inner.as_ref()) } Bound::Excluded(right) => { - inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?; Bound::Excluded(inner.as_ref()) } Bound::Unbounded => Bound::Unbounded, diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index ebc9e1da0..0572d7297 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -22,8 +22,10 @@ fn facet_extreme_value<'t>( let extreme_value = if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; let (_, extreme_value) = extreme_value?; - - Ok(OrderedF64Codec::bytes_decode(extreme_value)) + OrderedF64Codec::bytes_decode(extreme_value) + .map(Some) + .map_err(heed::Error::Decoding) + .map_err(Into::into) } pub fn facet_min_value<'t>( @@ -60,13 +62,12 @@ where let mut level0prefix = vec![]; level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.push(0); - let mut level0_iter_forward = db - .as_polymorph() - .prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?; + let mut level0_iter_forward = + db.remap_types::().prefix_iter(txn, level0prefix.as_slice())?; if let Some(first) = level0_iter_forward.next() { let (first_key, _) = first?; let first_key = FacetGroupKeyCodec::::bytes_decode(first_key) - .ok_or(heed::Error::Encoding)?; + .map_err(heed::Error::Decoding)?; Ok(Some(first_key.left_bound)) } else { Ok(None) @@ -86,12 +87,12 @@ where level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.push(0); let mut level0_iter_backward = db - .as_polymorph() - .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?; + .remap_types::() + .rev_prefix_iter(txn, level0prefix.as_slice())?; if let Some(last) = level0_iter_backward.next() { let (last_key, _) = last?; let last_key = FacetGroupKeyCodec::::bytes_decode(last_key) - .ok_or(heed::Error::Encoding)?; + .map_err(heed::Error::Decoding)?; Ok(Some(last_key.left_bound)) } else { Ok(None) @@ -106,8 +107,8 @@ pub(crate) fn get_highest_level<'t>( ) -> heed::Result { let field_id_prefix = &field_id.to_be_bytes(); Ok(db - .as_polymorph() - .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, field_id_prefix)? + .remap_types::() + .rev_prefix_iter(txn, field_id_prefix)? .next() .map(|el| { let (key, _) = el.unwrap(); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 786b565ae..ee8cd1faf 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -17,8 +17,7 @@ use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, - SearchContext, BEU16, + execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, SearchContext, }; // Building these factories is not free. @@ -299,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> { None => return Ok(Vec::new()), }; - let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { Some(fst) => fst, None => return Ok(vec![]), }; diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 2c670658d..5a74bc201 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -63,12 +63,14 @@ impl<'ctx> DatabaseCache<'ctx> { } match cache.get(&cache_key).unwrap() { - Some(Cow::Borrowed(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } - Some(Cow::Owned(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } + Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes) + .map_err(heed::Error::Decoding) + .map_err(Into::into) + .map(Some), + Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes) + .map_err(heed::Error::Decoding) + .map_err(Into::into) + .map(Some), None => Ok(None), } } @@ -110,12 +112,14 @@ impl<'ctx> DatabaseCache<'ctx> { } match cache.get(&cache_key).unwrap() { - Some(Cow::Borrowed(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } - Some(Cow::Owned(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } + Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes) + .map_err(heed::Error::Decoding) + .map_err(Into::into) + .map(Some), + Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes) + .map_err(heed::Error::Decoding) + .map_err(Into::into) + .map(Some), None => Ok(None), } } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 6ceb78223..60386258e 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -50,9 +50,7 @@ use crate::distance::NDotProductPoint; use crate::error::FieldIdMapMissingEntry; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; -use crate::{ - AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32, -}; +use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; /// A structure used throughout the execution of a search query. pub struct SearchContext<'ctx> { @@ -451,8 +449,8 @@ pub fn execute_search( let mut docids = Vec::new(); let mut uniq_docids = RoaringBitmap::new(); for instant_distance::Item { distance: _, pid, point: _ } in neighbors { - let index = BEU32::new(pid.into_inner()); - let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get(); + let index = pid.into_inner(); + let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap(); if universe.contains(docid) && uniq_docids.insert(docid) { docids.push(docid); if docids.len() == (from + length) { diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index afe0191b1..59adda3e8 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -1,15 +1,16 @@ +use heed::RwTxn; use roaring::RoaringBitmap; use time::OffsetDateTime; use crate::{FieldDistribution, Index, Result}; -pub struct ClearDocuments<'t, 'u, 'i> { - wtxn: &'t mut heed::RwTxn<'i, 'u>, +pub struct ClearDocuments<'t, 'i> { + wtxn: &'t mut RwTxn<'i>, index: &'i Index, } -impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { - pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> ClearDocuments<'t, 'u, 'i> { +impl<'t, 'i> ClearDocuments<'t, 'i> { + pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> { ClearDocuments { wtxn, index } } diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index 5626a4aae..bfee002fd 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -3,7 +3,7 @@ use std::io::BufReader; use grenad::CompressionType; use heed::types::ByteSlice; -use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn}; +use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn}; use roaring::RoaringBitmap; use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; @@ -146,7 +146,13 @@ impl FacetsUpdateBulkInner { buffer.push(1); // then we extend the buffer with the docids bitmap buffer.extend_from_slice(value); - unsafe { database.append(key, &buffer)? }; + unsafe { + database.put_current_with_options::( + PutFlags::APPEND, + key, + &buffer, + )? + }; } } else { let mut buffer = Vec::new(); @@ -219,8 +225,8 @@ impl FacetsUpdateBulkInner { let level_0_iter = self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())? + .remap_types::() + .prefix_iter(rtxn, level_0_prefix.as_slice())? .remap_types::, FacetGroupValueCodec>(); let mut left_bound: &[u8] = &[]; @@ -308,10 +314,10 @@ impl FacetsUpdateBulkInner { { let key = FacetGroupKey { field_id, level, left_bound }; let key = FacetGroupKeyCodec::::bytes_encode(&key) - .ok_or(Error::Encoding)?; + .map_err(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = - FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; + FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?; cur_writer.insert(key, value)?; cur_writer_len += 1; } @@ -337,9 +343,9 @@ impl FacetsUpdateBulkInner { { let key = FacetGroupKey { field_id, level, left_bound }; let key = FacetGroupKeyCodec::::bytes_encode(&key) - .ok_or(Error::Encoding)?; + .map_err(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; - let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; + let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?; cur_writer.insert(key, value)?; cur_writer_len += 1; } diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index e241c499c..dc47ca126 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -68,18 +68,18 @@ impl FacetsUpdateIncremental { continue; } let key = FacetGroupKeyCodec::::bytes_decode(key) - .ok_or(heed::Error::Encoding)?; + .map_err(heed::Error::Encoding)?; let value = KvReader::new(value); let docids_to_delete = value .get(DelAdd::Deletion) .map(CboRoaringBitmapCodec::bytes_decode) - .map(|o| o.ok_or(heed::Error::Encoding)); + .map(|o| o.map_err(heed::Error::Encoding)); let docids_to_add = value .get(DelAdd::Addition) .map(CboRoaringBitmapCodec::bytes_decode) - .map(|o| o.ok_or(heed::Error::Encoding)); + .map(|o| o.map_err(heed::Error::Encoding)); if let Some(docids_to_delete) = docids_to_delete { let docids_to_delete = docids_to_delete?; @@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner { prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.push(level); - let mut iter = - self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( - txn, - prefix.as_slice(), - )?; + let mut iter = self + .db + .remap_types::() + .prefix_iter(txn, prefix.as_slice())?; let (key_bytes, value) = iter.next().unwrap()?; Ok(( FacetGroupKeyCodec::::bytes_decode(key_bytes) - .ok_or(Error::Encoding)? + .map_err(Error::Encoding)? .into_owned(), value, )) @@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner { level0_prefix.extend_from_slice(&field_id.to_be_bytes()); level0_prefix.push(0); - let mut iter = self - .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?; + let mut iter = + self.db.remap_types::().prefix_iter(txn, &level0_prefix)?; if iter.next().is_none() { drop(iter); @@ -384,8 +381,8 @@ impl FacetsUpdateIncrementalInner { let size_highest_level = self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)? + .remap_types::() + .prefix_iter(txn, &highest_level_prefix)? .count(); if size_highest_level < self.group_size as usize * self.min_level_size as usize { @@ -394,8 +391,8 @@ impl FacetsUpdateIncrementalInner { let mut groups_iter = self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?; + .remap_types::() + .prefix_iter(txn, &highest_level_prefix)?; let nbr_new_groups = size_highest_level / self.group_size as usize; let nbr_leftover_elements = size_highest_level % self.group_size as usize; @@ -407,7 +404,7 @@ impl FacetsUpdateIncrementalInner { for _ in 0..group_size { let (key_bytes, value_i) = groups_iter.next().unwrap()?; let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) - .ok_or(Error::Encoding)?; + .map_err(Error::Encoding)?; if first_key.is_none() { first_key = Some(key_i); @@ -430,7 +427,7 @@ impl FacetsUpdateIncrementalInner { for _ in 0..nbr_leftover_elements { let (key_bytes, value_i) = groups_iter.next().unwrap()?; let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) - .ok_or(Error::Encoding)?; + .map_err(Error::Encoding)?; if first_key.is_none() { first_key = Some(key_i); @@ -597,8 +594,8 @@ impl FacetsUpdateIncrementalInner { if highest_level == 0 || self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)? + .remap_types::() + .prefix_iter(txn, &highest_level_prefix)? .count() >= self.min_level_size as usize { @@ -607,13 +604,13 @@ impl FacetsUpdateIncrementalInner { let mut to_delete = vec![]; let mut iter = self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?; + .remap_types::() + .prefix_iter(txn, &highest_level_prefix)?; for el in iter.by_ref() { let (k, _) = el?; to_delete.push( FacetGroupKeyCodec::::bytes_decode(k) - .ok_or(Error::Encoding)? + .map_err(Error::Encoding)? .into_owned(), ); } diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 52fea0f5f..9c1d2a370 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -95,7 +95,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu use crate::heed_codec::ByteSliceRefCodec; use crate::update::index_documents::create_sorter; use crate::update::merge_btreeset_string; -use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH}; +use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH}; pub mod bulk; pub mod incremental; @@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> { } let set = BTreeSet::from_iter(std::iter::once(left_bound)); let key = (field_id, normalized_facet.as_ref()); - let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?; - let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?; + let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; + let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?; sorter.insert(key, val)?; } } @@ -252,7 +252,7 @@ impl<'i> FacetsUpdate<'i> { // We write those FSTs in LMDB now for (field_id, fst) in text_fsts { - self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?; + self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?; } Ok(()) diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 3fcec3e79..b7de1e621 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -6,8 +6,8 @@ use std::io::{self, BufReader}; use std::mem::size_of; use std::result::Result as StdResult; +use bytemuck::bytes_of; use grenad::Sorter; -use heed::zerocopy::AsBytes; use heed::BytesEncode; use itertools::EitherOrBoth; use ordered_float::OrderedFloat; @@ -20,9 +20,7 @@ use crate::error::InternalError; use crate::facet::value_encoding::f64_into_bytes; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; use crate::update::index_documents::{create_writer, writer_into_reader}; -use crate::{ - CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH, -}; +use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH}; /// The length of the elements that are always in the buffer when inserting new values. const TRUNCATE_SIZE: usize = size_of::() + size_of::(); @@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values( strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); - let document = BEU32::from(document).get(); + let document = DocumentId::from_be_bytes(document); // For the other extraction tasks, prefix the key with the field_id and the document_id numbers_key_buffer.extend_from_slice(docid_bytes); @@ -323,7 +321,7 @@ where // We insert only the Del part of the Obkv to inform // that we only want to remove all those numbers. let mut obkv = KvWriterDelAdd::memory(); - obkv.insert(DelAdd::Deletion, ().as_bytes())?; + obkv.insert(DelAdd::Deletion, bytes_of(&()))?; let bytes = obkv.into_inner()?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } @@ -336,7 +334,7 @@ where // We insert only the Add part of the Obkv to inform // that we only want to remove all those numbers. let mut obkv = KvWriterDelAdd::memory(); - obkv.insert(DelAdd::Addition, ().as_bytes())?; + obkv.insert(DelAdd::Addition, bytes_of(&()))?; let bytes = obkv.into_inner()?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index f278012c7..66092821f 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -118,7 +118,7 @@ pub fn extract_word_docids( } let (word, fid) = StrBEU16Codec::bytes_decode(key) - .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; + .map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; // every words contained in an attribute set to exact must be pushed in the exact_words list. if exact_attributes.contains(&fid) { diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 113114681..82858a09d 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod { } } -pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> { - wtxn: &'t mut heed::RwTxn<'i, 'u>, +pub struct IndexDocuments<'t, 'i, 'a, FP, FA> { + wtxn: &'t mut heed::RwTxn<'i>, index: &'i Index, config: IndexDocumentsConfig, indexer_config: &'a IndexerConfig, @@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig { pub autogenerate_docids: bool, } -impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA> +impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA> where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { pub fn new( - wtxn: &'t mut heed::RwTxn<'i, 'u>, + wtxn: &'t mut heed::RwTxn<'i>, index: &'i Index, indexer_config: &'a IndexerConfig, config: IndexDocumentsConfig, progress: FP, should_abort: FA, - ) -> Result> { + ) -> Result> { let transform = Some(Transform::new( wtxn, index, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 323bc3da7..0c70b592f 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -24,9 +24,7 @@ use crate::index::{db_name, main_key}; use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd}; use crate::update::index_documents::GrenadParameters; use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep}; -use crate::{ - FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32, -}; +use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result}; pub struct TransformOutput { pub primary_key: String, @@ -245,7 +243,7 @@ impl<'a, 'i> Transform<'a, 'i> { let mut skip_insertion = false; if let Some(original_docid) = original_docid { - let original_key = BEU32::new(original_docid); + let original_key = original_docid; let base_obkv = self .index .documents @@ -499,7 +497,7 @@ impl<'a, 'i> Transform<'a, 'i> { self.replaced_documents_ids.insert(internal_docid); // fetch the obkv document - let original_key = BEU32::new(internal_docid); + let original_key = internal_docid; let base_obkv = self .index .documents @@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> { // TODO this can be done in parallel by using the rayon `ThreadPool`. pub fn prepare_for_documents_reindexing( self, - wtxn: &mut heed::RwTxn<'i, '_>, + wtxn: &mut heed::RwTxn<'i>, old_fields_ids_map: FieldsIdsMap, mut new_fields_ids_map: FieldsIdsMap, ) -> Result { @@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> { let obkv = self.index.documents.get(wtxn, &docid)?.ok_or( InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, )?; - let docid = docid.get(); obkv_buffer.clear(); let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer); diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 4f9f0ef6f..6ec00caae 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec; use charabia::{Language, Script}; use grenad::MergerBuilder; use heed::types::ByteSlice; -use heed::RwTxn; +use heed::{PutFlags, RwTxn}; use log::error; use obkv::{KvReader, KvWriter}; use ordered_float::OrderedFloat; @@ -27,9 +27,7 @@ use crate::index::Hnsw; use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd}; use crate::update::facet::FacetsUpdate; use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at}; -use crate::{ - lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32, -}; +use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError}; pub(crate) enum TypedChunk { FieldIdDocidFacetStrings(grenad::Reader), @@ -149,7 +147,7 @@ pub(crate) fn write_typed_chunk_into_index( let db = index.documents.remap_data_type::(); if !writer.is_empty() { - db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?; + db.put(wtxn, &docid, &writer.into_inner().unwrap())?; operations.push(DocumentOperation { external_id: external_id.to_string(), internal_id: docid, @@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index( }); docids.insert(docid); } else { - db.delete(wtxn, &BEU32::new(docid))?; + db.delete(wtxn, &docid)?; operations.push(DocumentOperation { external_id: external_id.to_string(), internal_id: docid, @@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index( // We extract and store the previous vectors if let Some(hnsw) = index.vector_hnsw(wtxn)? { for (pid, point) in hnsw.iter() { - let pid_key = BEU32::new(pid.into_inner()); - let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get(); + let pid_key = pid.into_inner(); + let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap(); let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect(); vectors_set.insert((docid, vector)); } @@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index( // Store the vectors in the point-docid relation database index.vector_id_docid.clear(wtxn)?; for (docid, pid) in docids.into_iter().zip(pids) { - index.vector_id_docid.put( - wtxn, - &BEU32::new(pid.into_inner()), - &BEU32::new(docid), - )?; + index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?; } log::debug!("There are {} entries in the HNSW so far", hnsw_length); @@ -568,14 +562,17 @@ where while let Some((key, value)) = cursor.move_on_next()? { if valid_lmdb_key(key) { debug_assert!( - K::bytes_decode(key).is_some(), + K::bytes_decode(key).is_ok(), "Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}", key.len(), &key ); buffer.clear(); let value = serialize_value(value, &mut buffer)?; - unsafe { database.append(key, value)? }; + unsafe { + // safety: We do not keep a reference to anything that lives inside the database + database.put_current_with_options::(PutFlags::APPEND, key, value)? + }; } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index fd7ffa760..926c89cf5 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -100,8 +100,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { } } -pub struct Settings<'a, 't, 'u, 'i> { - wtxn: &'t mut heed::RwTxn<'i, 'u>, +pub struct Settings<'a, 't, 'i> { + wtxn: &'t mut heed::RwTxn<'i>, index: &'i Index, indexer_config: &'a IndexerConfig, @@ -129,12 +129,12 @@ pub struct Settings<'a, 't, 'u, 'i> { pagination_max_total_hits: Setting, } -impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { +impl<'a, 't, 'i> Settings<'a, 't, 'i> { pub fn new( - wtxn: &'t mut heed::RwTxn<'i, 'u>, + wtxn: &'t mut heed::RwTxn<'i>, index: &'i Index, indexer_config: &'a IndexerConfig, - ) -> Settings<'a, 't, 'u, 'i> { + ) -> Settings<'a, 't, 'i> { Settings { wtxn, index, diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 618f451dc..5d60c3765 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -12,8 +12,8 @@ use crate::update::index_documents::{ }; use crate::{CboRoaringBitmapCodec, Result}; -pub struct WordPrefixDocids<'t, 'u, 'i> { - wtxn: &'t mut heed::RwTxn<'i, 'u>, +pub struct WordPrefixDocids<'t, 'i> { + wtxn: &'t mut heed::RwTxn<'i>, word_docids: Database, word_prefix_docids: Database, pub(crate) chunk_compression_type: CompressionType, @@ -22,12 +22,12 @@ pub struct WordPrefixDocids<'t, 'u, 'i> { pub(crate) max_memory: Option, } -impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { +impl<'t, 'i> WordPrefixDocids<'t, 'i> { pub fn new( - wtxn: &'t mut heed::RwTxn<'i, 'u>, + wtxn: &'t mut heed::RwTxn<'i>, word_docids: Database, word_prefix_docids: Database, - ) -> WordPrefixDocids<'t, 'u, 'i> { + ) -> WordPrefixDocids<'t, 'i> { WordPrefixDocids { wtxn, word_docids, diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index e083f510a..a763ecaeb 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -17,8 +17,8 @@ use crate::update::index_documents::{ }; use crate::{CboRoaringBitmapCodec, Result}; -pub struct WordPrefixIntegerDocids<'t, 'u, 'i> { - wtxn: &'t mut heed::RwTxn<'i, 'u>, +pub struct WordPrefixIntegerDocids<'t, 'i> { + wtxn: &'t mut heed::RwTxn<'i>, prefix_database: Database, word_database: Database, pub(crate) chunk_compression_type: CompressionType, @@ -27,12 +27,12 @@ pub struct WordPrefixIntegerDocids<'t, 'u, 'i> { pub(crate) max_memory: Option, } -impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { +impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { pub fn new( - wtxn: &'t mut heed::RwTxn<'i, 'u>, + wtxn: &'t mut heed::RwTxn<'i>, prefix_database: Database, word_database: Database, - ) -> WordPrefixIntegerDocids<'t, 'u, 'i> { + ) -> WordPrefixIntegerDocids<'t, 'i> { WordPrefixIntegerDocids { wtxn, prefix_database, @@ -72,7 +72,8 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { let mut current_prefixes: Option<&&[String]> = None; let mut prefixes_cache = HashMap::new(); while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? { - let (word, pos) = StrBEU16Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?; + let (word, pos) = + StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?; current_prefixes = match current_prefixes.take() { Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes), diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index 121b45c4a..f26bf93e5 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -2,21 +2,19 @@ use std::iter::{repeat_with, FromIterator}; use std::str; use fst::{SetBuilder, Streamer}; +use heed::RwTxn; use crate::{Index, Result, SmallString32}; -pub struct WordsPrefixesFst<'t, 'u, 'i> { - wtxn: &'t mut heed::RwTxn<'i, 'u>, +pub struct WordsPrefixesFst<'t, 'i> { + wtxn: &'t mut RwTxn<'i>, index: &'i Index, threshold: u32, max_prefix_length: usize, } -impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> { - pub fn new( - wtxn: &'t mut heed::RwTxn<'i, 'u>, - index: &'i Index, - ) -> WordsPrefixesFst<'t, 'u, 'i> { +impl<'t, 'i> WordsPrefixesFst<'t, 'i> { + pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> WordsPrefixesFst<'t, 'i> { WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 } }