4223: Update to heed 0.20 r=dureuill a=Kerollmops

This PR brings the v0.20-alpha.9 version of heed into Meilisearch 🎉. The main goal is to test it in a real environment and make any changes that turn out to be necessary. We also want to merge it as early as possible in the pre-release phase so that bugs are caught before the release.

Most of the calls to heed are the same as before, except:
 - The `PolyDatabase` has been replaced with a `Database<Unspecified, Unspecified>`. We replaced the `get<T, U>()` calls with `remap<T, U>().get()` calls.
 - The `Database` `append(...)` method has been replaced with `put_with_flags(PutFlags::APPEND, ...)`.
 - The `RwTxn<'e, 'p>` has been simplified into a `RwTxn<'e>`.
 - The `BytesEncode` and `BytesDecode` traits now return a `Result<_, BoxedError>` instead of an `Option<_>`.
 - We no longer need to wrap and unwrap `BEU32` integers when storing them in or getting them from heed.

### TODO
 - [x] Create actual, simple error types instead of using strings in the codecs.

### Follow-up work
 - Move the codecs into another member crate (we depend on the uuid one in the meilitool crate).
 - Display the internal decoding error in the `SerializationError` internal error variant.

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2023-11-28 13:39:44 +00:00 committed by GitHub
commit 6376c342c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
83 changed files with 962 additions and 904 deletions

103
Cargo.lock generated
View File

@ -520,6 +520,9 @@ name = "bitflags"
version = "2.3.3" version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "block-buffer" name = "block-buffer"
@ -1255,6 +1258,15 @@ dependencies = [
"syn 2.0.28", "syn 2.0.28",
] ]
[[package]]
name = "doxygen-rs"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505"
dependencies = [
"phf",
]
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.5.0" version = "1.5.0"
@ -1811,36 +1823,40 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]] [[package]]
name = "heed" name = "heed"
version = "0.12.7" version = "0.20.0-alpha.9"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934"
dependencies = [ dependencies = [
"bitflags 2.3.3",
"bytemuck",
"byteorder", "byteorder",
"heed-traits", "heed-traits",
"heed-types", "heed-types",
"libc", "libc",
"lmdb-rkv-sys", "lmdb-master-sys",
"once_cell", "once_cell",
"page_size 0.4.2", "page_size 0.6.0",
"synchronoise", "synchronoise",
"url", "url",
"zerocopy",
] ]
[[package]] [[package]]
name = "heed-traits" name = "heed-traits"
version = "0.7.0" version = "0.20.0-alpha.9"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298"
[[package]] [[package]]
name = "heed-types" name = "heed-types"
version = "0.7.2" version = "0.20.0-alpha.9"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder",
"heed-traits", "heed-traits",
"serde", "serde",
"serde_json", "serde_json",
"zerocopy",
] ]
[[package]] [[package]]
@ -2968,11 +2984,13 @@ dependencies = [
] ]
[[package]] [[package]]
name = "lmdb-rkv-sys" name = "lmdb-master-sys"
version = "0.15.1" version = "0.1.0"
source = "git+https://github.com/meilisearch/lmdb-rs#501aa34a1ab7f092e3ff54a6c22ff6c55931a2d8" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd"
dependencies = [ dependencies = [
"cc", "cc",
"doxygen-rs",
"libc", "libc",
"pkg-config", "pkg-config",
] ]
@ -3472,9 +3490,9 @@ dependencies = [
[[package]] [[package]]
name = "page_size" name = "page_size"
version = "0.4.2" version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561"
dependencies = [ dependencies = [
"libc", "libc",
"winapi", "winapi",
@ -3482,9 +3500,9 @@ dependencies = [
[[package]] [[package]]
name = "page_size" name = "page_size"
version = "0.5.0" version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561" checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da"
dependencies = [ dependencies = [
"libc", "libc",
"winapi", "winapi",
@ -3630,6 +3648,7 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [ dependencies = [
"phf_macros",
"phf_shared", "phf_shared",
] ]
@ -3653,6 +3672,19 @@ dependencies = [
"rand", "rand",
] ]
[[package]]
name = "phf_macros"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"syn 2.0.28",
]
[[package]] [[package]]
name = "phf_shared" name = "phf_shared"
version = "0.11.2" version = "0.11.2"
@ -4479,18 +4511,6 @@ dependencies = [
"crossbeam-queue", "crossbeam-queue",
] ]
[[package]]
name = "synstructure"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"unicode-xid",
]
[[package]] [[package]]
name = "synstructure" name = "synstructure"
version = "0.13.0" version = "0.13.0"
@ -5359,28 +5379,7 @@ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.28", "syn 2.0.28",
"synstructure 0.13.0", "synstructure",
]
[[package]]
name = "zerocopy"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6580539ad917b7c026220c4b3f2c08d52ce54d6ce0dc491e66002e35388fab46"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb"
dependencies = [
"proc-macro2",
"syn 1.0.109",
"synstructure 0.12.6",
] ]
[[package]] [[package]]
@ -5401,7 +5400,7 @@ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.28", "syn 2.0.28",
"synstructure 0.13.0", "synstructure",
] ]
[[package]] [[package]]

View File

@ -36,7 +36,7 @@ fn setup_index() -> Index {
} }
fn setup_settings<'t>( fn setup_settings<'t>(
wtxn: &mut RwTxn<'t, '_>, wtxn: &mut RwTxn<'t>,
index: &'t Index, index: &'t Index,
primary_key: &str, primary_key: &str,
searchable_fields: &[&str], searchable_fields: &[&str],

View File

@ -13,12 +13,12 @@ use crate::{Result, Version};
mod compat; mod compat;
pub(self) mod v1; mod v1;
pub(self) mod v2; mod v2;
pub(self) mod v3; mod v3;
pub(self) mod v4; mod v4;
pub(self) mod v5; mod v5;
pub(self) mod v6; mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>; pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>; pub type UpdateFile = dyn Iterator<Item = Result<Document>>;

View File

@ -56,8 +56,7 @@ pub enum RankingRule {
Desc(String), Desc(String),
} }
static ASC_DESC_REGEX: Lazy<Regex> = static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
impl FromStr for RankingRule { impl FromStr for RankingRule {
type Err = (); type Err = ();

View File

@ -564,10 +564,10 @@ pub mod tests {
#[test] #[test]
fn parse_escaped() { fn parse_escaped() {
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequencies // but it also works with other sequencies
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}"); insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
} }

View File

@ -270,8 +270,8 @@ pub mod test {
("aaaa", "", rtok("", "aaaa"), "aaaa"), ("aaaa", "", rtok("", "aaaa"), "aaaa"),
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"), (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#), (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#), (r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#), (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#), (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#), (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
]; ];
@ -301,12 +301,12 @@ pub mod test {
); );
// simple quote // simple quote
assert_eq!( assert_eq!(
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''), unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
r#"Hello 'World'"#.to_string() r#"Hello 'World'"#.to_string()
); );
assert_eq!( assert_eq!(
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''), unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
r#"Hello \\'World\\'"#.to_string() r"Hello \\'World\\'".to_string()
); );
} }
@ -335,19 +335,19 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false), ("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false), ("I'm tamo", "I", false),
// escaped thing but not quote // escaped thing but not quote
(r#""\\""#, r#"\"#, true), (r#""\\""#, r"\", true),
(r#""\\\\\\""#, r#"\\\"#, true), (r#""\\\\\\""#, r"\\\", true),
(r#""aa\\aa""#, r#"aa\aa"#, true), (r#""aa\\aa""#, r"aa\aa", true),
// with double quote // with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true), (r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true), (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true), (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true), (r#""\"\"""#, r#""""#, true),
// with simple quote // with simple quote
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true), (r"'Hello \'world\''", r#"Hello 'world'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true), (r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true), (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r#"'\'\''"#, r#"''"#, true), (r"'\'\''", r#"''"#, true),
]; ];
for (input, expected, escaped) in test_case { for (input, expected, escaped) in test_case {

View File

@ -113,7 +113,7 @@ fn main() {
index.documents(&wtxn, res.documents_ids).unwrap(); index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed); progression.fetch_add(1, Ordering::Relaxed);
} }
wtxn.abort().unwrap(); wtxn.abort();
}); });
if let err @ Err(_) = handle.join() { if let err @ Err(_) = handle.join() {
stop.store(true, Ordering::Relaxed); stop.store(true, Ordering::Relaxed);

View File

@ -32,7 +32,7 @@ use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{ use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
}; };
use meilisearch_types::milli::{self, Filter, BEU32}; use meilisearch_types::milli::{self, Filter};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -715,7 +715,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env // 2. Snapshot the index-scheduler LMDB env
// //
// When we call copy_to_path, LMDB opens a read transaction by itself, // When we call copy_to_file, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know // we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy // the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks. // of the env and the new transaction we open to retrieve the enqueued tasks.
@ -728,7 +728,7 @@ impl IndexScheduler {
// 2.1 First copy the LMDB env of the index-scheduler // 2.1 First copy the LMDB env of the index-scheduler
let dst = temp_snapshot_dir.path().join("tasks"); let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler // 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@ -753,7 +753,7 @@ impl IndexScheduler {
let index = self.index_mapper.index(&rtxn, name)?; let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
} }
drop(rtxn); drop(rtxn);
@ -766,7 +766,7 @@ impl IndexScheduler {
.map_size(1024 * 1024 * 1024) // 1 GiB .map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2) .max_dbs(2)
.open(&self.auth_path)?; .open(&self.auth_path)?;
auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot // 5. Copy and tarball the flat snapshot
// 5.1 Find the original name of the database // 5.1 Find the original name of the database
@ -1106,7 +1106,7 @@ impl IndexScheduler {
for task_id in &index_lhs_task_ids | &index_rhs_task_ids { for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs)); swap_index_uid_in_task(&mut task, (lhs, rhs));
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?; self.all_tasks.put(wtxn, &task_id, &task)?;
} }
// 4. remove the task from indexuid = before_name // 4. remove the task from indexuid = before_name
@ -1132,7 +1132,7 @@ impl IndexScheduler {
/// The list of processed tasks. /// The list of processed tasks.
fn apply_index_operation<'i>( fn apply_index_operation<'i>(
&self, &self,
index_wtxn: &mut RwTxn<'i, '_>, index_wtxn: &mut RwTxn<'i>,
index: &'i Index, index: &'i Index,
operation: IndexOperation, operation: IndexOperation,
) -> Result<Vec<Task>> { ) -> Result<Vec<Task>> {
@ -1479,10 +1479,9 @@ impl IndexScheduler {
} }
for task in to_delete_tasks.iter() { for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &BEU32::new(task))?; self.all_tasks.delete(wtxn, &task)?;
} }
for canceled_by in affected_canceled_by { for canceled_by in affected_canceled_by {
let canceled_by = BEU32::new(canceled_by);
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? { if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
tasks -= &to_delete_tasks; tasks -= &to_delete_tasks;
if tasks.is_empty() { if tasks.is_empty() {
@ -1530,14 +1529,14 @@ impl IndexScheduler {
task.details = task.details.map(|d| d.to_failed()); task.details = task.details.map(|d| d.to_failed());
self.update_task(wtxn, &task)?; self.update_task(wtxn, &task)?;
} }
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?; self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;
Ok(content_files_to_delete) Ok(content_files_to_delete)
} }
} }
fn delete_document_by_filter<'a>( fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>, wtxn: &mut RwTxn<'a>,
filter: &serde_json::Value, filter: &serde_json::Value,
indexer_config: &IndexerConfig, indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing, must_stop_processing: MustStopProcessing,

View File

@ -1,12 +1,8 @@
/// the map size to use when we don't succeed in reading it in indexes.
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::path::Path; use std::path::Path;
use std::time::Duration; use std::time::Duration;
use meilisearch_types::heed::flags::Flags; use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index; use meilisearch_types::milli::Index;
use time::OffsetDateTime; use time::OffsetDateTime;
use uuid::Uuid; use uuid::Uuid;
@ -236,7 +232,7 @@ impl IndexMap {
enable_mdb_writemap: bool, enable_mdb_writemap: bool,
map_size_growth: usize, map_size_growth: usize,
) { ) {
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth; let map_size = index.map_size() + map_size_growth;
let closing_event = index.prepare_for_closing(); let closing_event = index.prepare_for_closing();
let generation = self.next_generation(); let generation = self.next_generation();
self.unavailable.insert( self.unavailable.insert(
@ -309,7 +305,7 @@ fn create_or_open_index(
options.map_size(clamp_to_page_size(map_size)); options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024); options.max_readers(1024);
if enable_mdb_writemap { if enable_mdb_writemap {
unsafe { options.flag(Flags::MdbWriteMap) }; unsafe { options.flags(EnvFlags::WRITE_MAP) };
} }
if let Some((created, updated)) = date { if let Some((created, updated)) = date {
@ -388,7 +384,7 @@ mod tests {
fn assert_index_size(index: Index, expected: usize) { fn assert_index_size(index: Index, expected: usize) {
let expected = clamp_to_page_size(expected); let expected = clamp_to_page_size(expected);
let index_map_size = index.map_size().unwrap(); let index_map_size = index.map_size();
assert_eq!(index_map_size, expected); assert_eq!(index_map_size, expected);
} }
} }

View File

@ -1,7 +1,7 @@
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fmt::Write; use std::fmt::Write;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn}; use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Task}; use meilisearch_types::tasks::{Details, Task};
@ -115,7 +115,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
snap snap
} }
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String { pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
let mut snap = String::new(); let mut snap = String::new();
let iter = db.iter(rtxn).unwrap(); let iter = db.iter(rtxn).unwrap();
for next in iter { for next in iter {
@ -125,10 +125,7 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson
snap snap
} }
pub fn snapshot_date_db( pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
rtxn: &RoTxn,
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
) -> String {
let mut snap = String::new(); let mut snap = String::new();
let iter = db.iter(rtxn).unwrap(); let iter = db.iter(rtxn).unwrap();
for next in iter { for next in iter {
@ -248,10 +245,7 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
} }
snap snap
} }
pub fn snapshot_canceled_by( pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
rtxn: &RoTxn,
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
) -> String {
let mut snap = String::new(); let mut snap = String::new();
let iter = db.iter(rtxn).unwrap(); let iter = db.iter(rtxn).unwrap();
for next in iter { for next in iter {

View File

@ -47,8 +47,9 @@ pub use features::RoFeatures;
use file_store::FileStore; use file_store::FileStore;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
@ -64,8 +65,7 @@ use uuid::Uuid;
use crate::index_mapper::IndexMapper; use crate::index_mapper::IndexMapper;
use crate::utils::{check_index_swap_validity, clamp_to_page_size}; use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 = pub(crate) type BEI128 = I128<BE>;
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. /// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
/// ///
@ -278,7 +278,7 @@ pub struct IndexScheduler {
pub(crate) file_store: FileStore, pub(crate) file_store: FileStore,
// The main database, it contains all the tasks accessible by their Id. // The main database, it contains all the tasks accessible by their Id.
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>, pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
/// All the tasks ids grouped by their status. /// All the tasks ids grouped by their status.
// TODO we should not be able to serialize a `Status::Processing` in this database. // TODO we should not be able to serialize a `Status::Processing` in this database.
@ -289,16 +289,16 @@ pub struct IndexScheduler {
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>, pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the tasks that were canceled by a task uid /// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>, pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date /// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date /// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date /// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
/// In charge of creating, opening, storing and returning indexes. /// In charge of creating, opening, storing and returning indexes.
pub(crate) index_mapper: IndexMapper, pub(crate) index_mapper: IndexMapper,
@ -730,9 +730,7 @@ impl IndexScheduler {
if let Some(canceled_by) = &query.canceled_by { if let Some(canceled_by) = &query.canceled_by {
let mut all_canceled_tasks = RoaringBitmap::new(); let mut all_canceled_tasks = RoaringBitmap::new();
for cancel_task_uid in canceled_by { for cancel_task_uid in canceled_by {
if let Some(canceled_by_uid) = if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? {
self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
{
all_canceled_tasks |= canceled_by_uid; all_canceled_tasks |= canceled_by_uid;
} }
} }
@ -983,7 +981,7 @@ impl IndexScheduler {
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty()) if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50 && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
{ {
return Err(Error::NoSpaceLeftInTaskQueue); return Err(Error::NoSpaceLeftInTaskQueue);
} }
@ -1009,7 +1007,7 @@ impl IndexScheduler {
// Get rid of the mutability. // Get rid of the mutability.
let task = task; let task = task;
self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?; self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?;
for index in task.indexes() { for index in task.indexes() {
self.update_index(&mut wtxn, index, |bitmap| { self.update_index(&mut wtxn, index, |bitmap| {
@ -1187,7 +1185,7 @@ impl IndexScheduler {
| Err(Error::AbortedTask) => { | Err(Error::AbortedTask) => {
#[cfg(test)] #[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation); self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort().map_err(Error::HeedTransaction)?; wtxn.abort();
// We make sure that we don't call `stop_processing` on the `processing_tasks`, // We make sure that we don't call `stop_processing` on the `processing_tasks`,
// this is because we want to let the next tick call `create_next_batch` and keep // this is because we want to let the next tick call `create_next_batch` and keep
@ -1208,7 +1206,7 @@ impl IndexScheduler {
let index_uid = index_uid.unwrap(); let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen // fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?; self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort().map_err(Error::HeedTransaction)?; wtxn.abort();
return Ok(TickOutcome::TickAgain(0)); return Ok(TickOutcome::TickAgain(0));
} }
@ -1354,7 +1352,7 @@ impl IndexScheduler {
pub struct Dump<'a> { pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler, index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a, 'a>, wtxn: RwTxn<'a>,
indexes: HashMap<String, RoaringBitmap>, indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>, statuses: HashMap<Status, RoaringBitmap>,
@ -1469,7 +1467,7 @@ impl<'a> Dump<'a> {
}, },
}; };
self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?; self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
for index in task.indexes() { for index in task.indexes() {
match self.indexes.get_mut(index) { match self.indexes.get_mut(index) {
@ -1511,8 +1509,8 @@ impl<'a> Dump<'a> {
} }
} }
self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid); self.statuses.entry(task.status).or_default().insert(task.uid);
self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid); self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
Ok(task) Ok(task)
} }

View File

@ -3,9 +3,9 @@
use std::collections::{BTreeSet, HashSet}; use std::collections::{BTreeSet, HashSet};
use std::ops::Bound; use std::ops::Bound;
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType}; use meilisearch_types::heed::types::DecodeIgnore;
use meilisearch_types::heed::{Database, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime; use time::OffsetDateTime;
@ -18,7 +18,7 @@ impl IndexScheduler {
} }
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> { pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1)) Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
} }
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> { pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
@ -26,7 +26,7 @@ impl IndexScheduler {
} }
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> { pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?) Ok(self.all_tasks.get(rtxn, &task_id)?)
} }
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@ -88,7 +88,7 @@ impl IndexScheduler {
} }
} }
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?; self.all_tasks.put(wtxn, &task.uid, task)?;
Ok(()) Ok(())
} }
@ -169,11 +169,11 @@ impl IndexScheduler {
pub(crate) fn insert_task_datetime( pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, database: Database<BEI128, CboRoaringBitmapCodec>,
time: OffsetDateTime, time: OffsetDateTime,
task_id: TaskId, task_id: TaskId,
) -> Result<()> { ) -> Result<()> {
let timestamp = BEI128::new(time.unix_timestamp_nanos()); let timestamp = time.unix_timestamp_nanos();
let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default(); let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
task_ids.insert(task_id); task_ids.insert(task_id);
database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?; database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
pub(crate) fn remove_task_datetime( pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, database: Database<BEI128, CboRoaringBitmapCodec>,
time: OffsetDateTime, time: OffsetDateTime,
task_id: TaskId, task_id: TaskId,
) -> Result<()> { ) -> Result<()> {
let timestamp = BEI128::new(time.unix_timestamp_nanos()); let timestamp = time.unix_timestamp_nanos();
if let Some(mut existing) = database.get(wtxn, &timestamp)? { if let Some(mut existing) = database.get(wtxn, &timestamp)? {
existing.remove(task_id); existing.remove(task_id);
if existing.is_empty() { if existing.is_empty() {
@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn keep_tasks_within_datetimes( pub(crate) fn keep_tasks_within_datetimes(
rtxn: &RoTxn, rtxn: &RoTxn,
tasks: &mut RoaringBitmap, tasks: &mut RoaringBitmap,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, database: Database<BEI128, CboRoaringBitmapCodec>,
after: Option<OffsetDateTime>, after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>, before: Option<OffsetDateTime>,
) -> Result<()> { ) -> Result<()> {
@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)), (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
}; };
let mut collected_task_ids = RoaringBitmap::new(); let mut collected_task_ids = RoaringBitmap::new();
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos())); let start = map_bound(start, |b| b.unix_timestamp_nanos());
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos())); let end = map_bound(end, |b| b.unix_timestamp_nanos());
let iter = database.range(rtxn, &(start, end))?; let iter = database.range(rtxn, &(start, end))?;
for r in iter { for r in iter {
let (_timestamp, task_ids) = r?; let (_timestamp, task_ids) = r?;
@ -337,8 +337,6 @@ impl IndexScheduler {
let rtxn = self.env.read_txn().unwrap(); let rtxn = self.env.read_txn().unwrap();
for task in self.all_tasks.iter(&rtxn).unwrap() { for task in self.all_tasks.iter(&rtxn).unwrap() {
let (task_id, task) = task.unwrap(); let (task_id, task) = task.unwrap();
let task_id = task_id.get();
let task_index_uid = task.index_uid().map(ToOwned::to_owned); let task_index_uid = task.index_uid().map(ToOwned::to_owned);
let Task { let Task {
@ -361,16 +359,13 @@ impl IndexScheduler {
.unwrap() .unwrap()
.contains(task.uid)); .contains(task.uid));
} }
let db_enqueued_at = self let db_enqueued_at =
.enqueued_at self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_enqueued_at.contains(task_id)); assert!(db_enqueued_at.contains(task_id));
if let Some(started_at) = started_at { if let Some(started_at) = started_at {
let db_started_at = self let db_started_at = self
.started_at .started_at
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos())) .get(&rtxn, &started_at.unix_timestamp_nanos())
.unwrap() .unwrap()
.unwrap(); .unwrap();
assert!(db_started_at.contains(task_id)); assert!(db_started_at.contains(task_id));
@ -378,7 +373,7 @@ impl IndexScheduler {
if let Some(finished_at) = finished_at { if let Some(finished_at) = finished_at {
let db_finished_at = self let db_finished_at = self
.finished_at .finished_at
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos())) .get(&rtxn, &finished_at.unix_timestamp_nanos())
.unwrap() .unwrap()
.unwrap(); .unwrap();
assert!(db_finished_at.contains(task_id)); assert!(db_finished_at.contains(task_id));

View File

@ -1,7 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use meilisearch_types::heed::{BytesDecode, BytesEncode}; use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid; use uuid::Uuid;
/// A heed codec for value of struct Uuid. /// A heed codec for value of struct Uuid.
@ -10,15 +10,15 @@ pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec { impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid; type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
bytes.try_into().ok().map(Uuid::from_bytes) bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
} }
} }
impl BytesEncode<'_> for UuidCodec { impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid; type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Some(Cow::Borrowed(item.as_bytes())) Ok(Cow::Borrowed(item.as_bytes()))
} }
} }

View File

@ -4,17 +4,20 @@ use std::collections::HashSet;
use std::convert::{TryFrom, TryInto}; use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all; use std::fs::create_dir_all;
use std::path::Path; use std::path::Path;
use std::result::Result as StdResult;
use std::str; use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError;
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId; use meilisearch_types::keys::KeyId;
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256; use sha2::Sha256;
use thiserror::Error;
use time::OffsetDateTime; use time::OffsetDateTime;
use uuid::fmt::Hyphenated; use uuid::fmt::Hyphenated;
use uuid::Uuid; use uuid::Uuid;
@ -30,7 +33,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
#[derive(Clone)] #[derive(Clone)]
pub struct HeedAuthStore { pub struct HeedAuthStore {
env: Arc<Env>, env: Arc<Env>,
keys: Database<ByteSlice, SerdeJson<Key>>, keys: Database<Bytes, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>, action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool, should_close_on_drop: bool,
} }
@ -276,7 +279,7 @@ impl HeedAuthStore {
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> { fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
let mut iter = self let mut iter = self
.action_keyid_index_expiration .action_keyid_index_expiration
.remap_types::<ByteSlice, DecodeIgnore>() .remap_types::<Bytes, DecodeIgnore>()
.prefix_iter_mut(wtxn, key.as_bytes())?; .prefix_iter_mut(wtxn, key.as_bytes())?;
while iter.next().transpose()?.is_some() { while iter.next().transpose()?.is_some() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
@ -294,23 +297,24 @@ pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec { impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>); type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?; let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let (action_bytes, index) = match try_split_array_at(action_bytes)? { let (&action_byte, index) =
(action, []) => (action, None), match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
(action, index) => (action, Some(index)), ([action], []) => (action, None),
([action], index) => (action, Some(index)),
}; };
let key_id = Uuid::from_bytes(*key_id_bytes); let key_id = Uuid::from_bytes(*key_id_bytes);
let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?; let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;
Some((key_id, action, index)) Ok((key_id, action, index))
} }
} }
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::new(); let mut bytes = Vec::new();
bytes.extend_from_slice(key_id.as_bytes()); bytes.extend_from_slice(key_id.as_bytes());
@ -320,10 +324,20 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
bytes.extend_from_slice(index); bytes.extend_from_slice(index);
} }
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }
#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;
#[derive(Error, Debug)]
#[error("cannot construct a valid Action from {action_byte}")]
pub struct InvalidActionError {
pub action_byte: u8,
}
pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String { pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
// format uid as hyphenated allowing user to generate their own keys. // format uid as hyphenated allowing user to generate their own keys.
let mut uid_buffer = [0; Hyphenated::LENGTH]; let mut uid_buffer = [0; Hyphenated::LENGTH];

View File

@ -386,11 +386,11 @@ impl ErrorCode for HeedError {
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile, HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
HeedError::Io(e) => e.error_code(), HeedError::Io(e) => e.error_code(),
HeedError::Mdb(_) HeedError::Mdb(_)
| HeedError::Encoding | HeedError::Encoding(_)
| HeedError::Decoding | HeedError::Decoding(_)
| HeedError::InvalidDatabaseTyping | HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing | HeedError::DatabaseClosing
| HeedError::BadOpenOptions => Code::Internal, | HeedError::BadOpenOptions { .. } => Code::Internal,
} }
} }
} }

View File

@ -532,7 +532,10 @@ pub fn settings(
let faceting = FacetingSettings { let faceting = FacetingSettings {
max_values_per_facet: Setting::Set( max_values_per_facet: Setting::Set(
index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET), index
.max_values_per_facet(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET),
), ),
sort_facet_values_by: Setting::Set( sort_facet_values_by: Setting::Set(
index index
@ -545,7 +548,10 @@ pub fn settings(
let pagination = PaginationSettings { let pagination = PaginationSettings {
max_total_hits: Setting::Set( max_total_hits: Setting::Set(
index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), index
.pagination_max_total_hits(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
), ),
}; };

View File

@ -3,7 +3,7 @@ use std::io::ErrorKind;
use actix_web::http::header::CONTENT_TYPE; use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice; use bstr::ByteSlice as _;
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr; use deserr::Deserr;
use futures::StreamExt; use futures::StreamExt;

View File

@ -78,6 +78,7 @@ macro_rules! make_setting_route {
let body = body.into_inner(); let body = body.into_inner();
#[allow(clippy::redundant_closure_call)]
$analytics(&body, &req); $analytics(&body, &req);
let new_settings = Settings { let new_settings = Settings {

View File

@ -46,8 +46,7 @@ pub async fn multi_search_with_post(
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes. // changes.
let search_results: Result<_, (ResponseError, usize)> = (|| { let search_results: Result<_, (ResponseError, usize)> = async {
async {
let mut search_results = Vec::with_capacity(queries.len()); let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, mut query)) in for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
@ -59,8 +58,7 @@ pub async fn multi_search_with_post(
return Err(AuthenticationError::InvalidToken).with_index(query_index); return Err(AuthenticationError::InvalidToken).with_index(query_index);
} }
// Apply search rules from tenant token // Apply search rules from tenant token
if let Some(search_rules) = if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
index_scheduler.filters().get_index_search_rules(&index_uid)
{ {
add_search_rules(&mut query, search_rules); add_search_rules(&mut query, search_rules);
} }
@ -88,7 +86,6 @@ pub async fn multi_search_with_post(
} }
Ok(search_results) Ok(search_results)
} }
})()
.await; .await;
if search_results.is_ok() { if search_results.is_ok() {

View File

@ -360,6 +360,7 @@ fn prepare_search<'t>(
let max_total_hits = index let max_total_hits = index
.pagination_max_total_hits(rtxn) .pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination); search.exhaustive_number_hits(is_finite_pagination);
@ -586,6 +587,7 @@ pub fn perform_search(
let max_values_by_facet = index let max_values_by_facet = index
.max_values_per_facet(&rtxn) .max_values_per_facet(&rtxn)
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET); .unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet); facet_distribution.max_values_per_facet(max_values_by_facet);

View File

@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value}; use crate::common::{Server, Value};
use crate::json; use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| { static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
"id": 1, "id": 1,
@ -107,8 +107,8 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
]) ])
}); });
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id"; static DOCUMENT_PRIMARY_KEY: &str = "id";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id"; static DOCUMENT_DISTINCT_KEY: &str = "product_id";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078 /// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test] #[actix_rt::test]

View File

@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value}; use crate::common::{Server, Value};
use crate::json; use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| { static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
"title": "Shazam!", "title": "Shazam!",

View File

@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value}; use crate::common::{Server, Value};
use crate::json; use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| { static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
"id": 1, "id": 1,

View File

@ -15,7 +15,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value}; use crate::common::{Server, Value};
use crate::json; use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| { static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
"title": "Shazam!", "title": "Shazam!",
@ -40,7 +40,7 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
]) ])
}); });
pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| { static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{ {
"id": 852, "id": 852,

View File

@ -7,8 +7,8 @@ use clap::{Parser, Subcommand};
use dump::{DumpWriter, IndexMetadata}; use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore; use file_store::FileStore;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::{obkv_to_json, BEU32}; use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task}; use meilisearch_types::tasks::{Status, Task};
@ -148,15 +148,17 @@ fn try_opening_poly_database(
env: &Env, env: &Env,
rtxn: &RoTxn, rtxn: &RoTxn,
db_name: &str, db_name: &str,
) -> anyhow::Result<PolyDatabase> { ) -> anyhow::Result<Database<Unspecified, Unspecified>> {
env.open_poly_database(rtxn, Some(db_name)) env.database_options()
.name(db_name)
.open(rtxn)
.with_context(|| format!("While opening the {db_name:?} poly database"))? .with_context(|| format!("While opening the {db_name:?} poly database"))?
.with_context(|| format!("Missing the {db_name:?} poly database")) .with_context(|| format!("Missing the {db_name:?} poly database"))
} }
fn try_clearing_poly_database( fn try_clearing_poly_database(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
database: PolyDatabase, database: Database<Unspecified, Unspecified>,
db_name: &str, db_name: &str,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database")) database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
@ -212,7 +214,7 @@ fn export_a_dump(
eprintln!("Successfully dumped {count} keys!"); eprintln!("Successfully dumped {count} keys!");
let rtxn = env.read_txn()?; let rtxn = env.read_txn()?;
let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> = let all_tasks: Database<BEU32, SerdeJson<Task>> =
try_opening_database(&env, &rtxn, "all-tasks")?; try_opening_database(&env, &rtxn, "all-tasks")?;
let index_mapping: Database<Str, UuidCodec> = let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?; try_opening_database(&env, &rtxn, "index-mapping")?;

View File

@ -1,7 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use meilisearch_types::heed::{BytesDecode, BytesEncode}; use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid; use uuid::Uuid;
/// A heed codec for value of struct Uuid. /// A heed codec for value of struct Uuid.
@ -10,15 +10,15 @@ pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec { impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid; type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
bytes.try_into().ok().map(Uuid::from_bytes) bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
} }
} }
impl BytesEncode<'_> for UuidCodec { impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid; type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Some(Cow::Borrowed(item.as_bytes())) Ok(Cow::Borrowed(item.as_bytes()))
} }
} }

View File

@ -29,8 +29,8 @@ geoutils = "0.5.1"
grenad = { version = "0.4.5", default-features = false, features = [ grenad = { version = "0.4.5", default-features = false, features = [
"rayon", "tempfile" "rayon", "tempfile"
] } ] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [ heed = { version = "0.20.0-alpha.9", default-features = false, features = [
"lmdb", "read-txn-no-tls" "serde-json", "serde-bincode", "read-txn-no-tls"
] } ] }
indexmap = { version = "2.0.0", features = ["serde"] } indexmap = { version = "2.0.0", features = ["serde"] }
instant-distance = { version = "0.6.1", features = ["with-serde"] } instant-distance = { version = "0.6.1", features = ["with-serde"] }

View File

@ -152,7 +152,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
valid_fields: BTreeSet<String>, valid_fields: BTreeSet<String>,
hidden_fields: bool, hidden_fields: bool,
}, },
#[error("{}", HeedError::BadOpenOptions)] #[error("an environment is already opened with different options")]
InvalidLmdbOpenOptions, InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
SortRankingRuleMissing, SortRankingRuleMissing,
@ -326,11 +326,12 @@ impl From<HeedError> for Error {
HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached), HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile), HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
HeedError::Mdb(error) => InternalError(Store(error)), HeedError::Mdb(error) => InternalError(Store(error)),
HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })), // TODO use the encoding
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })), HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
HeedError::DatabaseClosing => InternalError(DatabaseClosing), HeedError::DatabaseClosing => InternalError(DatabaseClosing),
HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions), HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
} }
} }
} }

View File

@ -1,6 +1,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use heed::types::{OwnedType, Str}; use heed::types::Str;
use heed::{Database, RoIter, RoTxn, RwTxn}; use heed::{Database, RoIter, RoTxn, RwTxn};
use crate::{DocumentId, BEU32}; use crate::{DocumentId, BEU32};
@ -16,10 +16,10 @@ pub struct DocumentOperation {
pub kind: DocumentOperationKind, pub kind: DocumentOperationKind,
} }
pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>); pub struct ExternalDocumentsIds(Database<Str, BEU32>);
impl ExternalDocumentsIds { impl ExternalDocumentsIds {
pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds { pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds {
ExternalDocumentsIds(db) ExternalDocumentsIds(db)
} }
@ -29,7 +29,7 @@ impl ExternalDocumentsIds {
} }
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> { pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get())) self.0.get(rtxn, external_id.as_ref())
} }
/// An helper function to debug this type, returns an `HashMap` of both, /// An helper function to debug this type, returns an `HashMap` of both,
@ -38,7 +38,7 @@ impl ExternalDocumentsIds {
let mut map = HashMap::default(); let mut map = HashMap::default();
for result in self.0.iter(rtxn)? { for result in self.0.iter(rtxn)? {
let (external, internal) = result?; let (external, internal) = result?;
map.insert(external.to_owned(), internal.get()); map.insert(external.to_owned(), internal);
} }
Ok(map) Ok(map)
} }
@ -55,7 +55,7 @@ impl ExternalDocumentsIds {
for DocumentOperation { external_id, internal_id, kind } in operations { for DocumentOperation { external_id, internal_id, kind } in operations {
match kind { match kind {
DocumentOperationKind::Create => { DocumentOperationKind::Create => {
self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?; self.0.put(wtxn, &external_id, &internal_id)?;
} }
DocumentOperationKind::Delete => { DocumentOperationKind::Delete => {
if !self.0.delete(wtxn, &external_id)? { if !self.0.delete(wtxn, &external_id)? {
@ -69,7 +69,7 @@ impl ExternalDocumentsIds {
} }
/// Returns an iterator over all the external ids. /// Returns an iterator over all the external ids.
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> { pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
self.0.iter(rtxn) self.0.iter(rtxn)
} }
} }

View File

@ -2,26 +2,28 @@ use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use std::str; use std::str;
use heed::BoxedError;
pub struct BEU16StrCodec; pub struct BEU16StrCodec;
impl<'a> heed::BytesDecode<'a> for BEU16StrCodec { impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
type DItem = (u16, &'a str); type DItem = (u16, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n_bytes, str_bytes) = bytes.split_at(2); let (n_bytes, str_bytes) = bytes.split_at(2);
let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?; let n = n_bytes.try_into().map(u16::from_be_bytes)?;
let s = str::from_utf8(str_bytes).ok()?; let s = str::from_utf8(str_bytes)?;
Some((n, s)) Ok((n, s))
} }
} }
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec { impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
type EItem = (u16, &'a str); type EItem = (u16, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 2); let mut bytes = Vec::with_capacity(s.len() + 2);
bytes.extend_from_slice(&n.to_be_bytes()); bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes()); bytes.extend_from_slice(s.as_bytes());
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -2,26 +2,28 @@ use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use std::str; use std::str;
use heed::BoxedError;
pub struct BEU32StrCodec; pub struct BEU32StrCodec;
impl<'a> heed::BytesDecode<'a> for BEU32StrCodec { impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
type DItem = (u32, &'a str); type DItem = (u32, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n_bytes, str_bytes) = bytes.split_at(4); let (n_bytes, str_bytes) = bytes.split_at(4);
let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?; let n = n_bytes.try_into().map(u32::from_be_bytes)?;
let s = str::from_utf8(str_bytes).ok()?; let s = str::from_utf8(str_bytes)?;
Some((n, s)) Ok((n, s))
} }
} }
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec { impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
type EItem = (u32, &'a str); type EItem = (u32, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 4); let mut bytes = Vec::with_capacity(s.len() + 4);
bytes.extend_from_slice(&n.to_be_bytes()); bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes()); bytes.extend_from_slice(s.as_bytes());
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -1,23 +1,23 @@
use std::borrow::Cow; use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode}; use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated /// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
pub struct ByteSliceRefCodec; pub struct BytesRefCodec;
impl<'a> BytesEncode<'a> for ByteSliceRefCodec { impl<'a> BytesEncode<'a> for BytesRefCodec {
type EItem = &'a [u8]; type EItem = &'a [u8];
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
Some(Cow::Borrowed(item)) Ok(Cow::Borrowed(item))
} }
} }
impl<'a> BytesDecode<'a> for ByteSliceRefCodec { impl<'a> BytesDecode<'a> for BytesRefCodec {
type DItem = &'a [u8]; type DItem = &'a [u8];
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Some(bytes) Ok(bytes)
} }
} }

View File

@ -1,8 +1,9 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::marker::PhantomData; use std::marker::PhantomData;
use heed::{BytesDecode, BytesEncode}; use heed::{BoxedError, BytesDecode, BytesEncode};
use crate::heed_codec::SliceTooShortError;
use crate::{try_split_array_at, DocumentId, FieldId}; use crate::{try_split_array_at, DocumentId, FieldId};
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>); pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
@ -13,16 +14,16 @@ where
{ {
type DItem = (FieldId, DocumentId, C::DItem); type DItem = (FieldId, DocumentId, C::DItem);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?; let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let field_id = u16::from_be_bytes(field_id_bytes); let field_id = u16::from_be_bytes(field_id_bytes);
let (document_id_bytes, bytes) = try_split_array_at(bytes)?; let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
let value = C::bytes_decode(bytes)?; let value = C::bytes_decode(bytes)?;
Some((field_id, document_id, value)) Ok((field_id, document_id, value))
} }
} }
@ -32,13 +33,15 @@ where
{ {
type EItem = (FieldId, DocumentId, C::EItem); type EItem = (FieldId, DocumentId, C::EItem);
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(
(field_id, document_id, value): &'a Self::EItem,
) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(32); let mut bytes = Vec::with_capacity(32);
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
let value_bytes = C::bytes_encode(value)?; let value_bytes = C::bytes_encode(value)?;
// variable length, if f64 -> 16 bytes, if string -> large, potentially // variable length, if f64 -> 16 bytes, if string -> large, potentially
bytes.extend_from_slice(&value_bytes); bytes.extend_from_slice(&value_bytes);
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -5,8 +5,8 @@ use std::borrow::Cow;
use std::convert::TryFrom; use std::convert::TryFrom;
use std::marker::PhantomData; use std::marker::PhantomData;
use heed::types::{DecodeIgnore, OwnedType}; use heed::types::DecodeIgnore;
use heed::{BytesDecode, BytesEncode}; use heed::{BoxedError, BytesDecode, BytesEncode};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec; pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>; pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>; pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
pub type FieldIdCodec = OwnedType<BEU16>; pub type FieldIdCodec = BEU16;
/// Tries to split a slice in half at the given middle point, /// Tries to split a slice in half at the given middle point,
/// `None` if the slice is too short. /// `None` if the slice is too short.
@ -58,7 +58,7 @@ where
{ {
type EItem = FacetGroupKey<T::EItem>; type EItem = FacetGroupKey<T::EItem>;
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![]; let mut v = vec![];
v.extend_from_slice(&value.field_id.to_be_bytes()); v.extend_from_slice(&value.field_id.to_be_bytes());
v.extend_from_slice(&[value.level]); v.extend_from_slice(&[value.level]);
@ -66,7 +66,7 @@ where
let bound = T::bytes_encode(&value.left_bound)?; let bound = T::bytes_encode(&value.left_bound)?;
v.extend_from_slice(&bound); v.extend_from_slice(&bound);
Some(Cow::Owned(v)) Ok(Cow::Owned(v))
} }
} }
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T> impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
@ -75,11 +75,11 @@ where
{ {
type DItem = FacetGroupKey<T::DItem>; type DItem = FacetGroupKey<T::DItem>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?); let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
let level = bytes[2]; let level = bytes[2];
let bound = T::bytes_decode(&bytes[3..])?; let bound = T::bytes_decode(&bytes[3..])?;
Some(FacetGroupKey { field_id: fid, level, left_bound: bound }) Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
} }
} }
@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue; type EItem = FacetGroupValue;
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size]; let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v); CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
Some(Cow::Owned(v)) Ok(Cow::Owned(v))
} }
} }
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue; type DItem = FacetGroupValue;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0]; let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?; let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Some(FacetGroupValue { size, bitmap }) Ok(FacetGroupValue { size, bitmap })
} }
} }

View File

@ -1,37 +1,45 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use heed::BytesDecode; use heed::{BoxedError, BytesDecode};
use thiserror::Error;
use crate::facet::value_encoding::f64_into_bytes; use crate::facet::value_encoding::f64_into_bytes;
use crate::heed_codec::SliceTooShortError;
pub struct OrderedF64Codec; pub struct OrderedF64Codec;
impl<'a> BytesDecode<'a> for OrderedF64Codec { impl<'a> BytesDecode<'a> for OrderedF64Codec {
type DItem = f64; type DItem = f64;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() < 16 { if bytes.len() < 16 {
return None; Err(SliceTooShortError.into())
} else {
bytes[8..].try_into().map(f64::from_be_bytes).map_err(Into::into)
} }
let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
Some(f)
} }
} }
impl heed::BytesEncode<'_> for OrderedF64Codec { impl heed::BytesEncode<'_> for OrderedF64Codec {
type EItem = f64; type EItem = f64;
fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buffer = [0u8; 16]; let mut buffer = [0u8; 16];
// write the globally ordered float // write the globally ordered float
let bytes = f64_into_bytes(*f)?; let bytes = f64_into_bytes(*f).ok_or(InvalidGloballyOrderedFloatError { float: *f })?;
buffer[..8].copy_from_slice(&bytes[..]); buffer[..8].copy_from_slice(&bytes[..]);
// Then the f64 value just to be able to read it back // Then the f64 value just to be able to read it back
let bytes = f.to_be_bytes(); let bytes = f.to_be_bytes();
buffer[8..16].copy_from_slice(&bytes[..]); buffer[8..16].copy_from_slice(&bytes[..]);
Some(Cow::Owned(buffer.to_vec())) Ok(Cow::Owned(buffer.to_vec()))
} }
} }
#[derive(Error, Debug)]
#[error("the float {float} cannot be converted to a globally ordered representation")]
pub struct InvalidGloballyOrderedFloatError {
float: f64,
}

View File

@ -1,5 +1,8 @@
use std::borrow::Cow; use std::borrow::Cow;
use heed::BoxedError;
use super::SliceTooShortError;
use crate::{try_split_array_at, FieldId}; use crate::{try_split_array_at, FieldId};
pub struct FieldIdWordCountCodec; pub struct FieldIdWordCountCodec;
@ -7,21 +10,21 @@ pub struct FieldIdWordCountCodec;
impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec { impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
type DItem = (FieldId, u8); type DItem = (FieldId, u8);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?; let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let field_id = u16::from_be_bytes(field_id_bytes); let field_id = u16::from_be_bytes(field_id_bytes);
let ([word_count], _nothing) = try_split_array_at(bytes)?; let ([word_count], _nothing) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
Some((field_id, word_count)) Ok((field_id, word_count))
} }
} }
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec { impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
type EItem = (FieldId, u8); type EItem = (FieldId, u8);
fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(2 + 1); let mut bytes = Vec::with_capacity(2 + 1);
bytes.extend_from_slice(&field_id.to_be_bytes()); bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*word_count); bytes.push(*word_count);
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -1,7 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use fst::Set; use fst::Set;
use heed::{BytesDecode, BytesEncode}; use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `Set<&[u8]>`. /// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec; pub struct FstSetCodec;
@ -9,15 +9,15 @@ pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec { impl<'a> BytesEncode<'a> for FstSetCodec {
type EItem = Set<Vec<u8>>; type EItem = Set<Vec<u8>>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
Some(Cow::Borrowed(item.as_fst().as_bytes())) Ok(Cow::Borrowed(item.as_fst().as_bytes()))
} }
} }
impl<'a> BytesDecode<'a> for FstSetCodec { impl<'a> BytesDecode<'a> for FstSetCodec {
type DItem = Set<&'a [u8]>; type DItem = Set<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Set::new(bytes).ok() Set::new(bytes).map_err(Into::into)
} }
} }

View File

@ -12,8 +12,10 @@ mod str_beu32_codec;
mod str_ref; mod str_ref;
mod str_str_u8_codec; mod str_str_u8_codec;
pub use byte_slice_ref::ByteSliceRefCodec; pub use byte_slice_ref::BytesRefCodec;
use heed::BoxedError;
pub use str_ref::StrRefCodec; pub use str_ref::StrRefCodec;
use thiserror::Error;
pub use self::beu16_str_codec::BEU16StrCodec; pub use self::beu16_str_codec::BEU16StrCodec;
pub use self::beu32_str_codec::BEU32StrCodec; pub use self::beu32_str_codec::BEU32StrCodec;
@ -31,5 +33,9 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
pub trait BytesDecodeOwned { pub trait BytesDecodeOwned {
type DItem; type DItem;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>; fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError>;
} }
#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;

View File

@ -1,5 +1,6 @@
use std::borrow::Cow; use std::borrow::Cow;
use heed::BoxedError;
use obkv::{KvReaderU16, KvWriterU16}; use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec; pub struct ObkvCodec;
@ -7,15 +8,15 @@ pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec { impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReaderU16<'a>; type DItem = KvReaderU16<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Some(KvReaderU16::new(bytes)) Ok(KvReaderU16::new(bytes))
} }
} }
impl heed::BytesEncode<'_> for ObkvCodec { impl heed::BytesEncode<'_> for ObkvCodec {
type EItem = KvWriterU16<Vec<u8>>; type EItem = KvWriterU16<Vec<u8>>;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
item.clone().into_inner().map(Cow::Owned).ok() item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
} }
} }

View File

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::convert::TryInto; use std::convert::TryInto;
use std::mem::size_of; use std::mem::size_of;
use heed::BytesDecode; use heed::{BoxedError, BytesDecode};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned; use crate::heed_codec::BytesDecodeOwned;
@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec {
impl BytesDecode<'_> for BoRoaringBitmapCodec { impl BytesDecode<'_> for BoRoaringBitmapCodec {
type DItem = RoaringBitmap; type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
let mut bitmap = RoaringBitmap::new(); let mut bitmap = RoaringBitmap::new();
for chunk in bytes.chunks(size_of::<u32>()) { for chunk in bytes.chunks(size_of::<u32>()) {
let bytes = chunk.try_into().ok()?; let bytes = chunk.try_into()?;
bitmap.push(u32::from_ne_bytes(bytes)); bitmap.push(u32::from_ne_bytes(bytes));
} }
Some(bitmap) Ok(bitmap)
} }
} }
impl BytesDecodeOwned for BoRoaringBitmapCodec { impl BytesDecodeOwned for BoRoaringBitmapCodec {
type DItem = RoaringBitmap; type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes) Self::bytes_decode(bytes)
} }
} }
@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec { impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap; type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut out = Vec::new(); let mut out = Vec::new();
BoRoaringBitmapCodec::serialize_into(item, &mut out); BoRoaringBitmapCodec::serialize_into(item, &mut out);
Some(Cow::Owned(out)) Ok(Cow::Owned(out))
} }
} }

View File

@ -3,6 +3,7 @@ use std::io;
use std::mem::size_of; use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use heed::BoxedError;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned; use crate::heed_codec::BytesDecodeOwned;
@ -132,26 +133,26 @@ impl CboRoaringBitmapCodec {
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
type DItem = RoaringBitmap; type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).ok() Self::deserialize_from(bytes).map_err(Into::into)
} }
} }
impl BytesDecodeOwned for CboRoaringBitmapCodec { impl BytesDecodeOwned for CboRoaringBitmapCodec {
type DItem = RoaringBitmap; type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).ok() Self::deserialize_from(bytes).map_err(Into::into)
} }
} }
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
type EItem = RoaringBitmap; type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item)); let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec); Self::serialize_into(item, &mut vec);
Some(Cow::Owned(vec)) Ok(Cow::Owned(vec))
} }
} }

View File

@ -1,5 +1,6 @@
use std::borrow::Cow; use std::borrow::Cow;
use heed::BoxedError;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned; use crate::heed_codec::BytesDecodeOwned;
@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec;
impl heed::BytesDecode<'_> for RoaringBitmapCodec { impl heed::BytesDecode<'_> for RoaringBitmapCodec {
type DItem = RoaringBitmap; type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmap::deserialize_unchecked_from(bytes).ok() RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into)
} }
} }
impl BytesDecodeOwned for RoaringBitmapCodec { impl BytesDecodeOwned for RoaringBitmapCodec {
type DItem = RoaringBitmap; type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmap::deserialize_from(bytes).ok() RoaringBitmap::deserialize_from(bytes).map_err(Into::into)
} }
} }
impl heed::BytesEncode<'_> for RoaringBitmapCodec { impl heed::BytesEncode<'_> for RoaringBitmapCodec {
type EItem = RoaringBitmap; type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(item.serialized_size()); let mut bytes = Vec::with_capacity(item.serialized_size());
item.serialize_into(&mut bytes).ok()?; item.serialize_into(&mut bytes)?;
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -1,6 +1,6 @@
use std::mem; use std::mem;
use heed::BytesDecode; use heed::{BoxedError, BytesDecode};
use crate::heed_codec::BytesDecodeOwned; use crate::heed_codec::BytesDecodeOwned;
@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec;
impl BytesDecode<'_> for BoRoaringBitmapLenCodec { impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
type DItem = u64; type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Some((bytes.len() / mem::size_of::<u32>()) as u64) Ok((bytes.len() / mem::size_of::<u32>()) as u64)
} }
} }
impl BytesDecodeOwned for BoRoaringBitmapLenCodec { impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
type DItem = u64; type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes) Self::bytes_decode(bytes)
} }
} }

View File

@ -1,6 +1,6 @@
use std::mem; use std::mem;
use heed::BytesDecode; use heed::{BoxedError, BytesDecode};
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec}; use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD; use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec;
impl BytesDecode<'_> for CboRoaringBitmapLenCodec { impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
type DItem = u64; type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() <= THRESHOLD * mem::size_of::<u32>() { if bytes.len() <= THRESHOLD * mem::size_of::<u32>() {
// If there is threshold or less than threshold integers that can fit into this array // If there is threshold or less than threshold integers that can fit into this array
// of bytes it means that we used the ByteOrder codec serializer. // of bytes it means that we used the ByteOrder codec serializer.
@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
impl BytesDecodeOwned for CboRoaringBitmapLenCodec { impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
type DItem = u64; type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes) Self::bytes_decode(bytes)
} }
} }

View File

@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read};
use std::mem; use std::mem;
use byteorder::{LittleEndian, ReadBytesExt}; use byteorder::{LittleEndian, ReadBytesExt};
use heed::BoxedError;
use crate::heed_codec::BytesDecodeOwned; use crate::heed_codec::BytesDecodeOwned;
@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec {
impl heed::BytesDecode<'_> for RoaringBitmapLenCodec { impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
type DItem = u64; type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
} }
} }
impl BytesDecodeOwned for RoaringBitmapLenCodec { impl BytesDecodeOwned for RoaringBitmapLenCodec {
type DItem = u64; type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
} }
} }

View File

@ -1,30 +1,31 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::ffi::CStr;
use std::str; use std::str;
use charabia::{Language, Script}; use charabia::{Language, Script};
use heed::BoxedError;
pub struct ScriptLanguageCodec; pub struct ScriptLanguageCodec;
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec { impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
type DItem = (Script, Language); type DItem = (Script, Language);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let sep = bytes.iter().position(|b| *b == 0)?; let cstr = CStr::from_bytes_until_nul(bytes)?;
let (s_bytes, l_bytes) = bytes.split_at(sep); let script = cstr.to_str()?;
let script = str::from_utf8(s_bytes).ok()?;
let script_name = Script::from_name(script); let script_name = Script::from_name(script);
let lan = str::from_utf8(l_bytes).ok()?;
// skip '\0' byte between the two strings. // skip '\0' byte between the two strings.
let lan_name = Language::from_name(&lan[1..]); let lan = str::from_utf8(&bytes[script.len() + 1..])?;
let lan_name = Language::from_name(lan);
Some((script_name, lan_name)) Ok((script_name, lan_name))
} }
} }
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
type EItem = (Script, Language); type EItem = (Script, Language);
fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let script_name = script.name().as_bytes(); let script_name = script.name().as_bytes();
let lan_name = lan.name().as_bytes(); let lan_name = lan.name().as_bytes();
@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
bytes.push(0); bytes.push(0);
bytes.extend_from_slice(lan_name); bytes.extend_from_slice(lan_name);
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -3,37 +3,41 @@ use std::convert::TryInto;
use std::mem::size_of; use std::mem::size_of;
use std::str; use std::str;
use heed::BoxedError;
use super::SliceTooShortError;
pub struct StrBEU32Codec; pub struct StrBEU32Codec;
impl<'a> heed::BytesDecode<'a> for StrBEU32Codec { impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
type DItem = (&'a str, u32); type DItem = (&'a str, u32);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let footer_len = size_of::<u32>(); let footer_len = size_of::<u32>();
if bytes.len() < footer_len { if bytes.len() < footer_len {
return None; return Err(SliceTooShortError.into());
} }
let (word, bytes) = bytes.split_at(bytes.len() - footer_len); let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
let word = str::from_utf8(word).ok()?; let word = str::from_utf8(word)?;
let pos = bytes.try_into().map(u32::from_be_bytes).ok()?; let pos = bytes.try_into().map(u32::from_be_bytes)?;
Some((word, pos)) Ok((word, pos))
} }
} }
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec { impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
type EItem = (&'a str, u32); type EItem = (&'a str, u32);
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes(); let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + pos.len()); let mut bytes = Vec::with_capacity(word.len() + pos.len());
bytes.extend_from_slice(word.as_bytes()); bytes.extend_from_slice(word.as_bytes());
bytes.extend_from_slice(&pos[..]); bytes.extend_from_slice(&pos[..]);
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }
@ -42,26 +46,27 @@ pub struct StrBEU16Codec;
impl<'a> heed::BytesDecode<'a> for StrBEU16Codec { impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
type DItem = (&'a str, u16); type DItem = (&'a str, u16);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let footer_len = size_of::<u16>(); let footer_len = size_of::<u16>();
if bytes.len() < footer_len + 1 { if bytes.len() < footer_len + 1 {
return None; return Err(SliceTooShortError.into());
} }
let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len); let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
let (_, word) = word_plus_nul_byte.split_last()?; // unwrap: we just checked the footer + 1 above.
let word = str::from_utf8(word).ok()?; let (_, word) = word_plus_nul_byte.split_last().unwrap();
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?; let word = str::from_utf8(word)?;
let pos = bytes.try_into().map(u16::from_be_bytes)?;
Some((word, pos)) Ok((word, pos))
} }
} }
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec { impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
type EItem = (&'a str, u16); type EItem = (&'a str, u16);
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes(); let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len()); let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
@ -69,6 +74,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
bytes.push(0); bytes.push(0);
bytes.extend_from_slice(&pos[..]); bytes.extend_from_slice(&pos[..]);
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -1,6 +1,6 @@
use std::borrow::Cow; use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode}; use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated /// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a str`) and these values can reside within another structure. /// types are equivalent (= `&'a str`) and these values can reside within another structure.
@ -8,15 +8,14 @@ pub struct StrRefCodec;
impl<'a> BytesEncode<'a> for StrRefCodec { impl<'a> BytesEncode<'a> for StrRefCodec {
type EItem = &'a str; type EItem = &'a str;
fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> { fn bytes_encode(item: &'a &'a str) -> Result<Cow<'a, [u8]>, BoxedError> {
Some(Cow::Borrowed(item.as_bytes())) Ok(Cow::Borrowed(item.as_bytes()))
} }
} }
impl<'a> BytesDecode<'a> for StrRefCodec { impl<'a> BytesDecode<'a> for StrRefCodec {
type DItem = &'a str; type DItem = &'a str;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let s = std::str::from_utf8(bytes).ok()?; std::str::from_utf8(bytes).map_err(Into::into)
Some(s)
} }
} }

View File

@ -1,32 +1,36 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::ffi::CStr;
use std::str; use std::str;
use heed::BoxedError;
use super::SliceTooShortError;
pub struct U8StrStrCodec; pub struct U8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for U8StrStrCodec { impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
type DItem = (u8, &'a str, &'a str); type DItem = (u8, &'a str, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n, bytes) = bytes.split_first()?; let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
let s1_end = bytes.iter().position(|b| *b == 0)?; let cstr = CStr::from_bytes_until_nul(bytes)?;
let (s1_bytes, rest) = bytes.split_at(s1_end); let s1 = cstr.to_str()?;
let s2_bytes = &rest[1..]; // skip '\0' byte between the two strings.
let s1 = str::from_utf8(s1_bytes).ok()?; let s2 = str::from_utf8(&bytes[s1.len() + 1..])?;
let s2 = str::from_utf8(s2_bytes).ok()?; Ok((*n, s1, s2))
Some((*n, s1, s2))
} }
} }
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec { impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
type EItem = (u8, &'a str, &'a str); type EItem = (u8, &'a str, &'a str);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n); bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes()); bytes.extend_from_slice(s1.as_bytes());
bytes.push(0); bytes.push(0);
bytes.extend_from_slice(s2.as_bytes()); bytes.extend_from_slice(s2.as_bytes());
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }
pub struct UncheckedU8StrStrCodec; pub struct UncheckedU8StrStrCodec;
@ -34,24 +38,25 @@ pub struct UncheckedU8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec { impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
type DItem = (u8, &'a [u8], &'a [u8]); type DItem = (u8, &'a [u8], &'a [u8]);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n, bytes) = bytes.split_first()?; let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
let s1_end = bytes.iter().position(|b| *b == 0)?; let cstr = CStr::from_bytes_until_nul(bytes)?;
let (s1_bytes, rest) = bytes.split_at(s1_end); let s1_bytes = cstr.to_bytes();
let s2_bytes = &rest[1..]; // skip '\0' byte between the two strings.
Some((*n, s1_bytes, s2_bytes)) let s2_bytes = &bytes[s1_bytes.len() + 1..];
Ok((*n, s1_bytes, s2_bytes))
} }
} }
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec { impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
type EItem = (u8, &'a [u8], &'a [u8]); type EItem = (u8, &'a [u8], &'a [u8]);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n); bytes.push(*n);
bytes.extend_from_slice(s1); bytes.extend_from_slice(s1);
bytes.push(0); bytes.push(0);
bytes.extend_from_slice(s2); bytes.extend_from_slice(s2);
Some(Cow::Owned(bytes)) Ok(Cow::Owned(bytes))
} }
} }

View File

@ -4,9 +4,8 @@ use std::fs::File;
use std::path::Path; use std::path::Path;
use charabia::{Language, Script}; use charabia::{Language, Script};
use heed::flags::Flags;
use heed::types::*; use heed::types::*;
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn}; use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use rstar::RTree; use rstar::RTree;
use time::OffsetDateTime; use time::OffsetDateTime;
@ -27,7 +26,7 @@ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
BEU32, BEU32, BEU64,
}; };
/// The HNSW data-structure that we serialize, fill and search in. /// The HNSW data-structure that we serialize, fill and search in.
@ -109,10 +108,10 @@ pub struct Index {
pub(crate) env: heed::Env, pub(crate) env: heed::Env,
/// Contains many different types (e.g. the fields ids map). /// Contains many different types (e.g. the fields ids map).
pub(crate) main: PolyDatabase, pub(crate) main: Database<Unspecified, Unspecified>,
/// Maps the external documents ids with the internal document id. /// Maps the external documents ids with the internal document id.
pub external_documents_ids: Database<Str, OwnedType<BEU32>>, pub external_documents_ids: Database<Str, BEU32>,
/// A word and all the documents ids containing the word. /// A word and all the documents ids containing the word.
pub word_docids: Database<Str, CboRoaringBitmapCodec>, pub word_docids: Database<Str, CboRoaringBitmapCodec>,
@ -158,7 +157,7 @@ pub struct Index {
/// Maps the facet field id of the normalized-for-search string facets with their original versions. /// Maps the facet field id of the normalized-for-search string facets with their original versions.
pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>, pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
/// Maps the facet field id of the string facets with an FST containing all the facets values. /// Maps the facet field id of the string facets with an FST containing all the facets values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>, pub facet_id_string_fst: Database<BEU16, FstSetCodec>,
/// Maps the document id, the facet field id and the numbers. /// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>, pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@ -166,10 +165,10 @@ pub struct Index {
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>, pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
/// Maps a vector id to the document id that have it. /// Maps a vector id to the document id that have it.
pub vector_id_docid: Database<OwnedType<BEU32>, OwnedType<BEU32>>, pub vector_id_docid: Database<BEU32, BEU32>,
/// Maps the document id to the document as an obkv store. /// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>, pub(crate) documents: Database<BEU32, ObkvCodec>,
} }
impl Index { impl Index {
@ -182,11 +181,10 @@ impl Index {
use db_name::*; use db_name::*;
options.max_dbs(24); options.max_dbs(24);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?; let env = options.open(path)?;
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?; let main = env.database_options().name(MAIN).create(&mut wtxn)?;
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
let external_documents_ids = let external_documents_ids =
env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?; env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?;
@ -264,24 +262,16 @@ impl Index {
fn set_creation_dates( fn set_creation_dates(
env: &heed::Env, env: &heed::Env,
main: PolyDatabase, main: Database<Unspecified, Unspecified>,
created_at: OffsetDateTime, created_at: OffsetDateTime,
updated_at: OffsetDateTime, updated_at: OffsetDateTime,
) -> heed::Result<()> { ) -> heed::Result<()> {
let mut txn = env.write_txn()?; let mut txn = env.write_txn()?;
// The db was just created, we update its metadata with the relevant information. // The db was just created, we update its metadata with the relevant information.
if main.get::<_, Str, SerdeJson<OffsetDateTime>>(&txn, main_key::CREATED_AT_KEY)?.is_none() let main = main.remap_types::<Str, SerdeJson<OffsetDateTime>>();
{ if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() {
main.put::<_, Str, SerdeJson<OffsetDateTime>>( main.put(&mut txn, main_key::UPDATED_AT_KEY, &updated_at)?;
&mut txn, main.put(&mut txn, main_key::CREATED_AT_KEY, &created_at)?;
main_key::UPDATED_AT_KEY,
&updated_at,
)?;
main.put::<_, Str, SerdeJson<OffsetDateTime>>(
&mut txn,
main_key::CREATED_AT_KEY,
&created_at,
)?;
txn.commit()?; txn.commit()?;
} }
Ok(()) Ok(())
@ -318,12 +308,12 @@ impl Index {
/// ///
/// This value is the maximum between the map size passed during the opening of the index /// This value is the maximum between the map size passed during the opening of the index
/// and the on-disk size of the index at the time of opening. /// and the on-disk size of the index at the time of opening.
pub fn map_size(&self) -> Result<usize> { pub fn map_size(&self) -> usize {
Ok(self.env.map_size()?) self.env.info().map_size
} }
pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> { pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_path(path, option).map_err(Into::into) self.env.copy_to_file(path, option).map_err(Into::into)
} }
/// Returns an `EnvClosingEvent` that can be used to wait for the closing event, /// Returns an `EnvClosingEvent` that can be used to wait for the closing event,
@ -343,21 +333,28 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
docids: &RoaringBitmap, docids: &RoaringBitmap,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids) self.main.remap_types::<Str, RoaringBitmapCodec>().put(
wtxn,
main_key::DOCUMENTS_IDS_KEY,
docids,
)
} }
/// Returns the internal documents ids. /// Returns the internal documents ids.
pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> { pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
Ok(self Ok(self
.main .main
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)? .remap_types::<Str, RoaringBitmapCodec>()
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
/// Returns the number of documents indexed in the database. /// Returns the number of documents indexed in the database.
pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> { pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> {
let count = let count = self
self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?; .main
.remap_types::<Str, RoaringBitmapLenCodec>()
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
Ok(count.unwrap_or_default()) Ok(count.unwrap_or_default())
} }
@ -366,17 +363,17 @@ impl Index {
/// Writes the documents primary key, this is the field name that is used to store the id. /// Writes the documents primary key, this is the field name that is used to store the id.
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, primary_key) self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
} }
/// Deletes the primary key of the documents, this can be done to reset indexes settings. /// Deletes the primary key of the documents, this can be done to reset indexes settings.
pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::PRIMARY_KEY_KEY)
} }
/// Returns the documents primary key, `None` if it hasn't been defined. /// Returns the documents primary key, `None` if it hasn't been defined.
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> { pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY) self.main.remap_types::<Str, Str>().get(rtxn, main_key::PRIMARY_KEY_KEY)
} }
/* external documents ids */ /* external documents ids */
@ -396,7 +393,11 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
map: &FieldsIdsMap, map: &FieldsIdsMap,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map) self.main.remap_types::<Str, SerdeJson<FieldsIdsMap>>().put(
wtxn,
main_key::FIELDS_IDS_MAP_KEY,
map,
)
} }
/// Returns the fields ids map which associate the documents keys with an internal field id /// Returns the fields ids map which associate the documents keys with an internal field id
@ -404,7 +405,8 @@ impl Index {
pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> { pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeJson<FieldsIdsMap>>(rtxn, main_key::FIELDS_IDS_MAP_KEY)? .remap_types::<Str, SerdeJson<FieldsIdsMap>>()
.get(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
@ -416,19 +418,24 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
rtree: &RTree<GeoPoint>, rtree: &RTree<GeoPoint>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<RTree<GeoPoint>>>(wtxn, main_key::GEO_RTREE_KEY, rtree) self.main.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>().put(
wtxn,
main_key::GEO_RTREE_KEY,
rtree,
)
} }
/// Delete the `rtree` which associates coordinates to documents ids. /// Delete the `rtree` which associates coordinates to documents ids.
pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_RTREE_KEY)
} }
/// Returns the `rtree` which associates coordinates to documents ids. /// Returns the `rtree` which associates coordinates to documents ids.
pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> { pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> {
match self match self
.main .main
.get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)? .remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>()
.get(rtxn, main_key::GEO_RTREE_KEY)?
{ {
Some(rtree) => Ok(Some(rtree)), Some(rtree) => Ok(Some(rtree)),
None => Ok(None), None => Ok(None),
@ -443,7 +450,7 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
docids: &RoaringBitmap, docids: &RoaringBitmap,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, RoaringBitmapCodec>( self.main.remap_types::<Str, RoaringBitmapCodec>().put(
wtxn, wtxn,
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
docids, docids,
@ -452,14 +459,15 @@ impl Index {
/// Delete the documents ids that are faceted with a _geo field. /// Delete the documents ids that are faceted with a _geo field.
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
} }
/// Retrieve all the documents ids that are faceted with a _geo field. /// Retrieve all the documents ids that are faceted with a _geo field.
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> { pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
match self match self
.main .main
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)? .remap_types::<Str, RoaringBitmapCodec>()
.get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
{ {
Some(docids) => Ok(docids), Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()), None => Ok(RoaringBitmap::new()),
@ -474,22 +482,22 @@ impl Index {
self.delete_vector_hnsw(wtxn)?; self.delete_vector_hnsw(wtxn)?;
let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB
let bytes = bincode::serialize(hnsw).map_err(|_| heed::Error::Encoding)?; let bytes = bincode::serialize(hnsw).map_err(Into::into).map_err(heed::Error::Encoding)?;
for (i, chunk) in bytes.chunks(chunk_size).enumerate() { for (i, chunk) in bytes.chunks(chunk_size).enumerate() {
let i = i as u32; let i = i as u32;
let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec(); let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec();
key.extend_from_slice(&i.to_be_bytes()); key.extend_from_slice(&i.to_be_bytes());
self.main.put::<_, ByteSlice, ByteSlice>(wtxn, &key, chunk)?; self.main.remap_types::<Bytes, Bytes>().put(wtxn, &key, chunk)?;
} }
Ok(()) Ok(())
} }
/// Delete the `hnsw`. /// Delete the `hnsw`.
pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
let mut iter = self.main.prefix_iter_mut::<_, ByteSlice, DecodeIgnore>( let mut iter = self
wtxn, .main
main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes(), .remap_types::<Bytes, DecodeIgnore>()
)?; .prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?;
let mut deleted = false; let mut deleted = false;
while iter.next().transpose()?.is_some() { while iter.next().transpose()?.is_some() {
// We do not keep a reference to the key or the value. // We do not keep a reference to the key or the value.
@ -501,8 +509,10 @@ impl Index {
/// Returns the `hnsw`. /// Returns the `hnsw`.
pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> { pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> {
let mut slices = Vec::new(); let mut slices = Vec::new();
for result in for result in self
self.main.prefix_iter::<_, Str, ByteSlice>(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? .main
.remap_types::<Str, Bytes>()
.prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
{ {
let (_, slice) = result?; let (_, slice) = result?;
slices.push(slice); slices.push(slice);
@ -512,7 +522,11 @@ impl Index {
Ok(None) Ok(None)
} else { } else {
let readable_slices: ReadableSlices<_> = slices.into_iter().collect(); let readable_slices: ReadableSlices<_> = slices.into_iter().collect();
Ok(Some(bincode::deserialize_from(readable_slices).map_err(|_| heed::Error::Decoding)?)) Ok(Some(
bincode::deserialize_from(readable_slices)
.map_err(Into::into)
.map_err(heed::Error::Decoding)?,
))
} }
} }
@ -525,7 +539,7 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
distribution: &FieldDistribution, distribution: &FieldDistribution,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<FieldDistribution>>( self.main.remap_types::<Str, SerdeJson<FieldDistribution>>().put(
wtxn, wtxn,
main_key::FIELD_DISTRIBUTION_KEY, main_key::FIELD_DISTRIBUTION_KEY,
distribution, distribution,
@ -537,7 +551,8 @@ impl Index {
pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldDistribution> { pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldDistribution> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeJson<FieldDistribution>>(rtxn, main_key::FIELD_DISTRIBUTION_KEY)? .remap_types::<Str, SerdeJson<FieldDistribution>>()
.get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
@ -550,7 +565,7 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fields: &[&str], fields: &[&str],
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>( self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
wtxn, wtxn,
main_key::DISPLAYED_FIELDS_KEY, main_key::DISPLAYED_FIELDS_KEY,
&fields, &fields,
@ -560,13 +575,15 @@ impl Index {
/// Deletes the displayed fields ids, this will make the engine to display /// Deletes the displayed fields ids, this will make the engine to display
/// all the documents attributes in the order of the `FieldsIdsMap`. /// all the documents attributes in the order of the `FieldsIdsMap`.
pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY)
} }
/// Returns the displayed fields in the order they were set by the user. If it returns /// Returns the displayed fields in the order they were set by the user. If it returns
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::DISPLAYED_FIELDS_KEY) self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::DISPLAYED_FIELDS_KEY)
} }
/// Identical to `displayed_fields`, but returns the ids instead. /// Identical to `displayed_fields`, but returns the ids instead.
@ -646,7 +663,7 @@ impl Index {
/// Writes the searchable fields, when this list is specified, only these are indexed. /// Writes the searchable fields, when this list is specified, only these are indexed.
fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>( self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
wtxn, wtxn,
main_key::SEARCHABLE_FIELDS_KEY, main_key::SEARCHABLE_FIELDS_KEY,
&fields, &fields,
@ -655,13 +672,15 @@ impl Index {
/// Deletes the searchable fields, when no fields are specified, all fields are indexed. /// Deletes the searchable fields, when no fields are specified, all fields are indexed.
fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
} }
/// Returns the searchable fields, those are the fields that are indexed, /// Returns the searchable fields, those are the fields that are indexed,
/// if the searchable fields aren't there it means that **all** the fields are indexed. /// if the searchable fields aren't there it means that **all** the fields are indexed.
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY) self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
} }
/// Identical to `searchable_fields`, but returns the ids instead. /// Identical to `searchable_fields`, but returns the ids instead.
@ -687,7 +706,7 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fields: &[&str], fields: &[&str],
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>( self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn, wtxn,
main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY,
&fields, &fields,
@ -699,7 +718,7 @@ impl Index {
&self, &self,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
) -> heed::Result<bool> { ) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
} }
/// Returns the user defined searchable fields. /// Returns the user defined searchable fields.
@ -708,7 +727,8 @@ impl Index {
rtxn: &'t RoTxn, rtxn: &'t RoTxn,
) -> heed::Result<Option<Vec<&'t str>>> { ) -> heed::Result<Option<Vec<&'t str>>> {
self.main self.main
.get::<_, Str, SerdeBincode<Vec<_>>>(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) .remap_types::<Str, SerdeBincode<Vec<_>>>()
.get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
} }
/* filterable fields */ /* filterable fields */
@ -719,19 +739,24 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fields: &HashSet<String>, fields: &HashSet<String>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields) self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::FILTERABLE_FIELDS_KEY,
fields,
)
} }
/// Deletes the filterable fields ids in the database. /// Deletes the filterable fields ids in the database.
pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
} }
/// Returns the filterable fields names. /// Returns the filterable fields names.
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> { pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::FILTERABLE_FIELDS_KEY)? .remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::FILTERABLE_FIELDS_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
@ -758,19 +783,24 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fields: &HashSet<String>, fields: &HashSet<String>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields) self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::SORTABLE_FIELDS_KEY,
fields,
)
} }
/// Deletes the sortable fields ids in the database. /// Deletes the sortable fields ids in the database.
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::SORTABLE_FIELDS_KEY)
} }
/// Returns the sortable fields names. /// Returns the sortable fields names.
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> { pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)? .remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::SORTABLE_FIELDS_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
@ -789,14 +819,19 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fields: &HashSet<String>, fields: &HashSet<String>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::HIDDEN_FACETED_FIELDS_KEY, fields) self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::HIDDEN_FACETED_FIELDS_KEY,
fields,
)
} }
/// Returns the faceted fields names. /// Returns the faceted fields names.
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> { pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? .remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
@ -863,7 +898,7 @@ impl Index {
rtxn: &RoTxn, rtxn: &RoTxn,
field_id: FieldId, field_id: FieldId,
) -> heed::Result<RoaringBitmap> { ) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? { match self.facet_id_is_null_docids.get(rtxn, &field_id)? {
Some(docids) => Ok(docids), Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()), None => Ok(RoaringBitmap::new()),
} }
@ -875,7 +910,7 @@ impl Index {
rtxn: &RoTxn, rtxn: &RoTxn,
field_id: FieldId, field_id: FieldId,
) -> heed::Result<RoaringBitmap> { ) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? { match self.facet_id_is_empty_docids.get(rtxn, &field_id)? {
Some(docids) => Ok(docids), Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()), None => Ok(RoaringBitmap::new()),
} }
@ -887,7 +922,7 @@ impl Index {
rtxn: &RoTxn, rtxn: &RoTxn,
field_id: FieldId, field_id: FieldId,
) -> heed::Result<RoaringBitmap> { ) -> heed::Result<RoaringBitmap> {
match self.facet_id_exists_docids.get(rtxn, &BEU16::new(field_id))? { match self.facet_id_exists_docids.get(rtxn, &field_id)? {
Some(docids) => Ok(docids), Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()), None => Ok(RoaringBitmap::new()),
} }
@ -900,15 +935,15 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
distinct_field: &str, distinct_field: &str,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) self.main.remap_types::<Str, Str>().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
} }
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> { pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY) self.main.remap_types::<Str, Str>().get(rtxn, main_key::DISTINCT_FIELD_KEY)
} }
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISTINCT_FIELD_KEY)
} }
/* criteria */ /* criteria */
@ -918,15 +953,23 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
criteria: &[Criterion], criteria: &[Criterion],
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) self.main.remap_types::<Str, SerdeJson<&[Criterion]>>().put(
wtxn,
main_key::CRITERIA_KEY,
&criteria,
)
} }
pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::CRITERIA_KEY)
} }
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> { pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? { match self
.main
.remap_types::<Str, SerdeJson<Vec<Criterion>>>()
.get(rtxn, main_key::CRITERIA_KEY)?
{
Some(criteria) => Ok(criteria), Some(criteria) => Ok(criteria),
None => Ok(default_criteria()), None => Ok(default_criteria()),
} }
@ -940,12 +983,16 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fst: &fst::Set<A>, fst: &fst::Set<A>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes()) self.main.remap_types::<Str, Bytes>().put(
wtxn,
main_key::WORDS_FST_KEY,
fst.as_fst().as_bytes(),
)
} }
/// Returns the FST which is the words dictionary of the engine. /// Returns the FST which is the words dictionary of the engine.
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> { pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? { match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?),
} }
@ -958,15 +1005,19 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fst: &fst::Set<A>, fst: &fst::Set<A>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) self.main.remap_types::<Str, Bytes>().put(
wtxn,
main_key::STOP_WORDS_KEY,
fst.as_fst().as_bytes(),
)
} }
pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::STOP_WORDS_KEY)
} }
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> { pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? { match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::STOP_WORDS_KEY)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
None => Ok(None), None => Ok(None),
} }
@ -979,18 +1030,22 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
set: &BTreeSet<String>, set: &BTreeSet<String>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set) self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn,
main_key::NON_SEPARATOR_TOKENS_KEY,
set,
)
} }
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
} }
pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> { pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>( Ok(self
rtxn, .main
main_key::NON_SEPARATOR_TOKENS_KEY, .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
)?) .get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?)
} }
/* separator tokens */ /* separator tokens */
@ -1000,17 +1055,22 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
set: &BTreeSet<String>, set: &BTreeSet<String>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set) self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn,
main_key::SEPARATOR_TOKENS_KEY,
set,
)
} }
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY)
} }
pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> { pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
.get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
} }
/* separators easing method */ /* separators easing method */
@ -1040,17 +1100,18 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
set: &BTreeSet<String>, set: &BTreeSet<String>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set) self.main.remap_types::<Str, SerdeBincode<_>>().put(wtxn, main_key::DICTIONARY_KEY, set)
} }
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::DICTIONARY_KEY)
} }
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> { pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?) .remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
.get(rtxn, main_key::DICTIONARY_KEY)?)
} }
/* synonyms */ /* synonyms */
@ -1061,8 +1122,12 @@ impl Index {
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>, synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
user_defined_synonyms: &BTreeMap<String, Vec<String>>, user_defined_synonyms: &BTreeMap<String, Vec<String>>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?; self.main.remap_types::<Str, SerdeBincode<_>>().put(
self.main.put::<_, Str, SerdeBincode<_>>( wtxn,
main_key::SYNONYMS_KEY,
synonyms,
)?;
self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn, wtxn,
main_key::USER_DEFINED_SYNONYMS_KEY, main_key::USER_DEFINED_SYNONYMS_KEY,
user_defined_synonyms, user_defined_synonyms,
@ -1070,8 +1135,8 @@ impl Index {
} }
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?; self.main.remap_key_type::<Str>().delete(wtxn, main_key::SYNONYMS_KEY)?;
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
} }
pub fn user_defined_synonyms( pub fn user_defined_synonyms(
@ -1080,14 +1145,16 @@ impl Index {
) -> heed::Result<BTreeMap<String, Vec<String>>> { ) -> heed::Result<BTreeMap<String, Vec<String>>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? .remap_types::<Str, SerdeBincode<_>>()
.get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> { pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)? .remap_types::<Str, SerdeBincode<_>>()
.get(rtxn, main_key::SYNONYMS_KEY)?
.unwrap_or_default()) .unwrap_or_default())
} }
@ -1108,7 +1175,7 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fst: &fst::Set<A>, fst: &fst::Set<A>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>( self.main.remap_types::<Str, Bytes>().put(
wtxn, wtxn,
main_key::WORDS_PREFIXES_FST_KEY, main_key::WORDS_PREFIXES_FST_KEY,
fst.as_fst().as_bytes(), fst.as_fst().as_bytes(),
@ -1117,7 +1184,7 @@ impl Index {
/// Returns the FST which is the words prefixes dictionary of the engine. /// Returns the FST which is the words prefixes dictionary of the engine.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> { pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?),
} }
@ -1142,7 +1209,7 @@ impl Index {
Ok(ids.into_iter().map(move |id| { Ok(ids.into_iter().map(move |id| {
let kv = self let kv = self
.documents .documents
.get(rtxn, &BEU32::new(id))? .get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?; .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
Ok((id, kv)) Ok((id, kv))
})) }))
@ -1207,7 +1274,8 @@ impl Index {
pub fn created_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> { pub fn created_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::CREATED_AT_KEY)? .remap_types::<Str, SerdeJson<OffsetDateTime>>()
.get(rtxn, main_key::CREATED_AT_KEY)?
.ok_or(InternalError::DatabaseMissingEntry { .ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::MAIN, db_name: db_name::MAIN,
key: Some(main_key::CREATED_AT_KEY), key: Some(main_key::CREATED_AT_KEY),
@ -1218,7 +1286,8 @@ impl Index {
pub fn updated_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> { pub fn updated_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::UPDATED_AT_KEY)? .remap_types::<Str, SerdeJson<OffsetDateTime>>()
.get(rtxn, main_key::UPDATED_AT_KEY)?
.ok_or(InternalError::DatabaseMissingEntry { .ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::MAIN, db_name: db_name::MAIN,
key: Some(main_key::UPDATED_AT_KEY), key: Some(main_key::UPDATED_AT_KEY),
@ -1230,14 +1299,18 @@ impl Index {
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
time: &OffsetDateTime, time: &OffsetDateTime,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, time) self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put(
wtxn,
main_key::UPDATED_AT_KEY,
time,
)
} }
pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> { pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true, // identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos. // because by default, we authorize typos.
match self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS)? { match self.main.remap_types::<Str, U8>().get(txn, main_key::AUTHORIZE_TYPOS)? {
Some(0) => Ok(false), Some(0) => Ok(false),
_ => Ok(true), _ => Ok(true),
} }
@ -1247,7 +1320,7 @@ impl Index {
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
// identify 0 as being false, and anything else as true. The absence of a value is true, // identify 0 as being false, and anything else as true. The absence of a value is true,
// because by default, we authorize typos. // because by default, we authorize typos.
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; self.main.remap_types::<Str, U8>().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
Ok(()) Ok(())
} }
@ -1258,7 +1331,8 @@ impl Index {
// because by default, we authorize typos. // because by default, we authorize typos.
Ok(self Ok(self
.main .main
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)? .remap_types::<Str, U8>()
.get(txn, main_key::ONE_TYPO_WORD_LEN)?
.unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO)) .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
} }
@ -1266,7 +1340,7 @@ impl Index {
// Persist the minimum word length required to accept one typo. // Persist the minimum word length required to accept one typo.
// The value is stored as a single u8; when the key is absent, // The value is stored as a single u8; when the key is absent,
// readers fall back to DEFAULT_MIN_WORD_LEN_ONE_TYPO. // readers fall back to DEFAULT_MIN_WORD_LEN_ONE_TYPO.
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; self.main.remap_types::<Str, U8>().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
Ok(()) Ok(())
} }
@ -1276,7 +1350,8 @@ impl Index {
// because by default, we authorize typos. // because by default, we authorize typos.
Ok(self Ok(self
.main .main
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)? .remap_types::<Str, U8>()
.get(txn, main_key::TWO_TYPOS_WORD_LEN)?
.unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
} }
@ -1284,13 +1359,13 @@ impl Index {
// Persist the minimum word length required to accept two typos. // Persist the minimum word length required to accept two typos.
// The value is stored as a single u8; when the key is absent, // The value is stored as a single u8; when the key is absent,
// readers fall back to DEFAULT_MIN_WORD_LEN_TWO_TYPOS. // readers fall back to DEFAULT_MIN_WORD_LEN_TWO_TYPOS.
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; self.main.remap_types::<Str, U8>().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
Ok(()) Ok(())
} }
/// List the words on which typo are not allowed /// List the words on which typo are not allowed
pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> { pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? { match self.main.remap_types::<Str, Bytes>().get(txn, main_key::EXACT_WORDS)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)), Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
None => Ok(None), None => Ok(None),
} }
@ -1301,7 +1376,7 @@ impl Index {
txn: &mut RwTxn, txn: &mut RwTxn,
words: &fst::Set<A>, words: &fst::Set<A>,
) -> Result<()> { ) -> Result<()> {
self.main.put::<_, Str, ByteSlice>( self.main.remap_types::<Str, Bytes>().put(
txn, txn,
main_key::EXACT_WORDS, main_key::EXACT_WORDS,
words.as_fst().as_bytes(), words.as_fst().as_bytes(),
@ -1313,7 +1388,8 @@ impl Index {
pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result<Vec<&'t str>> { pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result<Vec<&'t str>> {
Ok(self Ok(self
.main .main
.get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)? .remap_types::<Str, SerdeBincode<Vec<&str>>>()
.get(txn, main_key::EXACT_ATTRIBUTES)?
.unwrap_or_default()) .unwrap_or_default())
} }
@ -1326,34 +1402,36 @@ impl Index {
/// Writes the exact attributes to the database. /// Writes the exact attributes to the database.
pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> { pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>(txn, main_key::EXACT_ATTRIBUTES, &attrs)?; self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
txn,
main_key::EXACT_ATTRIBUTES,
&attrs,
)?;
Ok(()) Ok(())
} }
/// Clears the exact attributes from the store. /// Clears the exact attributes from the store.
pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES) self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES)
} }
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> { pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET) self.main.remap_types::<Str, BEU64>().get(txn, main_key::MAX_VALUES_PER_FACET)
} }
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> { pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: u64) -> heed::Result<()> {
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET, &val) self.main.remap_types::<Str, BEU64>().put(txn, main_key::MAX_VALUES_PER_FACET, &val)
} }
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
} }
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> { pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> {
let mut orders = self let mut orders = self
.main .main
.get::<_, Str, SerdeJson<HashMap<String, OrderBy>>>( .remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>()
txn, .get(txn, main_key::SORT_FACET_VALUES_BY)?
main_key::SORT_FACET_VALUES_BY,
)?
.unwrap_or_default(); .unwrap_or_default();
// Insert the default ordering if it is not already overwritten by the user. // Insert the default ordering if it is not already overwritten by the user.
orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
@ -1365,27 +1443,27 @@ impl Index {
txn: &mut RwTxn, txn: &mut RwTxn,
val: &HashMap<String, OrderBy>, val: &HashMap<String, OrderBy>,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val) self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
} }
pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY) self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY)
} }
pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<usize>> { pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) self.main.remap_types::<Str, BEU64>().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
} }
pub(crate) fn put_pagination_max_total_hits( pub(crate) fn put_pagination_max_total_hits(
&self, &self,
txn: &mut RwTxn, txn: &mut RwTxn,
val: usize, val: u64,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val) self.main.remap_types::<Str, BEU64>().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
} }
pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
} }
/* script language docids */ /* script language docids */
@ -1479,7 +1557,7 @@ pub(crate) mod tests {
} }
pub fn add_documents_using_wtxn<'t, R>( pub fn add_documents_using_wtxn<'t, R>(
&'t self, &'t self,
wtxn: &mut RwTxn<'t, '_>, wtxn: &mut RwTxn<'t>,
documents: DocumentsBatchReader<R>, documents: DocumentsBatchReader<R>,
) -> Result<(), crate::error::Error> ) -> Result<(), crate::error::Error>
where where
@ -1523,7 +1601,7 @@ pub(crate) mod tests {
} }
pub fn update_settings_using_wtxn<'t>( pub fn update_settings_using_wtxn<'t>(
&'t self, &'t self,
wtxn: &mut RwTxn<'t, '_>, wtxn: &mut RwTxn<'t>,
update: impl Fn(&mut Settings), update: impl Fn(&mut Settings),
) -> Result<(), crate::error::Error> { ) -> Result<(), crate::error::Error> {
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config); let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
@ -1534,7 +1612,7 @@ pub(crate) mod tests {
pub fn delete_documents_using_wtxn<'t>( pub fn delete_documents_using_wtxn<'t>(
&'t self, &'t self,
wtxn: &mut RwTxn<'t, '_>, wtxn: &mut RwTxn<'t>,
external_document_ids: Vec<String>, external_document_ids: Vec<String>,
) { ) {
let builder = IndexDocuments::new( let builder = IndexDocuments::new(

View File

@ -66,9 +66,9 @@ pub use self::search::{
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T> = std::result::Result<T, error::Error>;
pub type Attribute = u32; pub type Attribute = u32;
pub type BEU16 = heed::zerocopy::U16<heed::byteorder::BE>; pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>; pub type BEU32 = heed::types::U32<heed::byteorder::BE>;
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>; pub type BEU64 = heed::types::U64<heed::byteorder::BE>;
pub type DocumentId = u32; pub type DocumentId = u32;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>; pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;

View File

@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use std::ops::ControlFlow; use std::ops::ControlFlow;
use std::{fmt, mem}; use std::{fmt, mem};
use heed::types::ByteSlice; use heed::types::Bytes;
use heed::BytesDecode; use heed::BytesDecode;
use indexmap::IndexMap; use indexmap::IndexMap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -13,7 +13,7 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
}; };
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{ use crate::search::facet::facet_distribution_iter::{
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
}; };
@ -105,7 +105,7 @@ impl<'a> FacetDistribution<'a> {
key_buffer.truncate(mem::size_of::<FieldId>()); key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes()); key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db let iter = db
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)? .prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetF64Codec>(); .remap_key_type::<FieldDocIdFacetF64Codec>();
@ -129,7 +129,7 @@ impl<'a> FacetDistribution<'a> {
key_buffer.truncate(mem::size_of::<FieldId>()); key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes()); key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db let iter = db
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)? .prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetStringCodec>(); .remap_key_type::<FieldDocIdFacetStringCodec>();
@ -172,9 +172,7 @@ impl<'a> FacetDistribution<'a> {
search_function( search_function(
self.rtxn, self.rtxn,
self.index self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id, field_id,
candidates, candidates,
|facet_key, nbr_docids, _| { |facet_key, nbr_docids, _| {
@ -203,9 +201,7 @@ impl<'a> FacetDistribution<'a> {
search_function( search_function(
self.rtxn, self.rtxn,
self.index self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id, field_id,
candidates, candidates,
|facet_key, nbr_docids, any_docid| { |facet_key, nbr_docids, any_docid| {

View File

@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level}; use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::DocumentId; use crate::DocumentId;
/// Call the given closure on the facet distribution of the candidate documents. /// Call the given closure on the facet distribution of the candidate documents.
@ -23,7 +23,7 @@ use crate::DocumentId;
/// keep iterating over the different facet values or stop. /// keep iterating over the different facet values or stop.
pub fn lexicographically_iterate_over_facet_distribution<'t, CB>( pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
callback: CB, callback: CB,
@ -34,11 +34,11 @@ where
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback }; let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
let highest_level = get_highest_level( let highest_level = get_highest_level(
rtxn, rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id, field_id,
)?; )?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(()) Ok(())
} else { } else {
@ -48,7 +48,7 @@ where
pub fn count_iterate_over_facet_distribution<'t, CB>( pub fn count_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
mut callback: CB, mut callback: CB,
@ -77,11 +77,11 @@ where
let mut heap = BinaryHeap::new(); let mut heap = BinaryHeap::new();
let highest_level = get_highest_level( let highest_level = get_highest_level(
rtxn, rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id, field_id,
)?; )?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level // We first fill the heap with values from the highest level
let starting_key = let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
@ -146,7 +146,7 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{ {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
callback: CB, callback: CB,
} }

View File

@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::Result; use crate::Result;
/// Find all the document ids for which the given field contains a value contained within /// Find all the document ids for which the given field contains a value contained within
@ -25,11 +25,11 @@ where
let inner; let inner;
let left = match left { let left = match left {
Bound::Included(left) => { Bound::Included(left) => {
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref()) Bound::Included(inner.as_ref())
} }
Bound::Excluded(left) => { Bound::Excluded(left) => {
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref()) Bound::Excluded(inner.as_ref())
} }
Bound::Unbounded => Bound::Unbounded, Bound::Unbounded => Bound::Unbounded,
@ -37,25 +37,22 @@ where
let inner; let inner;
let right = match right { let right = match right {
Bound::Included(right) => { Bound::Included(right) => {
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref()) Bound::Included(inner.as_ref())
} }
Bound::Excluded(right) => { Bound::Excluded(right) => {
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref()) Bound::Excluded(inner.as_ref())
} }
Bound::Unbounded => Bound::Unbounded, Bound::Unbounded => Bound::Unbounded,
}; };
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?; let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) = if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? let rightmost_bound =
{ Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let rightmost_bound = Bound::Included(
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX; let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(()) Ok(())
@ -67,7 +64,7 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds /// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> { struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
left: Bound<&'b [u8]>, left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>, right: Bound<&'b [u8]>,

View File

@ -5,7 +5,7 @@ use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
/// Return an iterator which iterates over the given candidate documents in /// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id. /// ascending order of their facet value for the given field id.
@ -31,12 +31,12 @@ use crate::heed_codec::ByteSliceRefCodec;
/// Note that once a document id is returned by the iterator, it is never returned again. /// Note that once a document id is returned by the iterator, it is never returned again.
pub fn ascending_facet_sort<'t>( pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?; let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
@ -53,14 +53,12 @@ pub fn ascending_facet_sort<'t>(
struct AscendingFacetSort<'t, 'e> { struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>, rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
stack: Vec<( stack: Vec<(
RoaringBitmap, RoaringBitmap,
std::iter::Take< std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>>,
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
>,
)>, )>,
} }

View File

@ -7,21 +7,21 @@ use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
/// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort). /// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
/// ///
/// This function does the same thing, but in the opposite order. /// This function does the same thing, but in the opposite order.
pub fn descending_facet_sort<'t>( pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?; let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(); let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort { Ok(itertools::Either::Left(DescendingFacetSort {
@ -37,13 +37,13 @@ pub fn descending_facet_sort<'t>(
struct DescendingFacetSort<'t> { struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
stack: Vec<( stack: Vec<(
RoaringBitmap, RoaringBitmap,
std::iter::Take< std::iter::Take<
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, heed::RoRevRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
>, >,
Bound<&'t [u8]>, Bound<&'t [u8]>,
)>, )>,
@ -100,7 +100,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*right_bound = Bound::Excluded(left_bound); *right_bound = Bound::Excluded(left_bound);
let iter = match self let iter = match self
.db .db
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>() .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
.rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow)) .rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow))
{ {
Ok(iter) => iter, Ok(iter) => iter,
@ -123,7 +123,7 @@ mod tests {
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::FacetGroupKeyCodec; use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{ use crate::search::facet::tests::{
@ -144,7 +144,7 @@ mod tests {
let txn = index.env.read_txn().unwrap(); let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>(); let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new(); let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter { for el in iter {
let (docids, _) = el.unwrap(); let (docids, _) = el.unwrap();
@ -167,7 +167,7 @@ mod tests {
let txn = index.env.read_txn().unwrap(); let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>(); let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new(); let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap(); let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter { for el in iter {
let (docids, _) = el.unwrap(); let (docids, _) = el.unwrap();

View File

@ -1,13 +1,13 @@
pub use facet_sort_ascending::ascending_facet_sort; pub use facet_sort_ascending::ascending_facet_sort;
pub use facet_sort_descending::descending_facet_sort; pub use facet_sort_descending::descending_facet_sort;
use heed::types::{ByteSlice, DecodeIgnore}; use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode, RoTxn}; use heed::{BytesDecode, RoTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter}; pub use self::filter::{BadGeoError, Filter};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result}; use crate::{Index, Result};
mod facet_distribution; mod facet_distribution;
mod facet_distribution_iter; mod facet_distribution_iter;
@ -22,8 +22,10 @@ fn facet_extreme_value<'t>(
let extreme_value = let extreme_value =
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
let (_, extreme_value) = extreme_value?; let (_, extreme_value) = extreme_value?;
OrderedF64Codec::bytes_decode(extreme_value)
Ok(OrderedF64Codec::bytes_decode(extreme_value)) .map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into)
} }
pub fn facet_min_value<'t>( pub fn facet_min_value<'t>(
@ -32,7 +34,7 @@ pub fn facet_min_value<'t>(
field_id: u16, field_id: u16,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<Option<f64>> { ) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?; let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it) facet_extreme_value(it)
} }
@ -43,7 +45,7 @@ pub fn facet_max_value<'t>(
field_id: u16, field_id: u16,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<Option<f64>> { ) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = descending_facet_sort(rtxn, db, field_id, candidates)?; let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it) facet_extreme_value(it)
} }
@ -51,7 +53,7 @@ pub fn facet_max_value<'t>(
/// Get the first facet value in the facet database /// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>( pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>> ) -> heed::Result<Option<BoundCodec::DItem>>
where where
@ -60,13 +62,12 @@ where
let mut level0prefix = vec![]; let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0); level0prefix.push(0);
let mut level0_iter_forward = db let mut level0_iter_forward =
.as_polymorph() db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
if let Some(first) = level0_iter_forward.next() { if let Some(first) = level0_iter_forward.next() {
let (first_key, _) = first?; let (first_key, _) = first?;
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key) let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
.ok_or(heed::Error::Encoding)?; .map_err(heed::Error::Decoding)?;
Ok(Some(first_key.left_bound)) Ok(Some(first_key.left_bound))
} else { } else {
Ok(None) Ok(None)
@ -76,7 +77,7 @@ where
/// Get the last facet value in the facet database /// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>( pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>> ) -> heed::Result<Option<BoundCodec::DItem>>
where where
@ -85,13 +86,12 @@ where
let mut level0prefix = vec![]; let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0); level0prefix.push(0);
let mut level0_iter_backward = db let mut level0_iter_backward =
.as_polymorph() db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
if let Some(last) = level0_iter_backward.next() { if let Some(last) = level0_iter_backward.next() {
let (last_key, _) = last?; let (last_key, _) = last?;
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key) let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
.ok_or(heed::Error::Encoding)?; .map_err(heed::Error::Decoding)?;
Ok(Some(last_key.left_bound)) Ok(Some(last_key.left_bound))
} else { } else {
Ok(None) Ok(None)
@ -101,17 +101,17 @@ where
/// Get the height of the highest level in the facet database /// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>( pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>, txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> heed::Result<u8> { ) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes(); let field_id_prefix = &field_id.to_be_bytes();
Ok(db Ok(db
.as_polymorph() .remap_types::<Bytes, DecodeIgnore>()
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, field_id_prefix)? .rev_prefix_iter(txn, field_id_prefix)?
.next() .next()
.map(|el| { .map(|el| {
let (key, _) = el.unwrap(); let (key, _) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap(); let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
key.level key.level
}) })
.unwrap_or(0)) .unwrap_or(0))

View File

@ -17,8 +17,7 @@ use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::{ use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, SearchContext,
SearchContext, BEU16,
}; };
// Building these factories is not free. // Building these factories is not free.
@ -299,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> {
None => return Ok(Vec::new()), None => return Ok(Vec::new()),
}; };
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
Some(fst) => fst, Some(fst) => fst,
None => return Ok(vec![]), None => return Ok(vec![]),
}; };

View File

@ -3,7 +3,7 @@ use std::collections::hash_map::Entry;
use std::hash::Hash; use std::hash::Hash;
use fxhash::FxHashMap; use fxhash::FxHashMap;
use heed::types::ByteSlice; use heed::types::Bytes;
use heed::{BytesEncode, Database, RoTxn}; use heed::{BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -50,7 +50,7 @@ impl<'ctx> DatabaseCache<'ctx> {
cache_key: K1, cache_key: K1,
db_key: &'v KC::EItem, db_key: &'v KC::EItem,
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>, cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, ByteSlice>, db: Database<KC, Bytes>,
) -> Result<Option<DC::DItem>> ) -> Result<Option<DC::DItem>>
where where
K1: Copy + Eq + Hash, K1: Copy + Eq + Hash,
@ -63,12 +63,14 @@ impl<'ctx> DatabaseCache<'ctx> {
} }
match cache.get(&cache_key).unwrap() { match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => { Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) .map(Some)
} .map_err(heed::Error::Decoding)
Some(Cow::Owned(bytes)) => { .map_err(Into::into),
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
} .map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
None => Ok(None), None => Ok(None),
} }
} }
@ -78,7 +80,7 @@ impl<'ctx> DatabaseCache<'ctx> {
cache_key: K1, cache_key: K1,
db_keys: &'v [KC::EItem], db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>, cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, ByteSlice>, db: Database<KC, Bytes>,
merger: MergeFn, merger: MergeFn,
) -> Result<Option<DC::DItem>> ) -> Result<Option<DC::DItem>>
where where
@ -110,12 +112,14 @@ impl<'ctx> DatabaseCache<'ctx> {
} }
match cache.get(&cache_key).unwrap() { match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => { Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) .map(Some)
} .map_err(heed::Error::Decoding)
Some(Cow::Owned(bytes)) => { .map_err(Into::into),
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
} .map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
None => Ok(None), None => Ok(None),
} }
} }
@ -164,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> {
word, word,
&keys[..], &keys[..],
&mut self.db_cache.word_docids, &mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(), self.index.word_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps, merge_cbo_roaring_bitmaps,
) )
} }
@ -173,7 +177,7 @@ impl<'ctx> SearchContext<'ctx> {
word, word,
self.word_interner.get(word).as_str(), self.word_interner.get(word).as_str(),
&mut self.db_cache.word_docids, &mut self.db_cache.word_docids,
self.index.word_docids.remap_data_type::<ByteSlice>(), self.index.word_docids.remap_data_type::<Bytes>(),
), ),
} }
} }
@ -187,7 +191,7 @@ impl<'ctx> SearchContext<'ctx> {
word, word,
self.word_interner.get(word).as_str(), self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids, &mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(), self.index.exact_word_docids.remap_data_type::<Bytes>(),
) )
} }
@ -226,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> {
prefix, prefix,
&keys[..], &keys[..],
&mut self.db_cache.word_prefix_docids, &mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(), self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps, merge_cbo_roaring_bitmaps,
) )
} }
@ -235,7 +239,7 @@ impl<'ctx> SearchContext<'ctx> {
prefix, prefix,
self.word_interner.get(prefix).as_str(), self.word_interner.get(prefix).as_str(),
&mut self.db_cache.word_prefix_docids, &mut self.db_cache.word_prefix_docids,
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(), self.index.word_prefix_docids.remap_data_type::<Bytes>(),
), ),
} }
} }
@ -249,7 +253,7 @@ impl<'ctx> SearchContext<'ctx> {
prefix, prefix,
self.word_interner.get(prefix).as_str(), self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids, &mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(), self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
) )
} }
@ -268,7 +272,7 @@ impl<'ctx> SearchContext<'ctx> {
self.word_interner.get(word2).as_str(), self.word_interner.get(word2).as_str(),
), ),
&mut self.db_cache.word_pair_proximity_docids, &mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(), self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
) )
} }
@ -287,7 +291,7 @@ impl<'ctx> SearchContext<'ctx> {
self.word_interner.get(word2).as_str(), self.word_interner.get(word2).as_str(),
), ),
&mut self.db_cache.word_pair_proximity_docids, &mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(), self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
) )
} }
@ -316,7 +320,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self let remap_key_type = self
.index .index
.word_pair_proximity_docids .word_pair_proximity_docids
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(self.txn, &key)?; .prefix_iter(self.txn, &key)?;
for result in remap_key_type { for result in remap_key_type {
let (_, docids) = result?; let (_, docids) = result?;
@ -355,7 +359,7 @@ impl<'ctx> SearchContext<'ctx> {
(word, fid), (word, fid),
&(self.word_interner.get(word).as_str(), fid), &(self.word_interner.get(word).as_str(), fid),
&mut self.db_cache.word_fid_docids, &mut self.db_cache.word_fid_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(), self.index.word_fid_docids.remap_data_type::<Bytes>(),
) )
} }
@ -374,7 +378,7 @@ impl<'ctx> SearchContext<'ctx> {
(word_prefix, fid), (word_prefix, fid),
&(self.word_interner.get(word_prefix).as_str(), fid), &(self.word_interner.get(word_prefix).as_str(), fid),
&mut self.db_cache.word_prefix_fid_docids, &mut self.db_cache.word_prefix_fid_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(), self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
) )
} }
@ -388,7 +392,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self let remap_key_type = self
.index .index
.word_fid_docids .word_fid_docids
.remap_types::<ByteSlice, ByteSlice>() .remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)? .prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>(); .remap_key_type::<StrBEU16Codec>();
for result in remap_key_type { for result in remap_key_type {
@ -414,7 +418,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self let remap_key_type = self
.index .index
.word_prefix_fid_docids .word_prefix_fid_docids
.remap_types::<ByteSlice, ByteSlice>() .remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)? .prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>(); .remap_key_type::<StrBEU16Codec>();
for result in remap_key_type { for result in remap_key_type {
@ -442,7 +446,7 @@ impl<'ctx> SearchContext<'ctx> {
(word, position), (word, position),
&(self.word_interner.get(word).as_str(), position), &(self.word_interner.get(word).as_str(), position),
&mut self.db_cache.word_position_docids, &mut self.db_cache.word_position_docids,
self.index.word_position_docids.remap_data_type::<ByteSlice>(), self.index.word_position_docids.remap_data_type::<Bytes>(),
) )
} }
@ -456,7 +460,7 @@ impl<'ctx> SearchContext<'ctx> {
(word_prefix, position), (word_prefix, position),
&(self.word_interner.get(word_prefix).as_str(), position), &(self.word_interner.get(word_prefix).as_str(), position),
&mut self.db_cache.word_prefix_position_docids, &mut self.db_cache.word_prefix_position_docids,
self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(), self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
) )
} }
@ -470,7 +474,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self let remap_key_type = self
.index .index
.word_position_docids .word_position_docids
.remap_types::<ByteSlice, ByteSlice>() .remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)? .prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>(); .remap_key_type::<StrBEU16Codec>();
for result in remap_key_type { for result in remap_key_type {
@ -501,7 +505,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self let remap_key_type = self
.index .index
.word_prefix_position_docids .word_prefix_position_docids
.remap_types::<ByteSlice, ByteSlice>() .remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)? .prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>(); .remap_key_type::<StrBEU16Codec>();
for result in remap_key_type { for result in remap_key_type {

View File

@ -1,4 +1,4 @@
use heed::types::{ByteSlice, Str, Unit}; use heed::types::{Bytes, Str, Unit};
use heed::{Database, RoPrefix, RoTxn}; use heed::{Database, RoPrefix, RoTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -8,7 +8,7 @@ const DOCID_SIZE: usize = 4;
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec,
}; };
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result, SearchContext}; use crate::{Index, Result, SearchContext};
pub struct DistinctOutput { pub struct DistinctOutput {
@ -71,7 +71,7 @@ pub fn distinct_single_docid(
/// Return all the docids containing the given value in the given field /// Return all the docids containing the given value in the given field
fn facet_value_docids( fn facet_value_docids(
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
txn: &RoTxn, txn: &RoTxn,
field_id: u16, field_id: u16,
facet_value: &[u8], facet_value: &[u8],
@ -87,12 +87,12 @@ fn facet_number_values<'a>(
field_id: u16, field_id: u16,
index: &Index, index: &Index,
txn: &'a RoTxn, txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> { ) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid); let key = facet_values_prefix_key(field_id, docid);
let iter = index let iter = index
.field_id_docid_facet_f64s .field_id_docid_facet_f64s
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(txn, &key)? .prefix_iter(txn, &key)?
.remap_key_type(); .remap_key_type();
@ -105,12 +105,12 @@ pub fn facet_string_values<'a>(
field_id: u16, field_id: u16,
index: &Index, index: &Index,
txn: &'a RoTxn, txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> { ) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
let key = facet_values_prefix_key(field_id, docid); let key = facet_values_prefix_key(field_id, docid);
let iter = index let iter = index
.field_id_docid_facet_strings .field_id_docid_facet_strings
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(txn, &key)? .prefix_iter(txn, &key)?
.remap_types(); .remap_types();

View File

@ -1,7 +1,7 @@
use std::collections::VecDeque; use std::collections::VecDeque;
use std::iter::FromIterator; use std::iter::FromIterator;
use heed::types::{ByteSlice, Unit}; use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn}; use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use rstar::RTree; use rstar::RTree;
@ -34,7 +34,7 @@ fn facet_number_values<'a>(
let iter = index let iter = index
.field_id_docid_facet_f64s .field_id_docid_facet_f64s
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(txn, &key)? .prefix_iter(txn, &key)?
.remap_key_type(); .remap_key_type();
@ -163,7 +163,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
// computing the distance between two points is expensive thus we cache the result // computing the distance between two points is expensive thus we cache the result
documents documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize); .sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents.into_iter()); self.cached_sorted_docids.extend(documents);
}; };
Ok(()) Ok(())

View File

@ -228,7 +228,7 @@ impl<T> Ord for Interned<T> {
impl<T> PartialOrd for Interned<T> { impl<T> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.idx.partial_cmp(&other.idx) Some(self.cmp(other))
} }
} }
@ -241,7 +241,7 @@ impl<T> PartialEq for Interned<T> {
} }
impl<T> Clone for Interned<T> { impl<T> Clone for Interned<T> {
fn clone(&self) -> Self { fn clone(&self) -> Self {
Self { idx: self.idx, _phantom: PhantomData } *self
} }
} }

View File

@ -50,9 +50,7 @@ use crate::distance::NDotProductPoint;
use crate::error::FieldIdMapMissingEntry; use crate::error::FieldIdMapMissingEntry;
use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule; use crate::search::new::distinct::apply_distinct_rule;
use crate::{ use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
};
/// A structure used throughout the execution of a search query. /// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> { pub struct SearchContext<'ctx> {
@ -451,8 +449,8 @@ pub fn execute_search(
let mut docids = Vec::new(); let mut docids = Vec::new();
let mut uniq_docids = RoaringBitmap::new(); let mut uniq_docids = RoaringBitmap::new();
for instant_distance::Item { distance: _, pid, point: _ } in neighbors { for instant_distance::Item { distance: _, pid, point: _ } in neighbors {
let index = BEU32::new(pid.into_inner()); let index = pid.into_inner();
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get(); let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap();
if universe.contains(docid) && uniq_docids.insert(docid) { if universe.contains(docid) && uniq_docids.insert(docid) {
docids.push(docid); docids.push(docid);
if docids.len() == (from + length) { if docids.len() == (from + length) {
@ -609,7 +607,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
field: field.to_string(), field: field.to_string(),
valid_fields, valid_fields,
hidden_fields, hidden_fields,
})?; }
.into());
} }
Member::Geo(_) if !sortable_fields.contains("_geo") => { Member::Geo(_) if !sortable_fields.contains("_geo") => {
let (valid_fields, hidden_fields) = let (valid_fields, hidden_fields) =
@ -619,7 +618,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
field: "_geo".to_string(), field: "_geo".to_string(),
valid_fields, valid_fields,
hidden_fields, hidden_fields,
})?; }
.into());
} }
_ => (), _ => (),
} }

View File

@ -175,7 +175,7 @@ impl QueryTermSubset {
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> { pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
let original = ctx.term_interner.get(self.original); let original = ctx.term_interner.get(self.original);
let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None }; let use_prefix_db = original.zero_typo.use_prefix_db?;
let word = match &self.zero_typo_subset { let word = match &self.zero_typo_subset {
NTypoTermSubset::All => Some(use_prefix_db), NTypoTermSubset::All => Some(use_prefix_db),
NTypoTermSubset::Subset { words, phrases: _ } => { NTypoTermSubset::Subset { words, phrases: _ } => {

View File

@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext}; use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::score_details::{self, ScoreDetails}; use crate::score_details::{self, ScoreDetails};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::{FieldId, Index, Result}; use crate::{FieldId, Index, Result};
@ -100,11 +100,11 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
let number_db = ctx let number_db = ctx
.index .index
.facet_id_f64_docids .facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let string_db = ctx let string_db = ctx
.index .index
.facet_id_string_docids .facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(); .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let (number_iter, string_iter) = if self.is_ascending { let (number_iter, string_iter) = if self.is_ascending {
let number_iter = ascending_facet_sort( let number_iter = ascending_facet_sort(

View File

@ -124,8 +124,7 @@ fn test_attribute_fid_simple() {
s.query("the quick brown fox jumps over the lazy dog"); s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
} }
@ -142,7 +141,6 @@ fn test_attribute_fid_ngrams() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
} }

View File

@ -141,8 +141,7 @@ fn test_attribute_position_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
} }
#[test] #[test]
@ -158,8 +157,7 @@ fn test_attribute_position_repeated() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
} }
@ -176,8 +174,7 @@ fn test_attribute_position_different_fields() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
} }
@ -194,7 +191,6 @@ fn test_attribute_position_ngrams() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
} }

View File

@ -478,8 +478,7 @@ fn test_exactness_simple_ordered() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -511,8 +510,7 @@ fn test_exactness_simple_reversed() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -535,8 +533,7 @@ fn test_exactness_simple_reversed() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -566,8 +563,7 @@ fn test_exactness_simple_random() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -596,8 +592,7 @@ fn test_exactness_attribute_starts_with_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -623,8 +618,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -644,8 +638,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -674,8 +667,7 @@ fn test_exactness_all_candidates_with_typo() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -711,8 +703,7 @@ fn test_exactness_after_words() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -760,8 +751,7 @@ fn test_words_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -809,8 +799,7 @@ fn test_proximity_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]");
@ -847,8 +836,7 @@ fn test_proximity_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -881,8 +869,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 3]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -917,8 +904,7 @@ fn test_typo_followed_by_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> = let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 4, 3]"); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids); let texts = collect_field_values(&index, &txn, "text", &documents_ids);

View File

@ -1,15 +1,16 @@
use heed::RwTxn;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use time::OffsetDateTime; use time::OffsetDateTime;
use crate::{FieldDistribution, Index, Result}; use crate::{FieldDistribution, Index, Result};
pub struct ClearDocuments<'t, 'u, 'i> { pub struct ClearDocuments<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut RwTxn<'i>,
index: &'i Index, index: &'i Index,
} }
impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { impl<'t, 'i> ClearDocuments<'t, 'i> {
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> ClearDocuments<'t, 'u, 'i> { pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> {
ClearDocuments { wtxn, index } ClearDocuments { wtxn, index }
} }

View File

@ -2,8 +2,8 @@ use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
use grenad::CompressionType; use grenad::CompressionType;
use heed::types::ByteSlice; use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn}; use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
@ -11,7 +11,7 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader}; use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result}; use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
@ -70,11 +70,11 @@ impl<'i> FacetsUpdateBulk<'i> {
let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self; let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;
let db = match facet_type { let db = match facet_type {
FacetType::String => index FacetType::String => {
.facet_id_string_docids index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), }
FacetType::Number => { FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>() index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
} }
}; };
@ -88,7 +88,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> { pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub delta_data: Option<grenad::Reader<R>>, pub delta_data: Option<grenad::Reader<R>>,
pub group_size: u8, pub group_size: u8,
pub min_level_size: u8, pub min_level_size: u8,
@ -106,7 +106,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
for level_reader in level_readers { for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?; let mut cursor = level_reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? { while let Some((k, v)) = cursor.move_on_next()? {
self.db.remap_types::<ByteSlice, ByteSlice>().put(wtxn, k, v)?; self.db.remap_types::<Bytes, Bytes>().put(wtxn, k, v)?;
} }
} }
} }
@ -128,7 +128,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
}; };
if self.db.is_empty(wtxn)? { if self.db.is_empty(wtxn)? {
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let mut database = self.db.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>(); let mut database = self.db.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
let mut cursor = delta_data.into_cursor()?; let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) { if !valid_lmdb_key(key) {
@ -146,11 +146,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
buffer.push(1); buffer.push(1);
// then we extend the buffer with the docids bitmap // then we extend the buffer with the docids bitmap
buffer.extend_from_slice(value); buffer.extend_from_slice(value);
unsafe { database.append(key, &buffer)? }; unsafe {
database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, &buffer)?
};
} }
} else { } else {
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let database = self.db.remap_types::<ByteSlice, ByteSlice>(); let database = self.db.remap_types::<Bytes, Bytes>();
let mut cursor = delta_data.into_cursor()?; let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
@ -219,9 +221,9 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
let level_0_iter = self let level_0_iter = self
.db .db
.as_polymorph() .remap_types::<Bytes, Bytes>()
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())? .prefix_iter(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>(); .remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[]; let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true; let mut first_iteration_for_new_group = true;
@ -307,11 +309,11 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{ {
let key = FacetGroupKey { field_id, level, left_bound }; let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key) let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?; .map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValue { size: group_size, bitmap };
let value = let value =
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?; cur_writer.insert(key, value)?;
cur_writer_len += 1; cur_writer_len += 1;
} }
@ -336,10 +338,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{ {
let key = FacetGroupKey { field_id, level, left_bound }; let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key) let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?; .map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?; cur_writer.insert(key, value)?;
cur_writer_len += 1; cur_writer_len += 1;
} }

View File

@ -1,7 +1,7 @@
use std::fs::File; use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
use heed::types::{ByteSlice, DecodeIgnore}; use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn}; use heed::{BytesDecode, Error, RoTxn, RwTxn};
use obkv::KvReader; use obkv::KvReader;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -10,7 +10,7 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level; use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd; use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key; use crate::update::index_documents::valid_lmdb_key;
@ -48,10 +48,10 @@ impl FacetsUpdateIncremental {
db: match facet_type { db: match facet_type {
FacetType::String => index FacetType::String => index
.facet_id_string_docids .facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
FacetType::Number => index FacetType::Number => index
.facet_id_f64_docids .facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
}, },
group_size, group_size,
max_group_size, max_group_size,
@ -67,19 +67,19 @@ impl FacetsUpdateIncremental {
if !valid_lmdb_key(key) { if !valid_lmdb_key(key) {
continue; continue;
} }
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key) let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?; .map_err(heed::Error::Encoding)?;
let value = KvReader::new(value); let value = KvReader::new(value);
let docids_to_delete = value let docids_to_delete = value
.get(DelAdd::Deletion) .get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode) .map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding)); .map(|o| o.map_err(heed::Error::Encoding));
let docids_to_add = value let docids_to_add = value
.get(DelAdd::Addition) .get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode) .map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding)); .map(|o| o.map_err(heed::Error::Encoding));
if let Some(docids_to_delete) = docids_to_delete { if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?; let docids_to_delete = docids_to_delete?;
@ -98,7 +98,7 @@ impl FacetsUpdateIncremental {
/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
pub struct FacetsUpdateIncrementalInner { pub struct FacetsUpdateIncrementalInner {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: u8, pub group_size: u8,
pub min_level_size: u8, pub min_level_size: u8,
pub max_group_size: u8, pub max_group_size: u8,
@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner {
prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(level); prefix.push(level);
let mut iter = let mut iter = self
self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( .db
txn, .remap_types::<Bytes, FacetGroupValueCodec>()
prefix.as_slice(), .prefix_iter(txn, prefix.as_slice())?;
)?;
let (key_bytes, value) = iter.next().unwrap()?; let (key_bytes, value) = iter.next().unwrap()?;
Ok(( Ok((
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes) FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)? .map_err(Error::Encoding)?
.into_owned(), .into_owned(),
value, value,
)) ))
@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner {
level0_prefix.extend_from_slice(&field_id.to_be_bytes()); level0_prefix.extend_from_slice(&field_id.to_be_bytes());
level0_prefix.push(0); level0_prefix.push(0);
let mut iter = self let mut iter =
.db self.db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
.as_polymorph()
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?;
if iter.next().is_none() { if iter.next().is_none() {
drop(iter); drop(iter);
@ -382,11 +379,8 @@ impl FacetsUpdateIncrementalInner {
highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
highest_level_prefix.push(highest_level); highest_level_prefix.push(highest_level);
let size_highest_level = self let size_highest_level =
.db self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
.count();
if size_highest_level < self.group_size as usize * self.min_level_size as usize { if size_highest_level < self.group_size as usize * self.min_level_size as usize {
return Ok(()); return Ok(());
@ -394,8 +388,8 @@ impl FacetsUpdateIncrementalInner {
let mut groups_iter = self let mut groups_iter = self
.db .db
.as_polymorph() .remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?; .prefix_iter(txn, &highest_level_prefix)?;
let nbr_new_groups = size_highest_level / self.group_size as usize; let nbr_new_groups = size_highest_level / self.group_size as usize;
let nbr_leftover_elements = size_highest_level % self.group_size as usize; let nbr_leftover_elements = size_highest_level % self.group_size as usize;
@ -406,8 +400,8 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new(); let mut values = RoaringBitmap::new();
for _ in 0..group_size { for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?; let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes) let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?; .map_err(Error::Encoding)?;
if first_key.is_none() { if first_key.is_none() {
first_key = Some(key_i); first_key = Some(key_i);
@ -429,8 +423,8 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new(); let mut values = RoaringBitmap::new();
for _ in 0..nbr_leftover_elements { for _ in 0..nbr_leftover_elements {
let (key_bytes, value_i) = groups_iter.next().unwrap()?; let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes) let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?; .map_err(Error::Encoding)?;
if first_key.is_none() { if first_key.is_none() {
first_key = Some(key_i); first_key = Some(key_i);
@ -597,23 +591,21 @@ impl FacetsUpdateIncrementalInner {
if highest_level == 0 if highest_level == 0
|| self || self
.db .db
.as_polymorph() .remap_types::<Bytes, Bytes>()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)? .prefix_iter(txn, &highest_level_prefix)?
.count() .count()
>= self.min_level_size as usize >= self.min_level_size as usize
{ {
return Ok(()); return Ok(());
} }
let mut to_delete = vec![]; let mut to_delete = vec![];
let mut iter = self let mut iter =
.db self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?;
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?;
for el in iter.by_ref() { for el in iter.by_ref() {
let (k, _) = el?; let (k, _) = el?;
to_delete.push( to_delete.push(
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k) FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
.ok_or(Error::Encoding)? .map_err(Error::Encoding)?
.into_owned(), .into_owned(),
); );
} }
@ -1121,7 +1113,7 @@ mod fuzz {
#[no_coverage] #[no_coverage]
fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) { fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
let index = FacetIndex::<ByteSliceRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
let mut txn = index.env.write_txn().unwrap(); let mut txn = index.env.write_txn().unwrap();
let mut trivial_db = TrivialDatabase::<Vec<u8>>::default(); let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
@ -1167,16 +1159,13 @@ mod fuzz {
let level0iter = index let level0iter = index
.content .content
.as_polymorph() .as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
&mut txn,
&field_id.to_be_bytes(),
)
.unwrap(); .unwrap();
for ((key, values), group) in values_field_id.iter().zip(level0iter) { for ((key, values), group) in values_field_id.iter().zip(level0iter) {
let (group_key, group_values) = group.unwrap(); let (group_key, group_values) = group.unwrap();
let group_key = let group_key =
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(group_key).unwrap(); FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
assert_eq!(key, &group_key.left_bound); assert_eq!(key, &group_key.left_bound);
assert_eq!(values, &group_values.bitmap); assert_eq!(values, &group_values.bitmap);
} }
@ -1186,13 +1175,13 @@ mod fuzz {
let level0iter = index let level0iter = index
.content .content
.as_polymorph() .as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
.unwrap(); .unwrap();
for ((key, values), group) in values_field_id.iter().zip(level0iter) { for ((key, values), group) in values_field_id.iter().zip(level0iter) {
let (group_key, group_values) = group.unwrap(); let (group_key, group_values) = group.unwrap();
let group_key = let group_key =
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(group_key).unwrap(); FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
assert_eq!(key, &group_key.left_bound); assert_eq!(key, &group_key.left_bound);
assert_eq!(values, &group_values.bitmap); assert_eq!(values, &group_values.bitmap);
} }

View File

@ -83,7 +83,7 @@ use std::iter::FromIterator;
use charabia::normalizer::{Normalize, NormalizerOption}; use charabia::normalizer::{Normalize, NormalizerOption};
use grenad::{CompressionType, SortAlgorithm}; use grenad::{CompressionType, SortAlgorithm};
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; use heed::types::{Bytes, DecodeIgnore, SerdeJson};
use heed::BytesEncode; use heed::BytesEncode;
use log::debug; use log::debug;
use time::OffsetDateTime; use time::OffsetDateTime;
@ -92,10 +92,10 @@ use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk; use super::FacetsUpdateBulk;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::update::index_documents::create_sorter; use crate::update::index_documents::create_sorter;
use crate::update::merge_btreeset_string; use crate::update::merge_btreeset_string;
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH}; use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
pub mod bulk; pub mod bulk;
pub mod incremental; pub mod incremental;
@ -106,7 +106,7 @@ pub mod incremental;
/// a bulk update method or an incremental update method. /// a bulk update method or an incremental update method.
pub struct FacetsUpdate<'i> { pub struct FacetsUpdate<'i> {
index: &'i Index, index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType, facet_type: FacetType,
delta_data: grenad::Reader<BufReader<File>>, delta_data: grenad::Reader<BufReader<File>>,
group_size: u8, group_size: u8,
@ -120,11 +120,11 @@ impl<'i> FacetsUpdate<'i> {
delta_data: grenad::Reader<BufReader<File>>, delta_data: grenad::Reader<BufReader<File>>,
) -> Self { ) -> Self {
let database = match facet_type { let database = match facet_type {
FacetType::String => index FacetType::String => {
.facet_id_string_docids index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), }
FacetType::Number => { FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>() index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
} }
}; };
Self { Self {
@ -146,7 +146,7 @@ impl<'i> FacetsUpdate<'i> {
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// See self::comparison_bench::benchmark_facet_indexing // See self::comparison_bench::benchmark_facet_indexing
if self.delta_data.len() >= (self.database.len(wtxn)? as u64 / 50) { if self.delta_data.len() >= (self.database.len(wtxn)? / 50) {
let field_ids = let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>(); self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new( let bulk_update = FacetsUpdateBulk::new(
@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> {
} }
let set = BTreeSet::from_iter(std::iter::once(left_bound)); let set = BTreeSet::from_iter(std::iter::once(left_bound));
let key = (field_id, normalized_facet.as_ref()); let key = (field_id, normalized_facet.as_ref());
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?; let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?; let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
sorter.insert(key, val)?; sorter.insert(key, val)?;
} }
} }
@ -217,10 +217,11 @@ impl<'i> FacetsUpdate<'i> {
// as the grenad sorter already merged them for us. // as the grenad sorter already merged them for us.
let mut merger_iter = sorter.into_stream_merger_iter()?; let mut merger_iter = sorter.into_stream_merger_iter()?;
while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? { while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
self.index self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
.facet_id_normalized_string_strings wtxn,
.remap_types::<ByteSlice, ByteSlice>() key_bytes,
.put(wtxn, key_bytes, btreeset_bytes)?; btreeset_bytes,
)?;
} }
// We compute one FST by string facet // We compute one FST by string facet
@ -252,7 +253,7 @@ impl<'i> FacetsUpdate<'i> {
// We write those FSTs in LMDB now // We write those FSTs in LMDB now
for (field_id, fst) in text_fsts { for (field_id, fst) in text_fsts {
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?; self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
} }
Ok(()) Ok(())
@ -267,7 +268,7 @@ pub(crate) mod test_helpers {
use std::marker::PhantomData; use std::marker::PhantomData;
use std::rc::Rc; use std::rc::Rc;
use heed::types::ByteSlice; use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn}; use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -275,7 +276,7 @@ pub(crate) mod test_helpers {
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level; use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd}; use crate::update::del_add::{DelAdd, KvWriterDelAdd};
@ -306,7 +307,7 @@ pub(crate) mod test_helpers {
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{ {
pub env: Env, pub env: Env,
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>, pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>, pub min_level_size: Cell<u8>,
pub max_group_size: Cell<u8>, pub max_group_size: Cell<u8>,
@ -454,7 +455,7 @@ pub(crate) mod test_helpers {
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned(); let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
let key: FacetGroupKey<&[u8]> = let key: FacetGroupKey<&[u8]> =
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes }; FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap(); let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory(); let mut inner_writer = KvWriterDelAdd::memory();
let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap(); let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap(); inner_writer.insert(DelAdd::Addition, value).unwrap();
@ -486,12 +487,12 @@ pub(crate) mod test_helpers {
let iter = self let iter = self
.content .content
.as_polymorph() .remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &level_no_prefix) .prefix_iter(txn, &level_no_prefix)
.unwrap(); .unwrap();
for el in iter { for el in iter {
let (key, value) = el.unwrap(); let (key, value) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap(); let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
let mut prefix_start_below = vec![]; let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@ -501,14 +502,11 @@ pub(crate) mod test_helpers {
let start_below = { let start_below = {
let mut start_below_iter = self let mut start_below_iter = self
.content .content
.as_polymorph() .remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( .prefix_iter(txn, &prefix_start_below)
txn,
&prefix_start_below,
)
.unwrap(); .unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes).unwrap() FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes).unwrap()
}; };
assert!(value.size > 0); assert!(value.size > 0);
@ -612,7 +610,7 @@ mod comparison_bench {
} }
let time_spent = timer.elapsed().as_millis(); let time_spent = timer.elapsed().as_millis();
println!(" add {nbr_doc} : {time_spent}ms"); println!(" add {nbr_doc} : {time_spent}ms");
txn.abort().unwrap(); txn.abort();
} }
} }
} }

View File

@ -309,8 +309,7 @@ fn tokens_from_document<'a>(
// if a language has been detected for the token, we update the counter. // if a language has been detected for the token, we update the counter.
if let Some(language) = token.language { if let Some(language) = token.language {
let script = token.script; let script = token.script;
let entry = let entry = script_language_word_count.entry(script).or_default();
script_language_word_count.entry(script).or_insert_with(Vec::new);
match entry.iter_mut().find(|(l, _)| *l == language) { match entry.iter_mut().find(|(l, _)| *l == language) {
Some((_, n)) => *n += 1, Some((_, n)) => *n += 1,
None => entry.push((language, 1)), None => entry.push((language, 1)),

View File

@ -6,8 +6,8 @@ use std::io::{self, BufReader};
use std::mem::size_of; use std::mem::size_of;
use std::result::Result as StdResult; use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter; use grenad::Sorter;
use heed::zerocopy::AsBytes;
use heed::BytesEncode; use heed::BytesEncode;
use itertools::EitherOrBoth; use itertools::EitherOrBoth;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
@ -20,9 +20,7 @@ use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes; use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd}; use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader}; use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{ use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
};
/// The length of the elements that are always in the buffer when inserting new values. /// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>(); const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = BEU32::from(document).get(); let document = DocumentId::from_be_bytes(document);
// For the other extraction tasks, prefix the key with the field_id and the document_id // For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes); numbers_key_buffer.extend_from_slice(docid_bytes);
@ -323,7 +321,7 @@ where
// We insert only the Del part of the Obkv to inform // We insert only the Del part of the Obkv to inform
// that we only want to remove all those numbers. // that we only want to remove all those numbers.
let mut obkv = KvWriterDelAdd::memory(); let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, ().as_bytes())?; obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
let bytes = obkv.into_inner()?; let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
} }
@ -336,7 +334,7 @@ where
// We insert only the Add part of the Obkv to inform // We insert only the Add part of the Obkv to inform
// that we only want to add all those numbers. // that we only want to add all those numbers.
let mut obkv = KvWriterDelAdd::memory(); let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, ().as_bytes())?; obkv.insert(DelAdd::Addition, bytes_of(&()))?;
let bytes = obkv.into_inner()?; let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
} }

View File

@ -118,7 +118,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
} }
let (word, fid) = StrBEU16Codec::bytes_decode(key) let (word, fid) = StrBEU16Codec::bytes_decode(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// every word contained in an attribute set to exact must be pushed in the exact_words list. // every word contained in an attribute set to exact must be pushed in the exact_words list.
if exact_attributes.contains(&fid) { if exact_attributes.contains(&fid) {

View File

@ -3,7 +3,7 @@ use std::fs::File;
use std::io::{self, BufReader, BufWriter, Seek}; use std::io::{self, BufReader, BufWriter, Seek};
use grenad::{CompressionType, Sorter}; use grenad::{CompressionType, Sorter};
use heed::types::ByteSlice; use heed::types::Bytes;
use super::{ClonableMmap, MergeFn}; use super::{ClonableMmap, MergeFn};
use crate::update::index_documents::valid_lmdb_key; use crate::update::index_documents::valid_lmdb_key;
@ -255,7 +255,7 @@ where
puffin::profile_function!(); puffin::profile_function!();
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let database = database.remap_types::<ByteSlice, ByteSlice>(); let database = database.remap_types::<Bytes, Bytes>();
let mut merger_iter = sorter.into_stream_merger_iter()?; let mut merger_iter = sorter.into_stream_merger_iter()?;
while let Some((key, value)) = merger_iter.next()? { while let Some((key, value)) = merger_iter.next()? {

View File

@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod {
} }
} }
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> { pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index, index: &'i Index,
config: IndexDocumentsConfig, config: IndexDocumentsConfig,
indexer_config: &'a IndexerConfig, indexer_config: &'a IndexerConfig,
@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig {
pub autogenerate_docids: bool, pub autogenerate_docids: bool,
} }
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA> impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
where where
FP: Fn(UpdateIndexingStep) + Sync, FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync, FA: Fn() -> bool + Sync,
{ {
pub fn new( pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index, index: &'i Index,
indexer_config: &'a IndexerConfig, indexer_config: &'a IndexerConfig,
config: IndexDocumentsConfig, config: IndexDocumentsConfig,
progress: FP, progress: FP,
should_abort: FA, should_abort: FA,
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> { ) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
let transform = Some(Transform::new( let transform = Some(Transform::new(
wtxn, wtxn,
index, index,
@ -701,7 +701,7 @@ mod tests {
use crate::documents::documents_batch_reader_from_objects; use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::search::TermsMatchingStrategy; use crate::search::TermsMatchingStrategy;
use crate::{db_snap, Filter, Search, BEU16}; use crate::{db_snap, Filter, Search};
#[test] #[test]
fn simple_document_replacement() { fn simple_document_replacement() {
@ -1743,14 +1743,11 @@ mod tests {
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
let bitmap_colour = let bitmap_colour =
index.facet_id_exists_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap(); index.facet_id_exists_docids.get(&rtxn, &colour_id).unwrap().unwrap();
assert_eq!(bitmap_colour.into_iter().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 6, 7]); assert_eq!(bitmap_colour.into_iter().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 6, 7]);
let bitmap_colour_green = index let bitmap_colour_green =
.facet_id_exists_docids index.facet_id_exists_docids.get(&rtxn, &colour_green_id).unwrap().unwrap();
.get(&rtxn, &BEU16::new(colour_green_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![6, 7]); assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![6, 7]);
}; };
@ -1848,21 +1845,15 @@ mod tests {
index.fields_ids_map(&rtxn).unwrap().id("colour.green.blue").unwrap(); index.fields_ids_map(&rtxn).unwrap().id("colour.green.blue").unwrap();
let bitmap_null_colour = let bitmap_null_colour =
index.facet_id_is_null_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap(); index.facet_id_is_null_docids.get(&rtxn, &colour_id).unwrap().unwrap();
assert_eq!(bitmap_null_colour.into_iter().collect::<Vec<_>>(), vec![0]); assert_eq!(bitmap_null_colour.into_iter().collect::<Vec<_>>(), vec![0]);
let bitmap_colour_green = index let bitmap_colour_green =
.facet_id_is_null_docids index.facet_id_is_null_docids.get(&rtxn, &colour_green_id).unwrap().unwrap();
.get(&rtxn, &BEU16::new(colour_green_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]); assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]);
let bitmap_colour_blue = index let bitmap_colour_blue =
.facet_id_is_null_docids index.facet_id_is_null_docids.get(&rtxn, &colour_blue_id).unwrap().unwrap();
.get(&rtxn, &BEU16::new(colour_blue_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]); assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
}; };
@ -1917,21 +1908,15 @@ mod tests {
let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap(); let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap();
let bitmap_empty_tags = let bitmap_empty_tags =
index.facet_id_is_empty_docids.get(&rtxn, &BEU16::new(tags_id)).unwrap().unwrap(); index.facet_id_is_empty_docids.get(&rtxn, &tags_id).unwrap().unwrap();
assert_eq!(bitmap_empty_tags.into_iter().collect::<Vec<_>>(), vec![2, 6, 9]); assert_eq!(bitmap_empty_tags.into_iter().collect::<Vec<_>>(), vec![2, 6, 9]);
let bitmap_tags_green = index let bitmap_tags_green =
.facet_id_is_empty_docids index.facet_id_is_empty_docids.get(&rtxn, &tags_green_id).unwrap().unwrap();
.get(&rtxn, &BEU16::new(tags_green_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_tags_green.into_iter().collect::<Vec<_>>(), vec![8]); assert_eq!(bitmap_tags_green.into_iter().collect::<Vec<_>>(), vec![8]);
let bitmap_tags_blue = index let bitmap_tags_blue =
.facet_id_is_empty_docids index.facet_id_is_empty_docids.get(&rtxn, &tags_blue_id).unwrap().unwrap();
.get(&rtxn, &BEU16::new(tags_blue_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]); assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
}; };
@ -2684,7 +2669,7 @@ mod tests {
} }
fn delete_documents<'t>( fn delete_documents<'t>(
wtxn: &mut RwTxn<'t, '_>, wtxn: &mut RwTxn<'t>,
index: &'t TempIndex, index: &'t TempIndex,
external_ids: &[&str], external_ids: &[&str],
) -> Vec<u32> { ) -> Vec<u32> {

View File

@ -24,9 +24,7 @@ use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd}; use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::index_documents::GrenadParameters; use crate::update::index_documents::GrenadParameters;
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep}; use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
use crate::{ use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
};
pub struct TransformOutput { pub struct TransformOutput {
pub primary_key: String, pub primary_key: String,
@ -245,11 +243,11 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut skip_insertion = false; let mut skip_insertion = false;
if let Some(original_docid) = original_docid { if let Some(original_docid) = original_docid {
let original_key = BEU32::new(original_docid); let original_key = original_docid;
let base_obkv = self let base_obkv = self
.index .index
.documents .documents
.remap_data_type::<heed::types::ByteSlice>() .remap_data_type::<heed::types::Bytes>()
.get(wtxn, &original_key)? .get(wtxn, &original_key)?
.ok_or(InternalError::DatabaseMissingEntry { .ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::DOCUMENTS, db_name: db_name::DOCUMENTS,
@ -499,11 +497,11 @@ impl<'a, 'i> Transform<'a, 'i> {
self.replaced_documents_ids.insert(internal_docid); self.replaced_documents_ids.insert(internal_docid);
// fetch the obkv document // fetch the obkv document
let original_key = BEU32::new(internal_docid); let original_key = internal_docid;
let base_obkv = self let base_obkv = self
.index .index
.documents .documents
.remap_data_type::<heed::types::ByteSlice>() .remap_data_type::<heed::types::Bytes>()
.get(txn, &original_key)? .get(txn, &original_key)?
.ok_or(InternalError::DatabaseMissingEntry { .ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::DOCUMENTS, db_name: db_name::DOCUMENTS,
@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// TODO this can be done in parallel by using the rayon `ThreadPool`. // TODO this can be done in parallel by using the rayon `ThreadPool`.
pub fn prepare_for_documents_reindexing( pub fn prepare_for_documents_reindexing(
self, self,
wtxn: &mut heed::RwTxn<'i, '_>, wtxn: &mut heed::RwTxn<'i>,
old_fields_ids_map: FieldsIdsMap, old_fields_ids_map: FieldsIdsMap,
mut new_fields_ids_map: FieldsIdsMap, mut new_fields_ids_map: FieldsIdsMap,
) -> Result<TransformOutput> { ) -> Result<TransformOutput> {
@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> {
let obkv = self.index.documents.get(wtxn, &docid)?.ok_or( let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?; )?;
let docid = docid.get();
obkv_buffer.clear(); obkv_buffer.clear();
let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer); let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);

View File

@ -6,8 +6,8 @@ use std::io::{self, BufReader};
use bytemuck::allocation::pod_collect_to_vec; use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script}; use charabia::{Language, Script};
use grenad::MergerBuilder; use grenad::MergerBuilder;
use heed::types::ByteSlice; use heed::types::Bytes;
use heed::RwTxn; use heed::{PutFlags, RwTxn};
use log::error; use log::error;
use obkv::{KvReader, KvWriter}; use obkv::{KvReader, KvWriter};
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
@ -27,9 +27,7 @@ use crate::index::Hnsw;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd}; use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate; use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at}; use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
use crate::{ use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError};
lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
};
pub(crate) enum TypedChunk { pub(crate) enum TypedChunk {
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>), FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
@ -146,10 +144,10 @@ pub(crate) fn write_typed_chunk_into_index(
} }
} }
let db = index.documents.remap_data_type::<ByteSlice>(); let db = index.documents.remap_data_type::<Bytes>();
if !writer.is_empty() { if !writer.is_empty() {
db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?; db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
operations.push(DocumentOperation { operations.push(DocumentOperation {
external_id: external_id.to_string(), external_id: external_id.to_string(),
internal_id: docid, internal_id: docid,
@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index(
}); });
docids.insert(docid); docids.insert(docid);
} else { } else {
db.delete(wtxn, &BEU32::new(docid))?; db.delete(wtxn, &docid)?;
operations.push(DocumentOperation { operations.push(DocumentOperation {
external_id: external_id.to_string(), external_id: external_id.to_string(),
internal_id: docid, internal_id: docid,
@ -295,7 +293,7 @@ pub(crate) fn write_typed_chunk_into_index(
} }
TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => { TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
let index_fid_docid_facet_numbers = let index_fid_docid_facet_numbers =
index.field_id_docid_facet_f64s.remap_types::<ByteSlice, ByteSlice>(); index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_number.into_cursor()?; let mut cursor = fid_docid_facet_number.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
let reader = KvReaderDelAdd::new(value); let reader = KvReaderDelAdd::new(value);
@ -315,7 +313,7 @@ pub(crate) fn write_typed_chunk_into_index(
} }
TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => { TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
let index_fid_docid_facet_strings = let index_fid_docid_facet_strings =
index.field_id_docid_facet_strings.remap_types::<ByteSlice, ByteSlice>(); index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_string.into_cursor()?; let mut cursor = fid_docid_facet_string.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
let reader = KvReaderDelAdd::new(value); let reader = KvReaderDelAdd::new(value);
@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index(
// We extract and store the previous vectors // We extract and store the previous vectors
if let Some(hnsw) = index.vector_hnsw(wtxn)? { if let Some(hnsw) = index.vector_hnsw(wtxn)? {
for (pid, point) in hnsw.iter() { for (pid, point) in hnsw.iter() {
let pid_key = BEU32::new(pid.into_inner()); let pid_key = pid.into_inner();
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get(); let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap();
let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect(); let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
vectors_set.insert((docid, vector)); vectors_set.insert((docid, vector));
} }
@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index(
// Store the vectors in the point-docid relation database // Store the vectors in the point-docid relation database
index.vector_id_docid.clear(wtxn)?; index.vector_id_docid.clear(wtxn)?;
for (docid, pid) in docids.into_iter().zip(pids) { for (docid, pid) in docids.into_iter().zip(pids) {
index.vector_id_docid.put( index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?;
wtxn,
&BEU32::new(pid.into_inner()),
&BEU32::new(docid),
)?;
} }
log::debug!("There are {} entries in the HNSW so far", hnsw_length); log::debug!("There are {} entries in the HNSW so far", hnsw_length);
@ -504,7 +498,7 @@ where
puffin::profile_function!(format!("number of entries: {}", data.len())); puffin::profile_function!(format!("number of entries: {}", data.len()));
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let database = database.remap_types::<ByteSlice, ByteSlice>(); let database = database.remap_types::<Bytes, Bytes>();
let mut cursor = data.into_cursor()?; let mut cursor = data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
@ -562,20 +556,23 @@ where
} }
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let mut database = database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>(); let mut database = database.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
let mut cursor = data.into_cursor()?; let mut cursor = data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
if valid_lmdb_key(key) { if valid_lmdb_key(key) {
debug_assert!( debug_assert!(
K::bytes_decode(key).is_some(), K::bytes_decode(key).is_ok(),
"Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}", "Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
key.len(), key.len(),
&key &key
); );
buffer.clear(); buffer.clear();
let value = serialize_value(value, &mut buffer)?; let value = serialize_value(value, &mut buffer)?;
unsafe { database.append(key, value)? }; unsafe {
// safety: We do not keep a reference to anything that lives inside the database
database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, value)?
};
} }
} }

View File

@ -100,8 +100,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
} }
} }
pub struct Settings<'a, 't, 'u, 'i> { pub struct Settings<'a, 't, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index, index: &'i Index,
indexer_config: &'a IndexerConfig, indexer_config: &'a IndexerConfig,
@ -129,12 +129,12 @@ pub struct Settings<'a, 't, 'u, 'i> {
pagination_max_total_hits: Setting<usize>, pagination_max_total_hits: Setting<usize>,
} }
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { impl<'a, 't, 'i> Settings<'a, 't, 'i> {
pub fn new( pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index, index: &'i Index,
indexer_config: &'a IndexerConfig, indexer_config: &'a IndexerConfig,
) -> Settings<'a, 't, 'u, 'i> { ) -> Settings<'a, 't, 'i> {
Settings { Settings {
wtxn, wtxn,
index, index,
@ -822,7 +822,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_max_values_per_facet(&mut self) -> Result<()> { fn update_max_values_per_facet(&mut self) -> Result<()> {
match self.max_values_per_facet { match self.max_values_per_facet {
Setting::Set(max) => { Setting::Set(max) => {
self.index.put_max_values_per_facet(self.wtxn, max)?; self.index.put_max_values_per_facet(self.wtxn, max as u64)?;
} }
Setting::Reset => { Setting::Reset => {
self.index.delete_max_values_per_facet(self.wtxn)?; self.index.delete_max_values_per_facet(self.wtxn)?;
@ -850,7 +850,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_pagination_max_total_hits(&mut self) -> Result<()> { fn update_pagination_max_total_hits(&mut self) -> Result<()> {
match self.pagination_max_total_hits { match self.pagination_max_total_hits {
Setting::Set(max) => { Setting::Set(max) => {
self.index.put_pagination_max_total_hits(self.wtxn, max)?; self.index.put_pagination_max_total_hits(self.wtxn, max as u64)?;
} }
Setting::Reset => { Setting::Reset => {
self.index.delete_pagination_max_total_hits(self.wtxn)?; self.index.delete_pagination_max_total_hits(self.wtxn)?;
@ -917,7 +917,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use big_s::S; use big_s::S;
use heed::types::ByteSlice; use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset}; use maplit::{btreemap, btreeset, hashset};
use super::*; use super::*;
@ -1130,7 +1130,7 @@ mod tests {
} }
let count = index let count = index
.facet_id_f64_docids .facet_id_f64_docids
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
// The faceted field id is 1u16 // The faceted field id is 1u16
.prefix_iter(&rtxn, &[0, 1, 0]) .prefix_iter(&rtxn, &[0, 1, 0])
.unwrap() .unwrap()
@ -1151,7 +1151,7 @@ mod tests {
// Only count the field_id 1 and level 0 facet values. // Only count the field_id 1 and level 0 facet values.
let count = index let count = index
.facet_id_f64_docids .facet_id_f64_docids
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(&rtxn, &[0, 1, 0]) .prefix_iter(&rtxn, &[0, 1, 0])
.unwrap() .unwrap()
.count(); .count();
@ -1565,7 +1565,7 @@ mod tests {
}) })
.unwrap_err(); .unwrap_err();
assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_)))); assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_))));
wtxn.abort().unwrap(); wtxn.abort();
// But if we clear the database... // But if we clear the database...
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();

View File

@ -1,7 +1,7 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use grenad::CompressionType; use grenad::CompressionType;
use heed::types::{ByteSlice, Str}; use heed::types::{Bytes, Str};
use heed::Database; use heed::Database;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd}; use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
@ -12,8 +12,8 @@ use crate::update::index_documents::{
}; };
use crate::{CboRoaringBitmapCodec, Result}; use crate::{CboRoaringBitmapCodec, Result};
pub struct WordPrefixDocids<'t, 'u, 'i> { pub struct WordPrefixDocids<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
word_docids: Database<Str, CboRoaringBitmapCodec>, word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>, word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
pub(crate) chunk_compression_type: CompressionType, pub(crate) chunk_compression_type: CompressionType,
@ -22,12 +22,12 @@ pub struct WordPrefixDocids<'t, 'u, 'i> {
pub(crate) max_memory: Option<usize>, pub(crate) max_memory: Option<usize>,
} }
impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { impl<'t, 'i> WordPrefixDocids<'t, 'i> {
pub fn new( pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
word_docids: Database<Str, CboRoaringBitmapCodec>, word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>, word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
) -> WordPrefixDocids<'t, 'u, 'i> { ) -> WordPrefixDocids<'t, 'i> {
WordPrefixDocids { WordPrefixDocids {
wtxn, wtxn,
word_docids, word_docids,
@ -93,7 +93,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
} }
// We fetch the docids associated to the newly added word prefix fst only. // We fetch the docids associated to the newly added word prefix fst only.
let db = self.word_docids.remap_data_type::<ByteSlice>(); let db = self.word_docids.remap_data_type::<Bytes>();
let mut buffer = Vec::new(); let mut buffer = Vec::new();
for prefix in new_prefix_fst_words { for prefix in new_prefix_fst_words {
let prefix = std::str::from_utf8(prefix.as_bytes())?; let prefix = std::str::from_utf8(prefix.as_bytes())?;

View File

@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet};
use std::str; use std::str;
use grenad::CompressionType; use grenad::CompressionType;
use heed::types::ByteSlice; use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Database}; use heed::{BytesDecode, BytesEncode, Database};
use log::debug; use log::debug;
@ -17,8 +17,8 @@ use crate::update::index_documents::{
}; };
use crate::{CboRoaringBitmapCodec, Result}; use crate::{CboRoaringBitmapCodec, Result};
pub struct WordPrefixIntegerDocids<'t, 'u, 'i> { pub struct WordPrefixIntegerDocids<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
pub(crate) chunk_compression_type: CompressionType, pub(crate) chunk_compression_type: CompressionType,
@ -27,12 +27,12 @@ pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
pub(crate) max_memory: Option<usize>, pub(crate) max_memory: Option<usize>,
} }
impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
pub fn new( pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i>,
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
) -> WordPrefixIntegerDocids<'t, 'u, 'i> { ) -> WordPrefixIntegerDocids<'t, 'i> {
WordPrefixIntegerDocids { WordPrefixIntegerDocids {
wtxn, wtxn,
prefix_database, prefix_database,
@ -72,7 +72,8 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
let mut current_prefixes: Option<&&[String]> = None; let mut current_prefixes: Option<&&[String]> = None;
let mut prefixes_cache = HashMap::new(); let mut prefixes_cache = HashMap::new();
while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? { while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? {
let (word, pos) = StrBEU16Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?; let (word, pos) =
StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?;
current_prefixes = match current_prefixes.take() { current_prefixes = match current_prefixes.take() {
Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes), Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes),
@ -109,7 +110,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
} }
// We fetch the docids associated to the newly added word prefix fst only. // We fetch the docids associated to the newly added word prefix fst only.
let db = self.word_database.remap_data_type::<ByteSlice>(); let db = self.word_database.remap_data_type::<Bytes>();
let mut buffer = Vec::new(); let mut buffer = Vec::new();
for prefix_bytes in new_prefix_fst_words { for prefix_bytes in new_prefix_fst_words {
let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| { let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
@ -118,7 +119,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
// iter over all lines of the DB where the key is prefixed by the current prefix. // iter over all lines of the DB where the key is prefixed by the current prefix.
let iter = db let iter = db
.remap_key_type::<ByteSlice>() .remap_key_type::<Bytes>()
.prefix_iter(self.wtxn, prefix_bytes.as_bytes())? .prefix_iter(self.wtxn, prefix_bytes.as_bytes())?
.remap_key_type::<StrBEU16Codec>(); .remap_key_type::<StrBEU16Codec>();
for result in iter { for result in iter {

View File

@ -2,21 +2,19 @@ use std::iter::{repeat_with, FromIterator};
use std::str; use std::str;
use fst::{SetBuilder, Streamer}; use fst::{SetBuilder, Streamer};
use heed::RwTxn;
use crate::{Index, Result, SmallString32}; use crate::{Index, Result, SmallString32};
pub struct WordsPrefixesFst<'t, 'u, 'i> { pub struct WordsPrefixesFst<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut RwTxn<'i>,
index: &'i Index, index: &'i Index,
threshold: u32, threshold: u32,
max_prefix_length: usize, max_prefix_length: usize,
} }
impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> { impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
pub fn new( pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> WordsPrefixesFst<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
) -> WordsPrefixesFst<'t, 'u, 'i> {
WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 } WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 }
} }