Move to the v0.20.0-alpha.9 of heed

commit d32eb11329 (parent 58dac8af42)
Mirror of https://github.com/meilisearch/meilisearch.git, synced 2024-11-22 18:17:39 +08:00
Cargo.lock (generated): 38 changed lines
@@ -1260,9 +1260,9 @@ dependencies = [
 
 [[package]]
 name = "doxygen-rs"
-version = "0.4.2"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9"
+checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505"
 dependencies = [
 "phf",
 ]
@@ -1823,8 +1823,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 
 [[package]]
 name = "heed"
-version = "0.20.0-alpha.6"
-source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796"
+version = "0.20.0-alpha.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934"
 dependencies = [
 "bitflags 2.3.3",
 "bytemuck",
@@ -1834,23 +1835,24 @@ dependencies = [
 "libc",
 "lmdb-master-sys",
 "once_cell",
-"page_size",
+"page_size 0.6.0",
 "synchronoise",
 "url",
 ]
 
 [[package]]
 name = "heed-traits"
-version = "0.20.0-alpha.6"
-source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796"
+version = "0.20.0-alpha.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298"
 
 [[package]]
 name = "heed-types"
-version = "0.20.0-alpha.6"
-source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796"
+version = "0.20.0-alpha.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a"
 dependencies = [
 "bincode",
-"bytemuck",
 "byteorder",
 "heed-traits",
 "serde",
@@ -2472,7 +2474,7 @@ dependencies = [
 "meilisearch-auth",
 "meilisearch-types",
 "nelson",
-"page_size",
+"page_size 0.5.0",
 "puffin",
 "roaring",
 "serde",
@@ -2984,11 +2986,13 @@ dependencies = [
 
 [[package]]
 name = "lmdb-master-sys"
 version = "0.1.0"
-source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd"
 dependencies = [
 "cc",
 "doxygen-rs",
 "libc",
+"pkg-config",
 ]
 
 [[package]]
@@ -3494,6 +3498,16 @@ dependencies = [
 "winapi",
 ]
 
+[[package]]
+name = "page_size"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da"
+dependencies = [
+"libc",
+"winapi",
+]
+
 [[package]]
 name = "parking_lot"
 version = "0.12.1"
@@ -14,7 +14,7 @@ use meilisearch_types::heed::BoxedError;
 use meilisearch_types::index_uid_pattern::IndexUidPattern;
 use meilisearch_types::keys::KeyId;
 use meilisearch_types::milli;
-use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
+use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
 use sha2::Sha256;
 use time::OffsetDateTime;
@@ -32,7 +32,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
 #[derive(Clone)]
 pub struct HeedAuthStore {
 env: Arc<Env>,
-keys: Database<ByteSlice, SerdeJson<Key>>,
+keys: Database<Bytes, SerdeJson<Key>>,
 action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
 should_close_on_drop: bool,
 }
@@ -278,7 +278,7 @@ impl HeedAuthStore {
 fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
 let mut iter = self
 .action_keyid_index_expiration
-.remap_types::<ByteSlice, DecodeIgnore>()
+.remap_types::<Bytes, DecodeIgnore>()
 .prefix_iter_mut(wtxn, key.as_bytes())?;
 while iter.next().transpose()?.is_some() {
 // safety: we don't keep references from inside the LMDB database.
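heed 0.20.0-alpha.9 renames the raw-bytes codec from `heed::types::ByteSlice` to `heed::types::Bytes`; the hunks above only chase that rename. As a reference for the pattern, here is a minimal sketch against the alpha.9 API; it is not code from this commit, and the `KeyDb` alias and `count_prefix` helper are made up for illustration:

    // heed 0.20.0-alpha.6 exposed the raw-bytes codec as `heed::types::ByteSlice`;
    // 0.20.0-alpha.9 calls the same codec `heed::types::Bytes`.
    use heed::types::{Bytes, DecodeIgnore, SerdeJson};
    use heed::{Database, RoTxn};

    // Hypothetical database: raw byte keys, JSON-serialized values.
    type KeyDb = Database<Bytes, SerdeJson<serde_json::Value>>;

    // Walk every entry under a key prefix while skipping value deserialization,
    // the same shape as `delete_key_from_inverted_db` above.
    fn count_prefix(db: KeyDb, rtxn: &RoTxn, prefix: &[u8]) -> heed::Result<usize> {
        let mut count = 0usize;
        for result in db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(rtxn, prefix)? {
            let (_key, ()) = result?; // stop on the first LMDB error
            count += 1;
        }
        Ok(count)
    }

Remapping the data codec to `DecodeIgnore` keeps the scan cheap because values are never deserialized, which is exactly what the auth store relies on above.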
@@ -532,7 +532,10 @@ pub fn settings(
 
 let faceting = FacetingSettings {
 max_values_per_facet: Setting::Set(
-index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
+index
+.max_values_per_facet(rtxn)?
+.map(|x| x as usize)
+.unwrap_or(DEFAULT_VALUES_PER_FACET),
 ),
 sort_facet_values_by: Setting::Set(
 index
@@ -545,7 +548,10 @@ pub fn settings(
 
 let pagination = PaginationSettings {
 max_total_hits: Setting::Set(
-index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
+index
+.pagination_max_total_hits(rtxn)?
+.map(|x| x as usize)
+.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
 ),
 };
 
@@ -3,7 +3,7 @@ use std::io::ErrorKind;
 use actix_web::http::header::CONTENT_TYPE;
 use actix_web::web::Data;
 use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
-use bstr::ByteSlice;
+use bstr::ByteSlice as _;
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use deserr::Deserr;
 use futures::StreamExt;
@@ -360,6 +360,7 @@ fn prepare_search<'t>(
 let max_total_hits = index
 .pagination_max_total_hits(rtxn)
 .map_err(milli::Error::from)?
+.map(|x| x as usize)
 .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
 
 search.exhaustive_number_hits(is_finite_pagination);
@@ -586,6 +587,7 @@ pub fn perform_search(
 let max_values_by_facet = index
 .max_values_per_facet(&rtxn)
 .map_err(milli::Error::from)?
+.map(|x| x as usize)
 .unwrap_or(DEFAULT_VALUES_PER_FACET);
 facet_distribution.max_values_per_facet(max_values_by_facet);
 
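The `use bstr::ByteSlice as _;` change keeps bstr's extension methods in scope without binding the name `ByteSlice`, so it can no longer be confused with the heed type of the same name that the rest of this commit removes. A minimal sketch of the idiom, not taken from the routes file (the `lossy_print` helper is made up):

    // Importing the trait anonymously keeps its methods callable without
    // putting the name `ByteSlice` into scope.
    use bstr::ByteSlice as _;

    // Print possibly non-UTF-8 bytes, lossily.
    fn lossy_print(bytes: &[u8]) {
        // `to_str_lossy` comes from bstr's `ByteSlice` extension trait.
        println!("{}", bytes.to_str_lossy());
    }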
@@ -29,7 +29,7 @@ geoutils = "0.5.1"
 grenad = { version = "0.4.5", default-features = false, features = [
 "rayon", "tempfile"
 ] }
-heed = { git = "https://github.com/meilisearch/heed", branch = "main", default-features = false, features = [
+heed = { version = "0.20.0-alpha.9", default-features = false, features = [
 "serde-json", "serde-bincode", "read-txn-no-tls"
 ] }
 indexmap = { version = "2.0.0", features = ["serde"] }
@@ -2,11 +2,11 @@ use std::borrow::Cow;
 
 use heed::{BoxedError, BytesDecode, BytesEncode};
 
-/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
+/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
 /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
-pub struct ByteSliceRefCodec;
+pub struct BytesRefCodec;
 
-impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
+impl<'a> BytesEncode<'a> for BytesRefCodec {
 type EItem = &'a [u8];
 
 fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
@@ -14,7 +14,7 @@ impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
 }
 }
 
-impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
+impl<'a> BytesDecode<'a> for BytesRefCodec {
 type DItem = &'a [u8];
 
 fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
@@ -12,7 +12,7 @@ mod str_beu32_codec;
 mod str_ref;
 mod str_str_u8_codec;
 
-pub use byte_slice_ref::ByteSliceRefCodec;
+pub use byte_slice_ref::BytesRefCodec;
 use heed::BoxedError;
 pub use str_ref::StrRefCodec;
 
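`BytesRefCodec` is a pure rename of `ByteSliceRefCodec`; its `BytesEncode`/`BytesDecode` implementations keep the `Cow<[u8]>` and `BoxedError` signatures visible in the context lines above. For reference, a custom codec written against those same trait shapes could look like the following sketch (a hypothetical big-endian `u32` codec, not code from this commit):

    use std::borrow::Cow;

    use heed::{BoxedError, BytesDecode, BytesEncode};

    // Hypothetical codec: a u32 stored as 4 big-endian bytes, written against the
    // same `bytes_encode`/`bytes_decode` signatures that appear in the hunks above.
    pub struct BigEndianU32Codec;

    impl<'a> BytesEncode<'a> for BigEndianU32Codec {
        type EItem = u32;

        fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
            Ok(Cow::Owned(item.to_be_bytes().to_vec()))
        }
    }

    impl<'a> BytesDecode<'a> for BigEndianU32Codec {
        type DItem = u32;

        fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
            // `?` boxes the `TryFromSliceError` into heed's `BoxedError`.
            let bytes: [u8; 4] = bytes.try_into()?;
            Ok(u32::from_be_bytes(bytes))
        }
    }

Returning `BoxedError` lets `?` forward any `std::error::Error` raised while decoding, which is why the reconstructed slice conversion above needs no manual error mapping.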
@@ -26,7 +26,7 @@ use crate::{
 default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
 FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
 OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
-BEU32,
+BEU32, BEU64,
 };
 
 /// The HNSW data-structure that we serialize, fill and search in.
@@ -498,7 +498,7 @@ impl Index {
 let i = i as u32;
 let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec();
 key.extend_from_slice(&i.to_be_bytes());
-self.main.remap_types::<ByteSlice, ByteSlice>().put(wtxn, &key, chunk)?;
+self.main.remap_types::<Bytes, Bytes>().put(wtxn, &key, chunk)?;
 }
 Ok(())
 }
@@ -507,7 +507,7 @@ impl Index {
 pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
 let mut iter = self
 .main
-.remap_types::<ByteSlice, DecodeIgnore>()
+.remap_types::<Bytes, DecodeIgnore>()
 .prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?;
 let mut deleted = false;
 while iter.next().transpose()?.is_some() {
@@ -522,7 +522,7 @@ impl Index {
 let mut slices = Vec::new();
 for result in self
 .main
-.remap_types::<Str, ByteSlice>()
+.remap_types::<Str, Bytes>()
 .prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
 {
 let (_, slice) = result?;
@@ -994,7 +994,7 @@ impl Index {
 wtxn: &mut RwTxn,
 fst: &fst::Set<A>,
 ) -> heed::Result<()> {
-self.main.remap_types::<Str, ByteSlice>().put(
+self.main.remap_types::<Str, Bytes>().put(
 wtxn,
 main_key::WORDS_FST_KEY,
 fst.as_fst().as_bytes(),
@@ -1003,7 +1003,7 @@ impl Index {
 
 /// Returns the FST which is the words dictionary of the engine.
 pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
-match self.main.remap_types::<Str, ByteSlice>().get(rtxn, main_key::WORDS_FST_KEY)? {
+match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_FST_KEY)? {
 Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
 None => Ok(fst::Set::default().map_data(Cow::Owned)?),
 }
@@ -1016,7 +1016,7 @@ impl Index {
 wtxn: &mut RwTxn,
 fst: &fst::Set<A>,
 ) -> heed::Result<()> {
-self.main.remap_types::<Str, ByteSlice>().put(
+self.main.remap_types::<Str, Bytes>().put(
 wtxn,
 main_key::STOP_WORDS_KEY,
 fst.as_fst().as_bytes(),
@@ -1028,7 +1028,7 @@ impl Index {
 }
 
 pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
-match self.main.remap_types::<Str, ByteSlice>().get(rtxn, main_key::STOP_WORDS_KEY)? {
+match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::STOP_WORDS_KEY)? {
 Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
 None => Ok(None),
 }
@@ -1186,7 +1186,7 @@ impl Index {
 wtxn: &mut RwTxn,
 fst: &fst::Set<A>,
 ) -> heed::Result<()> {
-self.main.remap_types::<Str, ByteSlice>().put(
+self.main.remap_types::<Str, Bytes>().put(
 wtxn,
 main_key::WORDS_PREFIXES_FST_KEY,
 fst.as_fst().as_bytes(),
@@ -1195,11 +1195,7 @@ impl Index {
 
 /// Returns the FST which is the words prefixes dictionnary of the engine.
 pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
-match self
-.main
-.remap_types::<Str, ByteSlice>()
-.get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)?
-{
+match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
 Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
 None => Ok(fst::Set::default().map_data(Cow::Owned)?),
 }
@@ -1325,7 +1321,7 @@ impl Index {
 // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
 // identify 0 as being false, and anything else as true. The absence of a value is true,
 // because by default, we authorize typos.
-match self.main.remap_types::<Str, OwnedType<u8>>().get(txn, main_key::AUTHORIZE_TYPOS)? {
+match self.main.remap_types::<Str, U8>().get(txn, main_key::AUTHORIZE_TYPOS)? {
 Some(0) => Ok(false),
 _ => Ok(true),
 }
@@ -1335,11 +1331,7 @@ impl Index {
 // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
 // identify 0 as being false, and anything else as true. The absence of a value is true,
 // because by default, we authorize typos.
-self.main.remap_types::<Str, OwnedType<u8>>().put(
-txn,
-main_key::AUTHORIZE_TYPOS,
-&(flag as u8),
-)?;
+self.main.remap_types::<Str, U8>().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
 
 Ok(())
 }
@@ -1350,7 +1342,7 @@ impl Index {
 // because by default, we authorize typos.
 Ok(self
 .main
-.remap_types::<Str, OwnedType<u8>>()
+.remap_types::<Str, U8>()
 .get(txn, main_key::ONE_TYPO_WORD_LEN)?
 .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
 }
@@ -1359,11 +1351,7 @@ impl Index {
 // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
 // identify 0 as being false, and anything else as true. The absence of a value is true,
 // because by default, we authorize typos.
-self.main.remap_types::<Str, OwnedType<u8>>().put(
-txn,
-main_key::ONE_TYPO_WORD_LEN,
-&val,
-)?;
+self.main.remap_types::<Str, U8>().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
 Ok(())
 }
 
@@ -1373,7 +1361,7 @@ impl Index {
 // because by default, we authorize typos.
 Ok(self
 .main
-.remap_types::<Str, OwnedType<u8>>()
+.remap_types::<Str, U8>()
 .get(txn, main_key::TWO_TYPOS_WORD_LEN)?
 .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
 }
@@ -1382,17 +1370,13 @@ impl Index {
 // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
 // identify 0 as being false, and anything else as true. The absence of a value is true,
 // because by default, we authorize typos.
-self.main.remap_types::<Str, OwnedType<u8>>().put(
-txn,
-main_key::TWO_TYPOS_WORD_LEN,
-&val,
-)?;
+self.main.remap_types::<Str, U8>().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
 Ok(())
 }
 
 /// List the words on which typo are not allowed
 pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
-match self.main.remap_types::<Str, ByteSlice>().get(txn, main_key::EXACT_WORDS)? {
+match self.main.remap_types::<Str, Bytes>().get(txn, main_key::EXACT_WORDS)? {
 Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
 None => Ok(None),
 }
@@ -1403,7 +1387,7 @@ impl Index {
 txn: &mut RwTxn,
 words: &fst::Set<A>,
 ) -> Result<()> {
-self.main.remap_types::<Str, ByteSlice>().put(
+self.main.remap_types::<Str, Bytes>().put(
 txn,
 main_key::EXACT_WORDS,
 words.as_fst().as_bytes(),
@@ -1442,16 +1426,12 @@ impl Index {
 self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES)
 }
 
-pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
-self.main.remap_types::<Str, OwnedType<usize>>().get(txn, main_key::MAX_VALUES_PER_FACET)
+pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
+self.main.remap_types::<Str, BEU64>().get(txn, main_key::MAX_VALUES_PER_FACET)
 }
 
-pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> {
-self.main.remap_types::<Str, OwnedType<usize>>().put(
-txn,
-main_key::MAX_VALUES_PER_FACET,
-&val,
-)
+pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: u64) -> heed::Result<()> {
+self.main.remap_types::<Str, BEU64>().put(txn, main_key::MAX_VALUES_PER_FACET, &val)
 }
 
 pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
@@ -1481,22 +1461,16 @@ impl Index {
 self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY)
 }
 
-pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
-self.main
-.remap_types::<Str, OwnedType<usize>>()
-.get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
+pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
+self.main.remap_types::<Str, BEU64>().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
 }
 
 pub(crate) fn put_pagination_max_total_hits(
 &self,
 txn: &mut RwTxn,
-val: usize,
+val: u64,
 ) -> heed::Result<()> {
-self.main.remap_types::<Str, OwnedType<usize>>().put(
-txn,
-main_key::PAGINATION_MAX_TOTAL_HITS,
-&val,
-)
+self.main.remap_types::<Str, BEU64>().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
 }
 
 pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
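Two storage changes run through the `Index` hunks above: the former `OwnedType<u8>` and `OwnedType<usize>` data codecs are replaced by heed's ready-made integer codecs (`U8`, plus big-endian aliases such as the newly imported `BEU64`), and the `*_max_*` settings are now persisted as a fixed-width `u64` instead of the platform-dependent `usize`, with call sites converting back via `as usize` (see the settings and search hunks earlier). A minimal sketch of that storage pattern, assuming heed 0.20.0-alpha.9 and its re-exported `byteorder` types; the database handle and the "max-total-hits" key are illustrative, not milli's real `main_key` constants:

    use heed::byteorder::BE;
    use heed::types::{Str, U64};
    use heed::{Database, RoTxn, RwTxn};

    // Big-endian u64 codec; milli aliases this kind of type as `BEU64`.
    type BEU64 = U64<BE>;

    // Hypothetical settings database keyed by strings.
    fn put_max_hits(db: Database<Str, BEU64>, wtxn: &mut RwTxn, val: u64) -> heed::Result<()> {
        db.put(wtxn, "max-total-hits", &val)
    }

    fn max_hits(db: Database<Str, BEU64>, rtxn: &RoTxn) -> heed::Result<Option<usize>> {
        // The value is a fixed-width big-endian u64 on disk; the caller converts
        // to usize at the API boundary, as the settings and search hunks do.
        Ok(db.get(rtxn, "max-total-hits")?.map(|x| x as usize))
    }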
@@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
 use std::ops::ControlFlow;
 use std::{fmt, mem};
 
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 use heed::BytesDecode;
 use indexmap::IndexMap;
 use roaring::RoaringBitmap;
@@ -13,7 +13,7 @@ use crate::facet::FacetType;
 use crate::heed_codec::facet::{
 FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
 };
-use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
+use crate::heed_codec::{BytesRefCodec, StrRefCodec};
 use crate::search::facet::facet_distribution_iter::{
 count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
 };
@@ -105,7 +105,7 @@ impl<'a> FacetDistribution<'a> {
 key_buffer.truncate(mem::size_of::<FieldId>());
 key_buffer.extend_from_slice(&docid.to_be_bytes());
 let iter = db
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(self.rtxn, &key_buffer)?
 .remap_key_type::<FieldDocIdFacetF64Codec>();
 
@@ -129,7 +129,7 @@ impl<'a> FacetDistribution<'a> {
 key_buffer.truncate(mem::size_of::<FieldId>());
 key_buffer.extend_from_slice(&docid.to_be_bytes());
 let iter = db
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(self.rtxn, &key_buffer)?
 .remap_key_type::<FieldDocIdFacetStringCodec>();
 
@@ -172,9 +172,7 @@ impl<'a> FacetDistribution<'a> {
 
 search_function(
 self.rtxn,
-self.index
-.facet_id_f64_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
 field_id,
 candidates,
 |facet_key, nbr_docids, _| {
@@ -203,9 +201,7 @@ impl<'a> FacetDistribution<'a> {
 
 search_function(
 self.rtxn,
-self.index
-.facet_id_string_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
 field_id,
 candidates,
 |facet_key, nbr_docids, any_docid| {
@@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
 
 use super::{get_first_facet_value, get_highest_level};
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::DocumentId;
 
 /// Call the given closure on the facet distribution of the candidate documents.
@@ -23,7 +23,7 @@ use crate::DocumentId;
 /// keep iterating over the different facet values or stop.
 pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
 rtxn: &'t heed::RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 candidates: &RoaringBitmap,
 callback: CB,
@@ -34,11 +34,11 @@ where
 let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
 let highest_level = get_highest_level(
 rtxn,
-db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
 field_id,
 )?;
 
-if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
+if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
 fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
 Ok(())
 } else {
@@ -48,7 +48,7 @@ where
 
 pub fn count_iterate_over_facet_distribution<'t, CB>(
 rtxn: &'t heed::RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 candidates: &RoaringBitmap,
 mut callback: CB,
@@ -77,11 +77,11 @@ where
 let mut heap = BinaryHeap::new();
 let highest_level = get_highest_level(
 rtxn,
-db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
 field_id,
 )?;
 
-if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
+if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
 // We first fill the heap with values from the highest level
 let starting_key =
 FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
@@ -146,7 +146,7 @@ where
 CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
 {
 rtxn: &'t heed::RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 callback: CB,
 }
@@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
 
 use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::Result;
 
 /// Find all the document ids for which the given field contains a value contained within
@@ -46,16 +46,13 @@ where
 }
 Bound::Unbounded => Bound::Unbounded,
 };
-let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
 let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
 let highest_level = get_highest_level(rtxn, db, field_id)?;
 
-if let Some(starting_left_bound) =
-get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
-{
-let rightmost_bound = Bound::Included(
-get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
-); // will not fail because get_first_facet_value succeeded
+if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
+let rightmost_bound =
+Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
 let group_size = usize::MAX;
 f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
 Ok(())
@@ -67,7 +64,7 @@ where
 /// Fetch the document ids that have a facet with a value between the two given bounds
 struct FacetRangeSearch<'t, 'b, 'bitmap> {
 rtxn: &'t heed::RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 left: Bound<&'b [u8]>,
 right: Bound<&'b [u8]>,
@@ -5,7 +5,7 @@ use super::{get_first_facet_value, get_highest_level};
 use crate::heed_codec::facet::{
 FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 
 /// Return an iterator which iterates over the given candidate documents in
 /// ascending order of their facet value for the given field id.
@@ -31,12 +31,12 @@ use crate::heed_codec::ByteSliceRefCodec;
 /// Note that once a document id is returned by the iterator, it is never returned again.
 pub fn ascending_facet_sort<'t>(
 rtxn: &'t heed::RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 candidates: RoaringBitmap,
 ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
 let highest_level = get_highest_level(rtxn, db, field_id)?;
-if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
+if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
 let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
 let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
 
@@ -53,14 +53,12 @@ pub fn ascending_facet_sort<'t>(
 
 struct AscendingFacetSort<'t, 'e> {
 rtxn: &'t heed::RoTxn<'e>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 #[allow(clippy::type_complexity)]
 stack: Vec<(
 RoaringBitmap,
-std::iter::Take<
-heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
->,
+std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>>,
 )>,
 }
 
@@ -7,21 +7,21 @@ use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
 use crate::heed_codec::facet::{
 FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 
 /// See documentationg for [`ascending_facet_sort`](super::ascending_facet_sort).
 ///
 /// This function does the same thing, but in the opposite order.
 pub fn descending_facet_sort<'t>(
 rtxn: &'t heed::RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 candidates: RoaringBitmap,
 ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
 let highest_level = get_highest_level(rtxn, db, field_id)?;
-if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
+if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
 let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
-let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
+let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
 let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
 let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
 Ok(itertools::Either::Left(DescendingFacetSort {
@@ -37,13 +37,13 @@ pub fn descending_facet_sort<'t>(
 
 struct DescendingFacetSort<'t> {
 rtxn: &'t heed::RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 #[allow(clippy::type_complexity)]
 stack: Vec<(
 RoaringBitmap,
 std::iter::Take<
-heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+heed::RoRevRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 >,
 Bound<&'t [u8]>,
 )>,
@@ -100,7 +100,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
 *right_bound = Bound::Excluded(left_bound);
 let iter = match self
 .db
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
+.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
 .rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow))
 {
 Ok(iter) => iter,
@@ -123,7 +123,7 @@ mod tests {
 use roaring::RoaringBitmap;
 
 use crate::heed_codec::facet::FacetGroupKeyCodec;
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::milli_snap;
 use crate::search::facet::facet_sort_descending::descending_facet_sort;
 use crate::search::facet::tests::{
@@ -144,7 +144,7 @@ mod tests {
 let txn = index.env.read_txn().unwrap();
 let candidates = (200..=300).collect::<RoaringBitmap>();
 let mut results = String::new();
-let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
 let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
 for el in iter {
 let (docids, _) = el.unwrap();
@@ -167,7 +167,7 @@ mod tests {
 let txn = index.env.read_txn().unwrap();
 let candidates = (200..=300).collect::<RoaringBitmap>();
 let mut results = String::new();
-let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
 let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
 for el in iter {
 let (docids, _) = el.unwrap();
@@ -1,13 +1,13 @@
 pub use facet_sort_ascending::ascending_facet_sort;
 pub use facet_sort_descending::descending_facet_sort;
-use heed::types::{ByteSlice, DecodeIgnore};
+use heed::types::{Bytes, DecodeIgnore};
 use heed::{BytesDecode, RoTxn};
 use roaring::RoaringBitmap;
 
 pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
 pub use self::filter::{BadGeoError, Filter};
 use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::{Index, Result};
 mod facet_distribution;
 mod facet_distribution_iter;
@@ -34,7 +34,7 @@ pub fn facet_min_value<'t>(
 field_id: u16,
 candidates: RoaringBitmap,
 ) -> Result<Option<f64>> {
-let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
 let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
 facet_extreme_value(it)
 }
@@ -45,7 +45,7 @@ pub fn facet_max_value<'t>(
 field_id: u16,
 candidates: RoaringBitmap,
 ) -> Result<Option<f64>> {
-let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
 let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
 facet_extreme_value(it)
 }
@@ -53,7 +53,7 @@ pub fn facet_max_value<'t>(
 /// Get the first facet value in the facet database
 pub(crate) fn get_first_facet_value<'t, BoundCodec>(
 txn: &'t RoTxn,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 ) -> heed::Result<Option<BoundCodec::DItem>>
 where
@@ -63,7 +63,7 @@ where
 level0prefix.extend_from_slice(&field_id.to_be_bytes());
 level0prefix.push(0);
 let mut level0_iter_forward =
-db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
+db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
 if let Some(first) = level0_iter_forward.next() {
 let (first_key, _) = first?;
 let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
@@ -77,7 +77,7 @@ where
 /// Get the last facet value in the facet database
 pub(crate) fn get_last_facet_value<'t, BoundCodec>(
 txn: &'t RoTxn,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 ) -> heed::Result<Option<BoundCodec::DItem>>
 where
@@ -86,9 +86,8 @@ where
 let mut level0prefix = vec![];
 level0prefix.extend_from_slice(&field_id.to_be_bytes());
 level0prefix.push(0);
-let mut level0_iter_backward = db
-.remap_types::<ByteSlice, DecodeIgnore>()
-.rev_prefix_iter(txn, level0prefix.as_slice())?;
+let mut level0_iter_backward =
+db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
 if let Some(last) = level0_iter_backward.next() {
 let (last_key, _) = last?;
 let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
@@ -102,17 +101,17 @@ where
 /// Get the height of the highest level in the facet database
 pub(crate) fn get_highest_level<'t>(
 txn: &'t RoTxn<'t>,
-db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 field_id: u16,
 ) -> heed::Result<u8> {
 let field_id_prefix = &field_id.to_be_bytes();
 Ok(db
-.remap_types::<ByteSlice, DecodeIgnore>()
+.remap_types::<Bytes, DecodeIgnore>()
 .rev_prefix_iter(txn, field_id_prefix)?
 .next()
 .map(|el| {
 let (key, _) = el.unwrap();
-let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
+let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
 key.level
 })
 .unwrap_or(0))
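Several of the facet helpers above (`get_first_facet_value`, `get_last_facet_value`, `get_highest_level`) share one pattern: remap the database to `Bytes`/`DecodeIgnore`, scan a key prefix, and decode only the keys that matter. A minimal sketch of that pattern against heed 0.20.0-alpha.9; the `FieldDb` alias and `last_key_for_field` helper are illustrative, not code from this commit:

    use heed::types::{Bytes, DecodeIgnore};
    use heed::{Database, RoTxn};

    // Hypothetical database whose keys start with a big-endian field id.
    type FieldDb = Database<Bytes, Bytes>;

    // Return the raw bytes of the last key under `field_id`, skipping every value.
    fn last_key_for_field(db: FieldDb, rtxn: &RoTxn, field_id: u16) -> heed::Result<Option<Vec<u8>>> {
        let prefix = field_id.to_be_bytes();
        let mut iter = db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(rtxn, &prefix)?;
        match iter.next().transpose()? {
            Some((key, ())) => Ok(Some(key.to_vec())),
            None => Ok(None),
        }
    }

The raw key bytes can still be handed to a real codec afterwards, which is what `get_highest_level` does with `FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode`.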
@ -3,7 +3,7 @@ use std::collections::hash_map::Entry;
|
|||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
|
|
||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use heed::types::ByteSlice;
|
use heed::types::Bytes;
|
||||||
use heed::{BytesEncode, Database, RoTxn};
|
use heed::{BytesEncode, Database, RoTxn};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -50,7 +50,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
|||||||
cache_key: K1,
|
cache_key: K1,
|
||||||
db_key: &'v KC::EItem,
|
db_key: &'v KC::EItem,
|
||||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||||
db: Database<KC, ByteSlice>,
|
db: Database<KC, Bytes>,
|
||||||
) -> Result<Option<DC::DItem>>
|
) -> Result<Option<DC::DItem>>
|
||||||
where
|
where
|
||||||
K1: Copy + Eq + Hash,
|
K1: Copy + Eq + Hash,
|
||||||
@ -80,7 +80,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
|||||||
cache_key: K1,
|
cache_key: K1,
|
||||||
db_keys: &'v [KC::EItem],
|
db_keys: &'v [KC::EItem],
|
||||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||||
db: Database<KC, ByteSlice>,
|
db: Database<KC, Bytes>,
|
||||||
merger: MergeFn,
|
merger: MergeFn,
|
||||||
) -> Result<Option<DC::DItem>>
|
) -> Result<Option<DC::DItem>>
|
||||||
where
|
where
|
@@ -168,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> {
 word,
 &keys[..],
 &mut self.db_cache.word_docids,
-self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
+self.index.word_fid_docids.remap_data_type::<Bytes>(),
 merge_cbo_roaring_bitmaps,
 )
 }
@@ -177,7 +177,7 @@ impl<'ctx> SearchContext<'ctx> {
 word,
 self.word_interner.get(word).as_str(),
 &mut self.db_cache.word_docids,
-self.index.word_docids.remap_data_type::<ByteSlice>(),
+self.index.word_docids.remap_data_type::<Bytes>(),
 ),
 }
 }
@@ -191,7 +191,7 @@ impl<'ctx> SearchContext<'ctx> {
 word,
 self.word_interner.get(word).as_str(),
 &mut self.db_cache.exact_word_docids,
-self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
+self.index.exact_word_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -230,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> {
 prefix,
 &keys[..],
 &mut self.db_cache.word_prefix_docids,
-self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
+self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
 merge_cbo_roaring_bitmaps,
 )
 }
@@ -239,7 +239,7 @@ impl<'ctx> SearchContext<'ctx> {
 prefix,
 self.word_interner.get(prefix).as_str(),
 &mut self.db_cache.word_prefix_docids,
-self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
+self.index.word_prefix_docids.remap_data_type::<Bytes>(),
 ),
 }
 }
@@ -253,7 +253,7 @@ impl<'ctx> SearchContext<'ctx> {
 prefix,
 self.word_interner.get(prefix).as_str(),
 &mut self.db_cache.exact_word_prefix_docids,
-self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
+self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -272,7 +272,7 @@ impl<'ctx> SearchContext<'ctx> {
 self.word_interner.get(word2).as_str(),
 ),
 &mut self.db_cache.word_pair_proximity_docids,
-self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
+self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -291,7 +291,7 @@ impl<'ctx> SearchContext<'ctx> {
 self.word_interner.get(word2).as_str(),
 ),
 &mut self.db_cache.word_pair_proximity_docids,
-self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
+self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -320,7 +320,7 @@ impl<'ctx> SearchContext<'ctx> {
 let remap_key_type = self
 .index
 .word_pair_proximity_docids
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(self.txn, &key)?;
 for result in remap_key_type {
 let (_, docids) = result?;
@@ -359,7 +359,7 @@ impl<'ctx> SearchContext<'ctx> {
 (word, fid),
 &(self.word_interner.get(word).as_str(), fid),
 &mut self.db_cache.word_fid_docids,
-self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
+self.index.word_fid_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -378,7 +378,7 @@ impl<'ctx> SearchContext<'ctx> {
 (word_prefix, fid),
 &(self.word_interner.get(word_prefix).as_str(), fid),
 &mut self.db_cache.word_prefix_fid_docids,
-self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
+self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -392,7 +392,7 @@ impl<'ctx> SearchContext<'ctx> {
 let remap_key_type = self
 .index
 .word_fid_docids
-.remap_types::<ByteSlice, ByteSlice>()
+.remap_types::<Bytes, Bytes>()
 .prefix_iter(self.txn, &key)?
 .remap_key_type::<StrBEU16Codec>();
 for result in remap_key_type {
@@ -418,7 +418,7 @@ impl<'ctx> SearchContext<'ctx> {
 let remap_key_type = self
 .index
 .word_prefix_fid_docids
-.remap_types::<ByteSlice, ByteSlice>()
+.remap_types::<Bytes, Bytes>()
 .prefix_iter(self.txn, &key)?
 .remap_key_type::<StrBEU16Codec>();
 for result in remap_key_type {
@@ -446,7 +446,7 @@ impl<'ctx> SearchContext<'ctx> {
 (word, position),
 &(self.word_interner.get(word).as_str(), position),
 &mut self.db_cache.word_position_docids,
-self.index.word_position_docids.remap_data_type::<ByteSlice>(),
+self.index.word_position_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -460,7 +460,7 @@ impl<'ctx> SearchContext<'ctx> {
 (word_prefix, position),
 &(self.word_interner.get(word_prefix).as_str(), position),
 &mut self.db_cache.word_prefix_position_docids,
-self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
+self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
 )
 }
 
@@ -474,7 +474,7 @@ impl<'ctx> SearchContext<'ctx> {
 let remap_key_type = self
 .index
 .word_position_docids
-.remap_types::<ByteSlice, ByteSlice>()
+.remap_types::<Bytes, Bytes>()
 .prefix_iter(self.txn, &key)?
 .remap_key_type::<StrBEU16Codec>();
 for result in remap_key_type {
@@ -505,7 +505,7 @@ impl<'ctx> SearchContext<'ctx> {
 let remap_key_type = self
 .index
 .word_prefix_position_docids
-.remap_types::<ByteSlice, ByteSlice>()
+.remap_types::<Bytes, Bytes>()
 .prefix_iter(self.txn, &key)?
 .remap_key_type::<StrBEU16Codec>();
 for result in remap_key_type {
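Every `get_db_*` filler in this file follows the same shape after the rename: remap the key codec to `Bytes` so an arbitrary byte prefix is accepted, run `prefix_iter`, then remap the keys back to a typed codec such as `StrBEU16Codec` while iterating. A hedged sketch of that shape, with heed's own `Str` codec standing in for milli's internal ones and the value assumed to be a serialized `RoaringBitmap`, which is not necessarily the exact on-disk format of every database touched above:

```rust
use heed::types::{Bytes, Str};
use heed::{Database, Result, RoTxn};
use roaring::RoaringBitmap;

// Union the bitmaps stored under every key that starts with `prefix`.
fn union_docids_under_prefix(
    db: Database<Str, Bytes>,
    txn: &RoTxn,
    prefix: &[u8],
) -> Result<RoaringBitmap> {
    let mut docids = RoaringBitmap::new();
    let iter = db
        .remap_key_type::<Bytes>() // raw keys: any byte prefix is accepted
        .prefix_iter(txn, prefix)?
        .remap_key_type::<Str>(); // decode the keys as UTF-8 again while iterating
    for entry in iter {
        let (_word, value) = entry?;
        docids |= RoaringBitmap::deserialize_from(value).map_err(heed::Error::Io)?;
    }
    Ok(docids)
}
```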
@@ -1,4 +1,4 @@
-use heed::types::{ByteSlice, Str, Unit};
+use heed::types::{Bytes, Str, Unit};
 use heed::{Database, RoPrefix, RoTxn};
 use roaring::RoaringBitmap;
 
@@ -8,7 +8,7 @@ const DOCID_SIZE: usize = 4;
 use crate::heed_codec::facet::{
 FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec,
 };
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::{Index, Result, SearchContext};
 
 pub struct DistinctOutput {
@@ -71,7 +71,7 @@ pub fn distinct_single_docid(
 
 /// Return all the docids containing the given value in the given field
 fn facet_value_docids(
-database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 txn: &RoTxn,
 field_id: u16,
 facet_value: &[u8],
@@ -87,12 +87,12 @@ fn facet_number_values<'a>(
 field_id: u16,
 index: &Index,
 txn: &'a RoTxn,
-) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
+) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
 let key = facet_values_prefix_key(field_id, docid);
 
 let iter = index
 .field_id_docid_facet_f64s
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(txn, &key)?
 .remap_key_type();
 
@@ -105,12 +105,12 @@ pub fn facet_string_values<'a>(
 field_id: u16,
 index: &Index,
 txn: &'a RoTxn,
-) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
+) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
 let key = facet_values_prefix_key(field_id, docid);
 
 let iter = index
 .field_id_docid_facet_strings
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(txn, &key)?
 .remap_types();
 
@@ -1,7 +1,7 @@
 use std::collections::VecDeque;
 use std::iter::FromIterator;
 
-use heed::types::{ByteSlice, Unit};
+use heed::types::{Bytes, Unit};
 use heed::{RoPrefix, RoTxn};
 use roaring::RoaringBitmap;
 use rstar::RTree;
@@ -34,7 +34,7 @@ fn facet_number_values<'a>(
 
 let iter = index
 .field_id_docid_facet_f64s
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(txn, &key)?
 .remap_key_type();
 
@@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
 use super::logger::SearchLogger;
 use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
 use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
-use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
+use crate::heed_codec::{BytesRefCodec, StrRefCodec};
 use crate::score_details::{self, ScoreDetails};
 use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
 use crate::{FieldId, Index, Result};
@@ -100,11 +100,11 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
 let number_db = ctx
 .index
 .facet_id_f64_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
 let string_db = ctx
 .index
 .facet_id_string_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
 
 let (number_iter, string_iter) = if self.is_ascending {
 let number_iter = ascending_facet_sort(
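In the `Sort` ranking rule above, the number and string facet databases are both remapped to `FacetGroupKeyCodec<BytesRefCodec>` so the two sides share a single Rust type and can drive the same iterators. A small sketch of the same trick with placeholder heed codecs rather than milli's:

```rust
use heed::types::{Bytes, Str, Unit};
use heed::Database;

// Two databases with different key codecs are remapped to a common raw-bytes
// key codec, so later code can store and drive them uniformly.
fn unify_key_codecs(
    numbers: Database<Str, Unit>,
    strings: Database<Bytes, Unit>,
) -> [Database<Bytes, Unit>; 2] {
    [numbers.remap_key_type::<Bytes>(), strings.remap_key_type::<Bytes>()]
}
```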
@@ -2,7 +2,7 @@ use std::fs::File;
 use std::io::BufReader;
 
 use grenad::CompressionType;
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
 use roaring::RoaringBitmap;
 
@@ -11,7 +11,7 @@ use crate::facet::FacetType;
 use crate::heed_codec::facet::{
 FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd};
 use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
 use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
@@ -70,11 +70,11 @@ impl<'i> FacetsUpdateBulk<'i> {
 let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;
 
 let db = match facet_type {
-FacetType::String => index
-.facet_id_string_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+FacetType::String => {
+index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
+}
 FacetType::Number => {
-index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
+index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
 }
 };
 
@@ -88,7 +88,7 @@ impl<'i> FacetsUpdateBulk<'i> {
 
 /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
 pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
-pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 pub delta_data: Option<grenad::Reader<R>>,
 pub group_size: u8,
 pub min_level_size: u8,
@@ -106,7 +106,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 for level_reader in level_readers {
 let mut cursor = level_reader.into_cursor()?;
 while let Some((k, v)) = cursor.move_on_next()? {
-self.db.remap_types::<ByteSlice, ByteSlice>().put(wtxn, k, v)?;
+self.db.remap_types::<Bytes, Bytes>().put(wtxn, k, v)?;
 }
 }
 }
@@ -128,7 +128,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 };
 if self.db.is_empty(wtxn)? {
 let mut buffer = Vec::new();
-let mut database = self.db.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
+let mut database = self.db.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
 let mut cursor = delta_data.into_cursor()?;
 while let Some((key, value)) = cursor.move_on_next()? {
 if !valid_lmdb_key(key) {
@@ -147,16 +147,12 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 // then we extend the buffer with the docids bitmap
 buffer.extend_from_slice(value);
 unsafe {
-database.put_current_with_options::<ByteSlice>(
-PutFlags::APPEND,
-key,
-&buffer,
-)?
+database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, &buffer)?
 };
 }
 } else {
 let mut buffer = Vec::new();
-let database = self.db.remap_types::<ByteSlice, ByteSlice>();
+let database = self.db.remap_types::<Bytes, Bytes>();
 
 let mut cursor = delta_data.into_cursor()?;
 while let Some((key, value)) = cursor.move_on_next()? {
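The hunk above only reflows the call onto one line and renames the codec; the underlying pattern is LMDB's append fast path: entries that are already sorted by key are written through a mutable iterator with `PutFlags::APPEND`, skipping the usual B-tree positioning, and the call is `unsafe` because no reference into the database may be held across it. A hedged sketch with hypothetical inputs, not the `FacetsUpdateBulkInner` code itself:

```rust
use heed::types::Bytes;
use heed::{Database, PutFlags, Result, RwTxn};

// `pairs` is assumed to be sorted by key; otherwise LMDB rejects APPEND writes.
fn append_sorted_pairs(
    db: Database<Bytes, Bytes>,
    wtxn: &mut RwTxn,
    pairs: &[(Vec<u8>, Vec<u8>)],
) -> Result<()> {
    let mut iter = db.iter_mut(wtxn)?;
    for (key, value) in pairs {
        // Safety: no reference into the database is kept across this write.
        unsafe {
            iter.put_current_with_options::<Bytes>(
                PutFlags::APPEND,
                key.as_slice(),
                value.as_slice(),
            )?;
        }
    }
    Ok(())
}
```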
@@ -225,9 +221,9 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 
 let level_0_iter = self
 .db
-.remap_types::<ByteSlice, ByteSlice>()
+.remap_types::<Bytes, Bytes>()
 .prefix_iter(rtxn, level_0_prefix.as_slice())?
-.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
+.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>();
 
 let mut left_bound: &[u8] = &[];
 let mut first_iteration_for_new_group = true;
@@ -313,7 +309,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
 {
 let key = FacetGroupKey { field_id, level, left_bound };
-let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
+let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
 .map_err(Error::Encoding)?;
 let value = FacetGroupValue { size: group_size, bitmap };
 let value =
@@ -342,7 +338,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
 {
 let key = FacetGroupKey { field_id, level, left_bound };
-let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
+let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
 .map_err(Error::Encoding)?;
 let value = FacetGroupValue { size: group_size, bitmap };
 let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
@@ -1,7 +1,7 @@
 use std::fs::File;
 use std::io::BufReader;
 
-use heed::types::{ByteSlice, DecodeIgnore};
+use heed::types::{Bytes, DecodeIgnore};
 use heed::{BytesDecode, Error, RoTxn, RwTxn};
 use obkv::KvReader;
 use roaring::RoaringBitmap;
@@ -10,7 +10,7 @@ use crate::facet::FacetType;
 use crate::heed_codec::facet::{
 FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::search::facet::get_highest_level;
 use crate::update::del_add::DelAdd;
 use crate::update::index_documents::valid_lmdb_key;
@@ -48,10 +48,10 @@ impl FacetsUpdateIncremental {
 db: match facet_type {
 FacetType::String => index
 .facet_id_string_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
 FacetType::Number => index
 .facet_id_f64_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
 },
 group_size,
 max_group_size,
@@ -67,7 +67,7 @@ impl FacetsUpdateIncremental {
 if !valid_lmdb_key(key) {
 continue;
 }
-let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
+let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
 .map_err(heed::Error::Encoding)?;
 let value = KvReader::new(value);
 
@@ -98,7 +98,7 @@ impl FacetsUpdateIncremental {
 
 /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
 pub struct FacetsUpdateIncrementalInner {
-pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 pub group_size: u8,
 pub min_level_size: u8,
 pub max_group_size: u8,
@@ -136,11 +136,11 @@ impl FacetsUpdateIncrementalInner {
 
 let mut iter = self
 .db
-.remap_types::<ByteSlice, FacetGroupValueCodec>()
+.remap_types::<Bytes, FacetGroupValueCodec>()
 .prefix_iter(txn, prefix.as_slice())?;
 let (key_bytes, value) = iter.next().unwrap()?;
 Ok((
-FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
+FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
 .map_err(Error::Encoding)?
 .into_owned(),
 value,
@@ -177,7 +177,7 @@ impl FacetsUpdateIncrementalInner {
 level0_prefix.push(0);
 
 let mut iter =
-self.db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
+self.db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
 
 if iter.next().is_none() {
 drop(iter);
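The `@@ -177` hunk keeps the emptiness check as it was and only renames the codec: the value side is remapped to `DecodeIgnore`, so probing whether any entry lives under a byte prefix never decodes a value. A hedged sketch with a placeholder database type:

```rust
use heed::types::{Bytes, DecodeIgnore, Str};
use heed::{Database, Result, RoTxn};

// Returns true when no key in `db` starts with `prefix`; values are never decoded.
fn prefix_is_empty(db: Database<Str, Bytes>, txn: &RoTxn, prefix: &[u8]) -> Result<bool> {
    let mut iter = db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, prefix)?;
    Ok(iter.next().is_none())
}
```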
@@ -379,11 +379,8 @@ impl FacetsUpdateIncrementalInner {
 highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
 highest_level_prefix.push(highest_level);
 
-let size_highest_level = self
-.db
-.remap_types::<ByteSlice, ByteSlice>()
-.prefix_iter(txn, &highest_level_prefix)?
-.count();
+let size_highest_level =
+self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();
 
 if size_highest_level < self.group_size as usize * self.min_level_size as usize {
 return Ok(());
@@ -391,7 +388,7 @@ impl FacetsUpdateIncrementalInner {
 
 let mut groups_iter = self
 .db
-.remap_types::<ByteSlice, FacetGroupValueCodec>()
+.remap_types::<Bytes, FacetGroupValueCodec>()
 .prefix_iter(txn, &highest_level_prefix)?;
 
 let nbr_new_groups = size_highest_level / self.group_size as usize;
@@ -403,7 +400,7 @@ impl FacetsUpdateIncrementalInner {
 let mut values = RoaringBitmap::new();
 for _ in 0..group_size {
 let (key_bytes, value_i) = groups_iter.next().unwrap()?;
-let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
+let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
 .map_err(Error::Encoding)?;
 
 if first_key.is_none() {
@@ -426,7 +423,7 @@ impl FacetsUpdateIncrementalInner {
 let mut values = RoaringBitmap::new();
 for _ in 0..nbr_leftover_elements {
 let (key_bytes, value_i) = groups_iter.next().unwrap()?;
-let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
+let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
 .map_err(Error::Encoding)?;
 
 if first_key.is_none() {
@@ -594,7 +591,7 @@ impl FacetsUpdateIncrementalInner {
 if highest_level == 0
 || self
 .db
-.remap_types::<ByteSlice, ByteSlice>()
+.remap_types::<Bytes, Bytes>()
 .prefix_iter(txn, &highest_level_prefix)?
 .count()
 >= self.min_level_size as usize
@@ -602,14 +599,12 @@ impl FacetsUpdateIncrementalInner {
 return Ok(());
 }
 let mut to_delete = vec![];
-let mut iter = self
-.db
-.remap_types::<ByteSlice, ByteSlice>()
-.prefix_iter(txn, &highest_level_prefix)?;
+let mut iter =
+self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?;
 for el in iter.by_ref() {
 let (k, _) = el?;
 to_delete.push(
-FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
+FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
 .map_err(Error::Encoding)?
 .into_owned(),
 );
@@ -1118,7 +1113,7 @@ mod fuzz {
 
 #[no_coverage]
 fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
-let index = FacetIndex::<ByteSliceRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
+let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
 let mut txn = index.env.write_txn().unwrap();
 
 let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
@@ -1164,16 +1159,13 @@ mod fuzz {
 let level0iter = index
 .content
 .as_polymorph()
-.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
-&mut txn,
-&field_id.to_be_bytes(),
-)
+.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
 .unwrap();
 
 for ((key, values), group) in values_field_id.iter().zip(level0iter) {
 let (group_key, group_values) = group.unwrap();
 let group_key =
-FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(group_key).unwrap();
+FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
 assert_eq!(key, &group_key.left_bound);
 assert_eq!(values, &group_values.bitmap);
 }
@@ -1183,13 +1175,13 @@ mod fuzz {
 let level0iter = index
 .content
 .as_polymorph()
-.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
+.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
 .unwrap();
 
 for ((key, values), group) in values_field_id.iter().zip(level0iter) {
 let (group_key, group_values) = group.unwrap();
 let group_key =
-FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(group_key).unwrap();
+FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
 assert_eq!(key, &group_key.left_bound);
 assert_eq!(values, &group_values.bitmap);
 }
@@ -83,7 +83,7 @@ use std::iter::FromIterator;
 
 use charabia::normalizer::{Normalize, NormalizerOption};
 use grenad::{CompressionType, SortAlgorithm};
-use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
+use heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use heed::BytesEncode;
 use log::debug;
 use time::OffsetDateTime;
@@ -92,7 +92,7 @@ use self::incremental::FacetsUpdateIncremental;
 use super::FacetsUpdateBulk;
 use crate::facet::FacetType;
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::update::index_documents::create_sorter;
 use crate::update::merge_btreeset_string;
 use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
@@ -106,7 +106,7 @@ pub mod incremental;
 /// a bulk update method or an incremental update method.
 pub struct FacetsUpdate<'i> {
 index: &'i Index,
-database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 facet_type: FacetType,
 delta_data: grenad::Reader<BufReader<File>>,
 group_size: u8,
@@ -120,11 +120,11 @@ impl<'i> FacetsUpdate<'i> {
 delta_data: grenad::Reader<BufReader<File>>,
 ) -> Self {
 let database = match facet_type {
-FacetType::String => index
-.facet_id_string_docids
-.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
+FacetType::String => {
+index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
+}
 FacetType::Number => {
-index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
+index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
 }
 };
 Self {
@@ -217,10 +217,11 @@ impl<'i> FacetsUpdate<'i> {
 // as the grenad sorter already merged them for us.
 let mut merger_iter = sorter.into_stream_merger_iter()?;
 while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
-self.index
-.facet_id_normalized_string_strings
-.remap_types::<ByteSlice, ByteSlice>()
-.put(wtxn, key_bytes, btreeset_bytes)?;
+self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
+wtxn,
+key_bytes,
+btreeset_bytes,
+)?;
 }
 
 // We compute one FST by string facet
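The normalized-strings write above is folded into a single statement: `remap_types::<Bytes, Bytes>()` lets a key and value that were already serialized by the grenad sorter be stored without running through the typed codecs again. A hedged sketch with a placeholder database, not milli's `facet_id_normalized_string_strings`:

```rust
use heed::types::{Bytes, Str};
use heed::{Database, Result, RwTxn};

// Store pre-encoded key and value bytes as-is by remapping both codecs to `Bytes`.
fn put_pre_encoded(
    db: Database<Str, Str>,
    wtxn: &mut RwTxn,
    key_bytes: &[u8],
    value_bytes: &[u8],
) -> Result<()> {
    db.remap_types::<Bytes, Bytes>().put(wtxn, key_bytes, value_bytes)
}
```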
@@ -267,7 +268,7 @@ pub(crate) mod test_helpers {
 use std::marker::PhantomData;
 use std::rc::Rc;
 
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
 use roaring::RoaringBitmap;
 
@@ -275,7 +276,7 @@ pub(crate) mod test_helpers {
 use crate::heed_codec::facet::{
 FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
-use crate::heed_codec::ByteSliceRefCodec;
+use crate::heed_codec::BytesRefCodec;
 use crate::search::facet::get_highest_level;
 use crate::snapshot_tests::display_bitmap;
 use crate::update::del_add::{DelAdd, KvWriterDelAdd};
@@ -306,7 +307,7 @@ pub(crate) mod test_helpers {
 BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
 {
 pub env: Env,
-pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
+pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
 pub group_size: Cell<u8>,
 pub min_level_size: Cell<u8>,
 pub max_group_size: Cell<u8>,
@@ -454,7 +455,7 @@ pub(crate) mod test_helpers {
 let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
 let key: FacetGroupKey<&[u8]> =
 FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
-let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
+let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
 let mut inner_writer = KvWriterDelAdd::memory();
 let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
 inner_writer.insert(DelAdd::Addition, value).unwrap();
@@ -486,12 +487,12 @@ pub(crate) mod test_helpers {
 
 let iter = self
 .content
-.remap_types::<ByteSlice, FacetGroupValueCodec>()
+.remap_types::<Bytes, FacetGroupValueCodec>()
 .prefix_iter(txn, &level_no_prefix)
 .unwrap();
 for el in iter {
 let (key, value) = el.unwrap();
-let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
+let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
 
 let mut prefix_start_below = vec![];
 prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@@ -501,11 +502,11 @@ pub(crate) mod test_helpers {
 let start_below = {
 let mut start_below_iter = self
 .content
-.remap_types::<ByteSlice, FacetGroupValueCodec>()
+.remap_types::<Bytes, FacetGroupValueCodec>()
 .prefix_iter(txn, &prefix_start_below)
 .unwrap();
 let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
-FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes).unwrap()
+FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes).unwrap()
 };
 
 assert!(value.size > 0);
@@ -3,7 +3,7 @@ use std::fs::File;
 use std::io::{self, BufReader, BufWriter, Seek};
 
 use grenad::{CompressionType, Sorter};
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 
 use super::{ClonableMmap, MergeFn};
 use crate::update::index_documents::valid_lmdb_key;
@@ -255,7 +255,7 @@ where
 puffin::profile_function!();
 
 let mut buffer = Vec::new();
-let database = database.remap_types::<ByteSlice, ByteSlice>();
+let database = database.remap_types::<Bytes, Bytes>();
 
 let mut merger_iter = sorter.into_stream_merger_iter()?;
 while let Some((key, value)) = merger_iter.next()? {
@@ -247,7 +247,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 let base_obkv = self
 .index
 .documents
-.remap_data_type::<heed::types::ByteSlice>()
+.remap_data_type::<heed::types::Bytes>()
 .get(wtxn, &original_key)?
 .ok_or(InternalError::DatabaseMissingEntry {
 db_name: db_name::DOCUMENTS,
@@ -501,7 +501,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 let base_obkv = self
 .index
 .documents
-.remap_data_type::<heed::types::ByteSlice>()
+.remap_data_type::<heed::types::Bytes>()
 .get(txn, &original_key)?
 .ok_or(InternalError::DatabaseMissingEntry {
 db_name: db_name::DOCUMENTS,
@@ -6,7 +6,7 @@ use std::io::{self, BufReader};
 use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
 use grenad::MergerBuilder;
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 use heed::{PutFlags, RwTxn};
 use log::error;
 use obkv::{KvReader, KvWriter};
@@ -144,7 +144,7 @@ pub(crate) fn write_typed_chunk_into_index(
 }
 }
 
-let db = index.documents.remap_data_type::<ByteSlice>();
+let db = index.documents.remap_data_type::<Bytes>();
 
 if !writer.is_empty() {
 db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
@@ -293,7 +293,7 @@ pub(crate) fn write_typed_chunk_into_index(
 }
 TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
 let index_fid_docid_facet_numbers =
-index.field_id_docid_facet_f64s.remap_types::<ByteSlice, ByteSlice>();
+index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
 let mut cursor = fid_docid_facet_number.into_cursor()?;
 while let Some((key, value)) = cursor.move_on_next()? {
 let reader = KvReaderDelAdd::new(value);
@@ -313,7 +313,7 @@ pub(crate) fn write_typed_chunk_into_index(
 }
 TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
 let index_fid_docid_facet_strings =
-index.field_id_docid_facet_strings.remap_types::<ByteSlice, ByteSlice>();
+index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
 let mut cursor = fid_docid_facet_string.into_cursor()?;
 while let Some((key, value)) = cursor.move_on_next()? {
 let reader = KvReaderDelAdd::new(value);
@@ -498,7 +498,7 @@ where
 puffin::profile_function!(format!("number of entries: {}", data.len()));
 
 let mut buffer = Vec::new();
-let database = database.remap_types::<ByteSlice, ByteSlice>();
+let database = database.remap_types::<Bytes, Bytes>();
 
 let mut cursor = data.into_cursor()?;
 while let Some((key, value)) = cursor.move_on_next()? {
@@ -556,7 +556,7 @@ where
 }
 
 let mut buffer = Vec::new();
-let mut database = database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
+let mut database = database.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
 
 let mut cursor = data.into_cursor()?;
 while let Some((key, value)) = cursor.move_on_next()? {
@@ -571,7 +571,7 @@ where
 let value = serialize_value(value, &mut buffer)?;
 unsafe {
 // safety: We do not keep a reference to anything that lives inside the database
-database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, value)?
+database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, value)?
 };
 }
 }
@@ -822,7 +822,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 fn update_max_values_per_facet(&mut self) -> Result<()> {
 match self.max_values_per_facet {
 Setting::Set(max) => {
-self.index.put_max_values_per_facet(self.wtxn, max)?;
+self.index.put_max_values_per_facet(self.wtxn, max as u64)?;
 }
 Setting::Reset => {
 self.index.delete_max_values_per_facet(self.wtxn)?;
@@ -850,7 +850,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 fn update_pagination_max_total_hits(&mut self) -> Result<()> {
 match self.pagination_max_total_hits {
 Setting::Set(max) => {
-self.index.put_pagination_max_total_hits(self.wtxn, max)?;
+self.index.put_pagination_max_total_hits(self.wtxn, max as u64)?;
 }
 Setting::Reset => {
 self.index.delete_pagination_max_total_hits(self.wtxn)?;
@@ -917,7 +917,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 #[cfg(test)]
 mod tests {
 use big_s::S;
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 use maplit::{btreemap, btreeset, hashset};
 
 use super::*;
@@ -1130,7 +1130,7 @@ mod tests {
 }
 let count = index
 .facet_id_f64_docids
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 // The faceted field id is 1u16
 .prefix_iter(&rtxn, &[0, 1, 0])
 .unwrap()
@@ -1151,7 +1151,7 @@ mod tests {
 // Only count the field_id 0 and level 0 facet values.
 let count = index
 .facet_id_f64_docids
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(&rtxn, &[0, 1, 0])
 .unwrap()
 .count();
@@ -1,7 +1,7 @@
 use std::collections::{HashMap, HashSet};
 
 use grenad::CompressionType;
-use heed::types::{ByteSlice, Str};
+use heed::types::{Bytes, Str};
 use heed::Database;
 
 use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
@@ -93,7 +93,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
 }
 
 // We fetch the docids associated to the newly added word prefix fst only.
-let db = self.word_docids.remap_data_type::<ByteSlice>();
+let db = self.word_docids.remap_data_type::<Bytes>();
 let mut buffer = Vec::new();
 for prefix in new_prefix_fst_words {
 let prefix = std::str::from_utf8(prefix.as_bytes())?;
@@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet};
 use std::str;
 
 use grenad::CompressionType;
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Database};
 use log::debug;
 
@@ -110,7 +110,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
 }
 
 // We fetch the docids associated to the newly added word prefix fst only.
-let db = self.word_database.remap_data_type::<ByteSlice>();
+let db = self.word_database.remap_data_type::<Bytes>();
 let mut buffer = Vec::new();
 for prefix_bytes in new_prefix_fst_words {
 let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
@@ -119,7 +119,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
 
 // iter over all lines of the DB where the key is prefixed by the current prefix.
 let iter = db
-.remap_key_type::<ByteSlice>()
+.remap_key_type::<Bytes>()
 .prefix_iter(self.wtxn, prefix_bytes.as_bytes())?
 .remap_key_type::<StrBEU16Codec>();
 for result in iter {