mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Introduce the word_level_position_docids Index database
This commit is contained in:
parent
75e7b1e3da
commit
b0a417f342
@ -319,6 +319,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
|
word_level_position_docids,
|
||||||
facet_field_id_value_docids,
|
facet_field_id_value_docids,
|
||||||
field_id_docid_facet_values: _,
|
field_id_docid_facet_values: _,
|
||||||
documents,
|
documents,
|
||||||
|
@ -2,6 +2,7 @@ mod beu32_str_codec;
|
|||||||
mod obkv_codec;
|
mod obkv_codec;
|
||||||
mod roaring_bitmap;
|
mod roaring_bitmap;
|
||||||
mod roaring_bitmap_length;
|
mod roaring_bitmap_length;
|
||||||
|
mod str_level_position_codec;
|
||||||
mod str_str_u8_codec;
|
mod str_str_u8_codec;
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
|
|
||||||
@ -9,4 +10,5 @@ pub use self::beu32_str_codec::BEU32StrCodec;
|
|||||||
pub use self::obkv_codec::ObkvCodec;
|
pub use self::obkv_codec::ObkvCodec;
|
||||||
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||||
pub use self::roaring_bitmap_length::{BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec};
|
pub use self::roaring_bitmap_length::{BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec};
|
||||||
|
pub use self::str_level_position_codec::StrLevelPositionCodec;
|
||||||
pub use self::str_str_u8_codec::StrStrU8Codec;
|
pub use self::str_str_u8_codec::StrStrU8Codec;
|
||||||
|
42
milli/src/heed_codec/str_level_position_codec.rs
Normal file
42
milli/src/heed_codec/str_level_position_codec.rs
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
use std::convert::TryInto;
|
||||||
|
use std::mem::size_of;
|
||||||
|
use std::str;
|
||||||
|
|
||||||
|
pub struct StrLevelPositionCodec;
|
||||||
|
|
||||||
|
impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
|
||||||
|
type DItem = (&'a str, u8, u32, u32);
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let footer_len = size_of::<u8>() + size_of::<u32>() * 2;
|
||||||
|
|
||||||
|
if bytes.len() < footer_len { return None }
|
||||||
|
|
||||||
|
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||||
|
let word = str::from_utf8(word).ok()?;
|
||||||
|
|
||||||
|
let (level, bytes) = bytes.split_first()?;
|
||||||
|
let left = bytes[..4].try_into().map(u32::from_be_bytes).ok()?;
|
||||||
|
let right = bytes[4..].try_into().map(u32::from_be_bytes).ok()?;
|
||||||
|
|
||||||
|
Some((word, *level, left, right))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
|
||||||
|
type EItem = (&'a str, u8, u32, u32);
|
||||||
|
|
||||||
|
fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
|
let left = left.to_be_bytes();
|
||||||
|
let right = right.to_be_bytes();
|
||||||
|
|
||||||
|
let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len());
|
||||||
|
bytes.extend_from_slice(word.as_bytes());
|
||||||
|
bytes.push(*level);
|
||||||
|
bytes.extend_from_slice(&left[..]);
|
||||||
|
bytes.extend_from_slice(&right[..]);
|
||||||
|
|
||||||
|
Some(Cow::Owned(bytes))
|
||||||
|
}
|
||||||
|
}
|
@ -12,7 +12,7 @@ use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution,
|
|||||||
use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId};
|
use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId};
|
||||||
use crate::{
|
use crate::{
|
||||||
BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
||||||
ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrStrU8Codec,
|
ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec,
|
||||||
};
|
};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
@ -52,6 +52,8 @@ pub struct Index {
|
|||||||
pub word_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
|
pub word_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
|
||||||
/// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
|
/// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
|
||||||
pub word_prefix_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
|
pub word_prefix_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
|
||||||
|
/// Maps the word, level and position range with the docids that corresponds to it.
|
||||||
|
pub word_level_position_docids: Database<StrLevelPositionCodec, CboRoaringBitmapCodec>,
|
||||||
/// Maps the facet field id and the globally ordered value with the docids that corresponds to it.
|
/// Maps the facet field id and the globally ordered value with the docids that corresponds to it.
|
||||||
pub facet_field_id_value_docids: Database<ByteSlice, CboRoaringBitmapCodec>,
|
pub facet_field_id_value_docids: Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||||
/// Maps the document id, the facet field id and the globally ordered value.
|
/// Maps the document id, the facet field id and the globally ordered value.
|
||||||
@ -62,7 +64,7 @@ pub struct Index {
|
|||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> {
|
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> {
|
||||||
options.max_dbs(9);
|
options.max_dbs(10);
|
||||||
|
|
||||||
let env = options.open(path)?;
|
let env = options.open(path)?;
|
||||||
let main = env.create_poly_database(Some("main"))?;
|
let main = env.create_poly_database(Some("main"))?;
|
||||||
@ -71,6 +73,7 @@ impl Index {
|
|||||||
let docid_word_positions = env.create_database(Some("docid-word-positions"))?;
|
let docid_word_positions = env.create_database(Some("docid-word-positions"))?;
|
||||||
let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?;
|
let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?;
|
||||||
let word_prefix_pair_proximity_docids = env.create_database(Some("word-prefix-pair-proximity-docids"))?;
|
let word_prefix_pair_proximity_docids = env.create_database(Some("word-prefix-pair-proximity-docids"))?;
|
||||||
|
let word_level_position_docids = env.create_database(Some("word-level-position-docids"))?;
|
||||||
let facet_field_id_value_docids = env.create_database(Some("facet-field-id-value-docids"))?;
|
let facet_field_id_value_docids = env.create_database(Some("facet-field-id-value-docids"))?;
|
||||||
let field_id_docid_facet_values = env.create_database(Some("field-id-docid-facet-values"))?;
|
let field_id_docid_facet_values = env.create_database(Some("field-id-docid-facet-values"))?;
|
||||||
let documents = env.create_database(Some("documents"))?;
|
let documents = env.create_database(Some("documents"))?;
|
||||||
@ -94,6 +97,7 @@ impl Index {
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
|
word_level_position_docids,
|
||||||
facet_field_id_value_docids,
|
facet_field_id_value_docids,
|
||||||
field_id_docid_facet_values,
|
field_id_docid_facet_values,
|
||||||
documents,
|
documents,
|
||||||
|
@ -22,7 +22,7 @@ use serde_json::{Map, Value};
|
|||||||
pub use self::criterion::{Criterion, default_criteria};
|
pub use self::criterion::{Criterion, default_criteria};
|
||||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
pub use self::fields_ids_map::FieldsIdsMap;
|
pub use self::fields_ids_map::FieldsIdsMap;
|
||||||
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
|
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec};
|
||||||
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||||
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
|
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
|
@ -28,6 +28,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
|
word_level_position_docids,
|
||||||
facet_field_id_value_docids,
|
facet_field_id_value_docids,
|
||||||
field_id_docid_facet_values,
|
field_id_docid_facet_values,
|
||||||
documents,
|
documents,
|
||||||
|
@ -88,6 +88,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
word_prefix_pair_proximity_docids,
|
word_prefix_pair_proximity_docids,
|
||||||
|
word_level_position_docids,
|
||||||
facet_field_id_value_docids,
|
facet_field_id_value_docids,
|
||||||
field_id_docid_facet_values,
|
field_id_docid_facet_values,
|
||||||
documents,
|
documents,
|
||||||
|
Loading…
Reference in New Issue
Block a user