Make sure that we do not generate keys that are too long

This commit is contained in:
Kerollmops 2022-05-03 09:57:03 +02:00
parent 7e47031bdc
commit 211c8763b9
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 17 additions and 9 deletions

View File

@ -20,7 +20,8 @@ use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
pub use self::helpers::{ pub use self::helpers::{
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset, as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
sorter_into_lmdb_database, write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn, sorter_into_lmdb_database, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
ClonableMmap, MergeFn,
}; };
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters}; use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput}; pub use self::transform::{Transform, TransformOutput};

View File

@ -5,7 +5,8 @@ use heed::types::{ByteSlice, Str};
use heed::Database; use heed::Database;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_sorter, merge_roaring_bitmaps, sorter_into_lmdb_database, CursorClonableMmap, MergeFn, create_sorter, merge_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
CursorClonableMmap, MergeFn,
}; };
use crate::{Result, RoaringBitmapCodec}; use crate::{Result, RoaringBitmapCodec};
@ -124,7 +125,9 @@ fn write_prefixes_in_sorter(
) -> Result<()> { ) -> Result<()> {
for (key, data_slices) in prefixes.drain() { for (key, data_slices) in prefixes.drain() {
for data in data_slices { for data in data_slices {
sorter.insert(&key, data)?; if valid_lmdb_key(&key) {
sorter.insert(&key, data)?;
}
} }
} }

View File

@ -7,8 +7,8 @@ use log::debug;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, CursorClonableMmap, create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
MergeFn, CursorClonableMmap, MergeFn,
}; };
use crate::{Index, Result, StrStrU8Codec}; use crate::{Index, Result, StrStrU8Codec};
@ -188,7 +188,9 @@ fn write_prefixes_in_sorter(
) -> Result<()> { ) -> Result<()> {
for (key, data_slices) in prefixes.drain() { for (key, data_slices) in prefixes.drain() {
for data in data_slices { for data in data_slices {
sorter.insert(&key, data)?; if valid_lmdb_key(&key) {
sorter.insert(&key, data)?;
}
} }
} }

View File

@ -11,8 +11,8 @@ use crate::error::SerializationError;
use crate::heed_codec::StrBEU32Codec; use crate::heed_codec::StrBEU32Codec;
use crate::index::main_key::WORDS_PREFIXES_FST_KEY; use crate::index::main_key::WORDS_PREFIXES_FST_KEY;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, CursorClonableMmap, create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
MergeFn, CursorClonableMmap, MergeFn,
}; };
use crate::{Index, Result}; use crate::{Index, Result};
@ -167,7 +167,9 @@ fn write_prefixes_in_sorter(
) -> Result<()> { ) -> Result<()> {
for (key, data_slices) in prefixes.drain() { for (key, data_slices) in prefixes.drain() {
for data in data_slices { for data in data_slices {
sorter.insert(&key, data)?; if valid_lmdb_key(&key) {
sorter.insert(&key, data)?;
}
} }
} }