Bump heed to the `compute_size` branch and map `HeedError::BadOpenOptions` to a user error

This commit is contained in:
Irevoire 2022-08-10 16:25:24 +02:00
parent 087da5621a
commit e96b852107
No known key found for this signature in database
GPG Key ID: 7A6A970C96104F1B
3 changed files with 21 additions and 14 deletions

View File

@ -18,7 +18,8 @@ fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geoutils = "0.4.1" geoutils = "0.4.1"
grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] } grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] } # heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
heed = { git = "https://github.com/meilisearch/heed", branch = "compute_size", default-features = false, features = ["lmdb", "sync-read-txn"] }
json-depth-checker = { path = "../json-depth-checker" } json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.5.3" memmap2 = "0.5.3"

View File

@ -116,6 +116,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
} }
)] )]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> }, InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")] #[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
SortRankingRuleMissing, SortRankingRuleMissing,
#[error("The database file is in an invalid state.")] #[error("The database file is in an invalid state.")]
@ -244,6 +246,7 @@ impl From<HeedError> for Error {
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })), HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
HeedError::DatabaseClosing => InternalError(DatabaseClosing), HeedError::DatabaseClosing => InternalError(DatabaseClosing),
HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
} }
} }
} }

View File

@ -278,27 +278,30 @@ where
let stop_words = self.index.stop_words(self.wtxn)?; let stop_words = self.index.stop_words(self.wtxn)?;
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?; let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
// Run extraction pipeline in parallel. let pool_params = GrenadParameters {
pool.install(|| {
let params = GrenadParameters {
chunk_compression_type: self.indexer_config.chunk_compression_type, chunk_compression_type: self.indexer_config.chunk_compression_type,
chunk_compression_level: self.indexer_config.chunk_compression_level, chunk_compression_level: self.indexer_config.chunk_compression_level,
max_memory: self.indexer_config.max_memory, max_memory: self.indexer_config.max_memory,
max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen. max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
}; };
let documents_chunk_size =
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4); // 4MiB
let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;
// Run extraction pipeline in parallel.
pool.install(|| {
// split obkv file into several chunks // split obkv file into several chunks
let original_chunk_iter = grenad_obkv_into_chunks( let original_chunk_iter = grenad_obkv_into_chunks(
original_documents, original_documents,
params.clone(), pool_params.clone(),
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB documents_chunk_size,
); );
// split obkv file into several chunks // split obkv file into several chunks
let flattened_chunk_iter = grenad_obkv_into_chunks( let flattened_chunk_iter = grenad_obkv_into_chunks(
flattened_documents, flattened_documents,
params.clone(), pool_params.clone(),
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB documents_chunk_size,
); );
let result = original_chunk_iter let result = original_chunk_iter
@ -308,14 +311,14 @@ where
extract::data_from_obkv_documents( extract::data_from_obkv_documents(
original_chunk, original_chunk,
flattened_chunk, flattened_chunk,
params, pool_params,
lmdb_writer_sx.clone(), lmdb_writer_sx.clone(),
searchable_fields, searchable_fields,
faceted_fields, faceted_fields,
primary_key_id, primary_key_id,
geo_fields_ids, geo_fields_ids,
stop_words, stop_words,
self.indexer_config.max_positions_per_attributes, max_positions_per_attributes,
exact_attributes, exact_attributes,
) )
}); });