3160: Clamp databases max size to the page size r=irevoire a=Kerollmops

This PR fixes #3150, which is a recurrence of #2662. We have to fix it again here because we entirely rewrote the index scheduler and forgot about this little detail.

`@irevoire` Can I have your input on where we create the indexes in the tests? I want to use a value that is not a multiple of the page size there. This way, the tests will catch this issue if it ever comes back.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2022-11-29 14:43:49 +00:00 committed by GitHub
commit ee372099fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 24 additions and 7 deletions

13
Cargo.lock generated
View File

@ -1625,7 +1625,7 @@ dependencies = [
"libc",
"lmdb-rkv-sys",
"once_cell",
"page_size",
"page_size 0.4.2",
"synchronoise",
"url",
"zerocopy",
@ -1783,6 +1783,7 @@ dependencies = [
"meili-snap",
"meilisearch-types",
"nelson",
"page_size 0.5.0",
"roaring",
"serde",
"serde_json",
@ -2663,6 +2664,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "page_size"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "parking_lot"
version = "0.12.1"

View File

@ -13,6 +13,7 @@ enum-iterator = "1.1.3"
file-store = { path = "../file-store" }
log = "0.4.14"
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }

View File

@ -12,7 +12,7 @@ use meilisearch_types::milli::Index;
use uuid::Uuid;
use self::IndexStatus::{Available, BeingDeleted};
use crate::{Error, Result};
use crate::{clamp_to_page_size, Error, Result};
const INDEX_MAPPING: &str = "index-mapping";
@ -68,7 +68,7 @@ impl IndexMapper {
/// The path *must* exist or an error will be returned.
fn create_or_open_index(&self, path: &Path) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(self.index_size);
// Pass the configured index size through `clamp_to_page_size` before using it as the map size.
options.map_size(clamp_to_page_size(self.index_size));
options.max_readers(1024);
Ok(Index::new(options, path)?)
}

View File

@ -54,7 +54,7 @@ use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, m
use uuid::Uuid;
use crate::index_mapper::IndexMapper;
use crate::utils::check_index_swap_validity;
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 =
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
@ -361,7 +361,7 @@ impl IndexScheduler {
let env = heed::EnvOpenOptions::new()
.max_dbs(10)
.map_size(options.task_db_size)
.map_size(clamp_to_page_size(options.task_db_size))
.open(options.tasks_path)?;
let file_store = FileStore::new(&options.update_file_path)?;
@ -1111,8 +1111,8 @@ mod tests {
indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"),
task_db_size: 1024 * 1024, // 1 MiB
index_size: 1024 * 1024, // 1 MiB
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
indexer_config: IndexerConfig::default(),
autobatching_enabled,
};

View File

@ -324,6 +324,11 @@ pub(crate) fn check_index_swap_validity(task: &Task) -> Result<()> {
Ok(())
}
/// Clamp the provided value to be a multiple of system page size.
pub fn clamp_to_page_size(size: usize) -> usize {
size / page_size::get() * page_size::get()
}
#[cfg(test)]
impl IndexScheduler {
/// Asserts that the index scheduler's content is internally consistent.