diff --git a/.github/workflows/flaky.yml b/.github/workflows/flaky.yml
index fadd6bf96..d1249b7b7 100644
--- a/.github/workflows/flaky.yml
+++ b/.github/workflows/flaky.yml
@@ -6,10 +6,20 @@ on:
 jobs:
   flaky:
-    runs-on: ubuntu-18.04
-
+    runs-on: ubuntu-latest
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27, which matches the production expectations
+      image: ubuntu:18.04
     steps:
     - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update && apt-get install -y curl
+        apt-get install build-essential -y
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
     - name: Install cargo-flaky
       run: cargo install cargo-flaky
     - name: Run cargo flaky 100 times
diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml
index db17d9d13..784640eb0 100644
--- a/.github/workflows/publish-binaries.yml
+++ b/.github/workflows/publish-binaries.yml
@@ -32,30 +32,56 @@ jobs:
     if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true'
     run: bash .github/scripts/check-release.sh

-  publish:
+  publish-linux:
+    name: Publish binary for Linux
+    runs-on: ubuntu-latest
+    needs: check-version
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27
+      image: ubuntu:18.04
+    steps:
+    - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update && apt-get install -y curl
+        apt-get install build-essential -y
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
+    - name: Build
+      run: cargo build --release --locked
+    # No need to upload binaries for dry run (cron)
+    - name: Upload binaries to release
+      if: github.event_name == 'release'
+      uses: svenstaro/upload-release-action@2.3.0
+      with:
+        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
+        file: target/release/meilisearch
+        asset_name: meilisearch-linux-amd64
+        tag: ${{ github.ref }}
+
+  publish-macos-windows:
     name: Publish binary for ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     needs: check-version
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-18.04, macos-latest, windows-latest]
+        os: [macos-12, windows-2022]
         include:
-          - os: ubuntu-18.04
-            artifact_name: meilisearch
-            asset_name: meilisearch-linux-amd64
-          - os: macos-latest
+          - os: macos-12
             artifact_name: meilisearch
             asset_name: meilisearch-macos-amd64
-          - os: windows-latest
+          - os: windows-2022
             artifact_name: meilisearch.exe
             asset_name: meilisearch-windows-amd64.exe
-
     steps:
-    - uses: hecrj/setup-rust-action@master
-      with:
-        rust-version: stable
     - uses: actions/checkout@v3
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
     - name: Build
       run: cargo build --release --locked
     # No need to upload binaries for dry run (cron)
@@ -72,15 +98,13 @@
     name: Publish binary for macOS silicon
     runs-on: ${{ matrix.os }}
     needs: check-version
-    continue-on-error: false
     strategy:
       fail-fast: false
       matrix:
        include:
-          - os: macos-latest
+          - os: macos-12
            target: aarch64-apple-darwin
            asset_name: meilisearch-macos-apple-silicon
-
     steps:
     - name: Checkout repository
       uses: actions/checkout@v3
@@ -110,7 +134,6 @@
     name: Publish binary for aarch64
     runs-on: ${{ matrix.os }}
     needs: check-version
-    continue-on-error: false
     strategy:
       fail-fast: false
       matrix:
@@ -121,11 +144,9 @@
            linker: gcc-aarch64-linux-gnu
            use-cross: true
            asset_name: meilisearch-linux-aarch64
-
     steps:
     - name: Checkout repository
       uses: actions/checkout@v3
-
    - name: Installing Rust toolchain
      uses: actions-rs/toolchain@v1
      with:
@@ -133,16 +154,13 @@
        profile: minimal
        target: ${{ matrix.target }}
        override: true
-
    - name: APT update
      run: |
        sudo apt update
-
    - name: Install target specific tools
      if: matrix.use-cross
      run: |
        sudo apt-get install -y ${{ matrix.linker }}
-
    - name: Configure target aarch64 GNU
      if: matrix.target == 'aarch64-unknown-linux-gnu'
      ## Environment variable is not passed using env:
@@ -154,17 +172,14 @@
        echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
        echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
        echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
-
    - name: Cargo build
      uses: actions-rs/cargo@v1
      with:
        command: build
        use-cross: ${{ matrix.use-cross }}
        args: --release --target ${{ matrix.target }}
-
    - name: List target output files
      run: ls -lR ./target
-
    - name: Upload the binary to release
      # No need to upload binaries for dry run (cron)
      if: github.event_name == 'release'
diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml
index 3d9446321..a276b5a6e 100644
--- a/.github/workflows/publish-deb-brew-pkg.yml
+++ b/.github/workflows/publish-deb-brew-pkg.yml
@@ -15,12 +15,20 @@ jobs:
   debian:
     name: Publish debian package
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
     needs: check-version
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27
+      image: ubuntu:18.04
     steps:
-    - uses: hecrj/setup-rust-action@master
+    - name: Install needed dependencies
+      run: |
+        apt-get update && apt-get install -y curl
+        apt-get install build-essential -y
+    - uses: actions-rs/toolchain@v1
       with:
-        rust-version: stable
+        toolchain: stable
+        override: true
     - name: Install cargo-deb
       run: cargo install cargo-deb
     - uses: actions/checkout@v3
diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml
index 2d95e6784..3dd93b6eb 100644
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@@ -86,6 +86,7 @@ jobs:
     - name: Build and push
       uses: docker/build-push-action@v3
       with:
+        push: true
         platforms: linux/amd64,linux/arm64
         tags: ${{ steps.meta.outputs.tags }}
         build-args: |
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 5c5ca827f..f1260124e 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -15,13 +15,42 @@ env:
   RUSTFLAGS: "-D warnings"

 jobs:
-  tests:
+  test-linux:
+    name: Tests on ubuntu-18.04
+    runs-on: ubuntu-latest
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27, which matches the production expectations
+      image: ubuntu:18.04
+    steps:
+    - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update && apt-get install -y curl
+        apt-get install build-essential -y
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
+    - name: Cache dependencies
+      uses: Swatinem/rust-cache@v2.2.0
+    - name: Run cargo check without any default features
+      uses: actions-rs/cargo@v1
+      with:
+        command: build
+        args: --locked --release --no-default-features
+    - name: Run cargo test
+      uses: actions-rs/cargo@v1
+      with:
+        command: test
+        args: --locked --release
+
+  test-others:
     name: Tests on ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-18.04, macos-latest, windows-latest]
+        os: [macos-12, windows-2022]
     steps:
     - uses: actions/checkout@v3
     - name: Cache dependencies
@@ -40,12 +69,18 @@ jobs:
   # We run tests in debug also, to make sure that the debug_assertions are hit
   test-debug:
     name: Run tests in debug
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27, which matches the production expectations
+      image: ubuntu:18.04
     steps:
     - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update && apt-get install -y curl
+        apt-get install build-essential -y
     - uses: actions-rs/toolchain@v1
       with:
-        profile: minimal
         toolchain: stable
         override: true
     - name: Cache dependencies
@@ -58,7 +93,7 @@ jobs:

   clippy:
     name: Run Clippy
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
     - uses: actions-rs/toolchain@v1
@@ -77,7 +112,7 @@ jobs:

   fmt:
     name: Run Rustfmt
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
     - uses: actions-rs/toolchain@v1
diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml
index e823a0b23..6446a366a 100644
--- a/.github/workflows/update-cargo-toml-version.yml
+++ b/.github/workflows/update-cargo-toml-version.yml
@@ -16,7 +16,7 @@ jobs:

   update-version-cargo-toml:
     name: Update version in Cargo.toml files
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
     - uses: actions-rs/toolchain@v1
diff --git a/Cargo.lock b/Cargo.lock
index dfc9818aa..fb3c0daa2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1332,8 +1332,8 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "0.37.2"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
+version = "0.37.3"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
 dependencies = [
  "nom",
  "nom_locate",
@@ -1351,8 +1351,8 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "0.37.2"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
+version = "0.37.3"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
 dependencies = [
  "serde_json",
 ]
@@ -1898,8 +1898,8 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "0.37.2"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
+version = "0.37.3"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
 dependencies = [
  "serde_json",
 ]
@@ -2374,6 +2374,7 @@ dependencies = [
  "csv",
  "either",
  "enum-iterator",
+ "file-store",
  "flate2",
  "fst",
  "insta",
@@ -2386,6 +2387,7 @@ dependencies = [
  "serde",
  "serde_json",
  "tar",
+ "tempfile",
  "thiserror",
  "time",
  "tokio",
@@ -2418,8 +2420,8 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "0.37.2"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
+version = "0.37.3"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
 dependencies = [
  "bimap",
  "bincode",
diff --git a/bors.toml b/bors.toml
index b357e8d61..1e7e418e5 100644
--- a/bors.toml
+++ b/bors.toml
@@ -1,7 +1,7 @@
 status = [
     'Tests on ubuntu-18.04',
-    'Tests on macos-latest',
-    'Tests on windows-latest',
+    'Tests on macos-12',
+    'Tests on windows-2022',
     'Run Clippy',
     'Run Rustfmt',
     'Run tests in debug',
diff --git a/dump/src/error.rs b/dump/src/error.rs
index 0d57729ae..b53dd640a 100644
--- a/dump/src/error.rs
+++ b/dump/src/error.rs
@@ -19,9 +19,10 @@ pub enum Error {

 impl ErrorCode for Error {
     fn error_code(&self) -> Code {
         match self {
-            // Are these three really Internal errors?
-            // TODO look at that later.
-            Error::Io(_) => Code::Internal,
+            Error::Io(e) => e.error_code(),
+
+            // These errors either happen when creating a dump and don't need any error code,
+            // or come from an internal bad deserialization.
             Error::Serde(_) => Code::Internal,
             Error::Uuid(_) => Code::Internal,
diff --git a/dump/src/reader/error.rs b/dump/src/reader/error.rs
deleted file mode 100644
index 679fa2bc2..000000000
--- a/dump/src/reader/error.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-use meilisearch_auth::error::AuthControllerError;
-use meilisearch_types::error::{Code, ErrorCode};
-use meilisearch_types::internal_error;
-
-use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
-
-pub type Result<T> = std::result::Result<T, DumpError>;
-
-#[derive(thiserror::Error, Debug)]
-pub enum DumpError {
-    #[error("An internal error has occurred. `{0}`.")]
-    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
-    #[error("{0}")]
-    IndexResolver(Box<IndexResolverError>),
-}
-
-internal_error!(
-    DumpError: milli::heed::Error,
-    std::io::Error,
-    tokio::task::JoinError,
-    tokio::sync::oneshot::error::RecvError,
-    serde_json::error::Error,
-    tempfile::PersistError,
-    fs_extra::error::Error,
-    AuthControllerError,
-    TaskError
-);
-
-impl From<IndexResolverError> for DumpError {
-    fn from(e: IndexResolverError) -> Self {
-        Self::IndexResolver(Box::new(e))
-    }
-}
-
-impl ErrorCode for DumpError {
-    fn error_code(&self) -> Code {
-        match self {
-            DumpError::Internal(_) => Code::Internal,
-            DumpError::IndexResolver(e) => e.error_code(),
-        }
-    }
-}
diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs
index 4dfbf3502..e1e2fd0c1 100644
--- a/dump/src/reader/v4/mod.rs
+++ b/dump/src/reader/v4/mod.rs
@@ -102,7 +102,7 @@ impl V4Reader {
                 &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()),
                 &index.index_meta,
                 BufReader::new(
-                    File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap(),
+                    File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
                 ),
             )
         }))
diff --git a/dump/src/writer.rs b/dump/src/writer.rs
index 81695f55c..9776dd225 100644
--- a/dump/src/writer.rs
+++ b/dump/src/writer.rs
@@ -25,7 +25,7 @@ impl DumpWriter {
         if let Some(instance_uuid) = instance_uuid {
             fs::write(
                 dir.path().join("instance_uid.uuid"),
-                &instance_uuid.as_hyphenated().to_string(),
+                instance_uuid.as_hyphenated().to_string(),
             )?;
         }

@@ -36,7 +36,7 @@ impl DumpWriter {
         };
         fs::write(dir.path().join("metadata.json"), serde_json::to_string(&metadata)?)?;

-        std::fs::create_dir(&dir.path().join("indexes"))?;
+        std::fs::create_dir(dir.path().join("indexes"))?;

         Ok(DumpWriter { dir })
     }
diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index 6eac926c4..78f1e1fbe 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -666,7 +666,7 @@ impl IndexScheduler {
         let snapshot_path = self.snapshots_path.join(format!("{}.snapshot", db_name));
         let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.snapshots_path)?;
         compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?;
-        let file = temp_snapshot_file.persist(&snapshot_path)?;
+        let file = temp_snapshot_file.persist(snapshot_path)?;

         // 5.3 Change the permission to make the snapshot readonly
         let mut permissions = file.metadata()?.permissions();
diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs
index cfbf7a25e..037f8a269 100644
--- a/index-scheduler/src/error.rs
+++ b/index-scheduler/src/error.rs
@@ -114,17 +114,17 @@ impl ErrorCode for Error {
             Error::Dump(e) => e.error_code(),
             Error::Milli(e) => e.error_code(),
             Error::ProcessBatchPanicked => Code::Internal,
-            // TODO: TAMO: are all these errors really internal?
-            Error::Heed(_) => Code::Internal,
-            Error::FileStore(_) => Code::Internal,
-            Error::IoError(_) => Code::Internal,
-            Error::Persist(_) => Code::Internal,
+            Error::Heed(e) => e.error_code(),
+            Error::HeedTransaction(e) => e.error_code(),
+            Error::FileStore(e) => e.error_code(),
+            Error::IoError(e) => e.error_code(),
+            Error::Persist(e) => e.error_code(),
+            // Irrecoverable errors
             Error::Anyhow(_) => Code::Internal,
             Error::CorruptedTaskQueue => Code::Internal,
             Error::CorruptedDump => Code::Internal,
             Error::TaskDatabaseUpdate(_) => Code::Internal,
             Error::CreateBatch(_) => Code::Internal,
-            Error::HeedTransaction(_) => Code::Internal,
         }
     }
 }
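
Editor's note on the two hunks above: rather than collapsing every wrapped error to `Code::Internal`, the outer error enums now ask the inner error for its own `error_code()`, and only genuinely irrecoverable cases keep the `Internal` fallback. A minimal, self-contained sketch of this delegation pattern (the trait, codes, and variants below are simplified stand-ins, not the real `meilisearch_types::error` definitions):

```rust
#[derive(Debug, PartialEq)]
enum Code {
    Internal,
    NoSpaceLeftOnDevice,
}

trait ErrorCode {
    fn error_code(&self) -> Code;
}

#[derive(Debug)]
enum InnerError {
    DiskFull,
}

impl ErrorCode for InnerError {
    fn error_code(&self) -> Code {
        match self {
            // The wrapped error knows its own most precise code...
            InnerError::DiskFull => Code::NoSpaceLeftOnDevice,
        }
    }
}

#[derive(Debug)]
enum SchedulerError {
    Inner(InnerError),
    CorruptedTaskQueue,
}

impl ErrorCode for SchedulerError {
    fn error_code(&self) -> Code {
        match self {
            // ...so the outer enum delegates instead of flattening to Internal.
            SchedulerError::Inner(e) => e.error_code(),
            // Only truly irrecoverable cases keep the Internal fallback.
            SchedulerError::CorruptedTaskQueue => Code::Internal,
        }
    }
}

fn main() {
    assert_eq!(
        SchedulerError::Inner(InnerError::DiskFull).error_code(),
        Code::NoSpaceLeftOnDevice
    );
    assert_eq!(SchedulerError::CorruptedTaskQueue.error_code(), Code::Internal);
}
```
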
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 111f2960c..205fc4a50 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -1220,7 +1220,7 @@ mod tests {
        );

        let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
        (file, documents_count)
    }

@@ -1600,7 +1600,7 @@ mod tests {
        }"#;

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
        file.persist().unwrap();
        index_scheduler
            .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -1637,7 +1637,7 @@ mod tests {
        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
        file.persist().unwrap();
        index_scheduler
            .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -1804,7 +1804,7 @@ mod tests {
        }"#;

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
        file.persist().unwrap();
        index_scheduler
            .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -1962,7 +1962,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2009,7 +2009,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2058,7 +2058,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2108,7 +2108,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2159,7 +2159,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2606,7 +2606,7 @@ mod tests {
        }"#;

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
        file.persist().unwrap();
        index_scheduler
            .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2644,7 +2644,7 @@ mod tests {
        }"#;

        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
        file.persist().unwrap();
        index_scheduler
            .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2700,7 +2700,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2748,7 +2748,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2802,7 +2802,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2861,7 +2861,7 @@ mod tests {
            );

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2925,7 +2925,7 @@ mod tests {
            let allow_index_creation = i % 2 != 0;

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
@@ -2978,7 +2978,7 @@ mod tests {
            let allow_index_creation = i % 2 != 0;

            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
+            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
            file.persist().unwrap();
            index_scheduler
                .register(KindWithContent::DocumentAdditionOrUpdate {
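
Editor's note: the repeated one-line test change above drops a redundant cast. `read_json` already returns the document count as a `u64` (see the `document_formats.rs` hunks later in this diff, whose readers end with `Ok(count as u64)`), so `as u64` at the call site was a no-op. A small illustration, with a hypothetical `count_documents` standing in for `read_json`:

```rust
// Hypothetical stand-in for read_json: the count is widened to u64 once,
// at the boundary where it is produced...
fn count_documents(payload: &str) -> u64 {
    payload.lines().filter(|l| !l.trim().is_empty()).count() as u64
}

fn main() {
    let content = "{\"id\": 1}\n{\"id\": 2}\n";
    // ...so callers take the u64 as-is; writing `count_documents(content) as u64`
    // here would be an unnecessary cast (clippy: unnecessary_cast).
    let documents_count: u64 = count_documents(content);
    assert_eq!(documents_count, 2);
}
```
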
diff --git a/meili-snap/src/lib.rs b/meili-snap/src/lib.rs
index a2abd0cea..dea0d750b 100644
--- a/meili-snap/src/lib.rs
+++ b/meili-snap/src/lib.rs
@@ -29,7 +29,7 @@ pub fn default_snapshot_settings_for_test<'a>(
     let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name);
     let test_name = test_name.rsplit("::").next().unwrap().to_owned();

-    let path = Path::new("snapshots").join(filename).join(&test_name);
+    let path = Path::new("snapshots").join(filename).join(test_name);
     settings.set_snapshot_path(path.clone());
     let snap_name = if let Some(name) = name {
         Cow::Borrowed(name)
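
Editor's note: this hunk, like the surrounding `File::open`, `File::create`, `fs::write`, and `persist` hunks, removes a needless borrow. These APIs take generic `impl AsRef<Path>`-style arguments, so an owned value can be passed directly and moved instead of referenced. A quick sketch of why the `&` adds nothing:

```rust
use std::path::Path;

// Path::join takes `impl AsRef<Path>`, so it accepts a String, a &str, or a
// reference to either equally well; borrowing first changes nothing.
fn main() {
    let test_name = String::from("register_task");
    let with_borrow = Path::new("snapshots").join(&test_name); // &String works, but the & is redundant
    let without = Path::new("snapshots").join(test_name); // String is moved in; same result
    assert_eq!(with_borrow, without);
}
```
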
diff --git a/meilisearch-auth/src/dump.rs b/meilisearch-auth/src/dump.rs
index 0b26bf7da..8a215d8ae 100644
--- a/meilisearch-auth/src/dump.rs
+++ b/meilisearch-auth/src/dump.rs
@@ -18,7 +18,7 @@ impl AuthController {
         let keys_file_path = dst.as_ref().join(KEYS_PATH);

         let keys = store.list_api_keys()?;
-        let mut keys_file = File::create(&keys_file_path)?;
+        let mut keys_file = File::create(keys_file_path)?;
         for key in keys {
             serde_json::to_writer(&mut keys_file, &key)?;
             keys_file.write_all(b"\n")?;
diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml
index 787737edb..88b689af7 100644
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@@ -10,16 +10,18 @@ anyhow = "1.0.65"
 csv = "1.1.6"
 either = { version = "1.6.1", features = ["serde"] }
 enum-iterator = "1.1.3"
+file-store = { path = "../file-store" }
 flate2 = "1.0.24"
 fst = "0.4.7"
 memmap2 = "0.5.7"
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.2", default-features = false }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.3", default-features = false }
 proptest = { version = "1.0.0", optional = true }
 proptest-derive = { version = "0.3.0", optional = true }
 roaring = { version = "0.10.0", features = ["serde"] }
 serde = { version = "1.0.145", features = ["derive"] }
 serde_json = "1.0.85"
 tar = "0.4.38"
+tempfile = "3.3.0"
 thiserror = "1.0.30"
 time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
 tokio = "1.0"
diff --git a/meilisearch-types/src/document_formats.rs b/meilisearch-types/src/document_formats.rs
index 5eee63afc..8011afac7 100644
--- a/meilisearch-types/src/document_formats.rs
+++ b/meilisearch-types/src/document_formats.rs
@@ -13,7 +13,6 @@ use serde::{Deserialize, Deserializer};
 use serde_json::error::Category;

 use crate::error::{Code, ErrorCode};
-use crate::internal_error;

 type Result<T> = std::result::Result<T, DocumentFormatError>;

@@ -36,14 +35,14 @@ impl fmt::Display for PayloadType {

 #[derive(Debug)]
 pub enum DocumentFormatError {
-    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
+    Io(io::Error),
     MalformedPayload(Error, PayloadType),
 }

 impl Display for DocumentFormatError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
-            Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
+            Self::Io(e) => write!(f, "{e}"),
             Self::MalformedPayload(me, b) => match me.borrow() {
                 Error::Json(se) => {
                     let mut message = match se.classify() {
@@ -85,23 +84,27 @@ impl std::error::Error for DocumentFormatError {}
 impl From<(PayloadType, Error)> for DocumentFormatError {
     fn from((ty, error): (PayloadType, Error)) -> Self {
         match error {
-            Error::Io(e) => Self::Internal(Box::new(e)),
+            Error::Io(e) => Self::Io(e),
             e => Self::MalformedPayload(e, ty),
         }
     }
 }

+impl From<io::Error> for DocumentFormatError {
+    fn from(error: io::Error) -> Self {
+        Self::Io(error)
+    }
+}
+
 impl ErrorCode for DocumentFormatError {
     fn error_code(&self) -> Code {
         match self {
-            DocumentFormatError::Internal(_) => Code::Internal,
+            DocumentFormatError::Io(e) => e.error_code(),
             DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload,
         }
     }
 }

-internal_error!(DocumentFormatError: io::Error);
-
 /// Reads CSV from input and write an obkv batch to writer.
 pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
     let mut builder = DocumentsBatchBuilder::new(writer);
@@ -110,7 +113,7 @@ pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
     builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;

     let count = builder.documents_count();
-    let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?;
+    let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;

     Ok(count as u64)
 }
@@ -139,7 +142,7 @@ fn read_json_inner(
         // The json data has been deserialized and does not need to be processed again.
         // The data has been transferred to the writer during the deserialization process.
         Ok(Ok(_)) => (),
-        Ok(Err(e)) => return Err(DocumentFormatError::Internal(Box::new(e))),
+        Ok(Err(e)) => return Err(DocumentFormatError::Io(e)),
         Err(_e) => {
             // If we cannot deserialize the content as an array of object then we try
             // to deserialize it with the original method to keep correct error messages.
@@ -155,16 +158,13 @@ fn read_json_inner(
                 .map_err(|e| (payload_type, e))?;

             for object in content.inner.map_right(|o| vec![o]).into_inner() {
-                builder
-                    .append_json_object(&object)
-                    .map_err(Into::into)
-                    .map_err(DocumentFormatError::Internal)?;
+                builder.append_json_object(&object).map_err(DocumentFormatError::Io)?;
             }
         }
     }

     let count = builder.documents_count();
-    let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?;
+    let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;

     Ok(count as u64)
 }
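
Editor's note: replacing the opaque `Internal(Box<dyn Error>)` variant with a concrete `Io(io::Error)` variant, together with the hand-written `From<io::Error>` impl that supersedes the old `internal_error!` macro call, keeps `?` working while preserving the original message and OS error code. A reduced sketch of the same shape (`DocumentError` below is illustrative, not the real `DocumentFormatError`):

```rust
use std::fmt;
use std::io;

// Illustrative reduction of DocumentFormatError: a dedicated Io variant plus a
// From impl, so `?` converts io::Error automatically.
#[derive(Debug)]
enum DocumentError {
    Io(io::Error),
}

impl fmt::Display for DocumentError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // The io::Error message is surfaced as-is instead of being wrapped
            // in a generic "internal error" string.
            DocumentError::Io(e) => write!(f, "{e}"),
        }
    }
}

impl From<io::Error> for DocumentError {
    fn from(e: io::Error) -> Self {
        DocumentError::Io(e)
    }
}

fn read_payload(path: &str) -> Result<String, DocumentError> {
    // `?` goes through From<io::Error>, keeping the original error intact.
    Ok(std::fs::read_to_string(path)?)
}

fn main() {
    match read_payload("/definitely/missing/file.json") {
        Ok(_) => unreachable!(),
        Err(e) => println!("{e}"),
    }
}
```
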
diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs
index 5c0e1d9b8..fb267abb6 100644
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -1,4 +1,4 @@
-use std::fmt;
+use std::{fmt, io};

 use actix_web::http::StatusCode;
 use actix_web::{self as aweb, HttpResponseBuilder};
@@ -23,7 +23,10 @@ pub struct ResponseError {
 }

 impl ResponseError {
-    pub fn from_msg(message: String, code: Code) -> Self {
+    pub fn from_msg(mut message: String, code: Code) -> Self {
+        if code == Code::IoError {
+            message.push_str(". This error generally happens when you have no space left on device or when your database doesn't have read or write rights.");
+        }
         Self {
             code: code.http(),
             message,
@@ -47,13 +50,7 @@ where
     T: ErrorCode,
 {
     fn from(other: T) -> Self {
-        Self {
-            code: other.http_status(),
-            message: other.to_string(),
-            error_code: other.error_name(),
-            error_type: other.error_type(),
-            error_link: other.error_url(),
-        }
+        Self::from_msg(other.to_string(), other.error_code())
     }
 }

@@ -111,8 +108,13 @@ impl fmt::Display for ErrorType {
     }
 }

-#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Code {
+    // errors related to your setup
+    IoError,
+    NoSpaceLeftOnDevice,
+    TooManyOpenFiles,
+
     // index related error
     CreateIndex,
     IndexAlreadyExists,
@@ -145,7 +147,6 @@ pub enum Code {
     InvalidToken,
     MissingAuthorizationHeader,
     MissingMasterKey,
-    NoSpaceLeftOnDevice,
     DumpNotFound,
     InvalidTaskDateFilter,
     InvalidTaskStatusesFilter,
@@ -188,6 +189,15 @@ impl Code {
         use Code::*;

         match self {
+            // related to the setup
+            IoError => ErrCode::invalid("io_error", StatusCode::UNPROCESSABLE_ENTITY),
+            TooManyOpenFiles => {
+                ErrCode::invalid("too_many_open_files", StatusCode::UNPROCESSABLE_ENTITY)
+            }
+            NoSpaceLeftOnDevice => {
+                ErrCode::invalid("no_space_left_on_device", StatusCode::UNPROCESSABLE_ENTITY)
+            }
+
             // index related errors
             // create index is thrown on internal error while creating an index.
             CreateIndex => {
@@ -266,9 +276,6 @@ impl Code {
                 ErrCode::invalid("missing_task_filters", StatusCode::BAD_REQUEST)
             }
             DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND),
-            NoSpaceLeftOnDevice => {
-                ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR)
-            }
             PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
             RetrieveDocument => {
                 ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST)
@@ -380,7 +387,7 @@ impl ErrorCode for milli::Error {

         match self {
             Error::InternalError(_) => Code::Internal,
-            Error::IoError(_) => Code::Internal,
+            Error::IoError(e) => e.error_code(),
             Error::UserError(ref error) => {
                 match error {
                     // TODO: wait for spec for new error codes.
@@ -415,13 +422,28 @@ impl ErrorCode for milli::Error {
     }
 }

+impl ErrorCode for file_store::Error {
+    fn error_code(&self) -> Code {
+        match self {
+            Self::IoError(e) => e.error_code(),
+            Self::PersistError(e) => e.error_code(),
+        }
+    }
+}
+
+impl ErrorCode for tempfile::PersistError {
+    fn error_code(&self) -> Code {
+        self.error.error_code()
+    }
+}
+
 impl ErrorCode for HeedError {
     fn error_code(&self) -> Code {
         match self {
             HeedError::Mdb(MdbError::MapFull) => Code::DatabaseSizeLimitReached,
             HeedError::Mdb(MdbError::Invalid) => Code::InvalidStore,
-            HeedError::Io(_)
-            | HeedError::Mdb(_)
+            HeedError::Io(e) => e.error_code(),
+            HeedError::Mdb(_)
             | HeedError::Encoding
             | HeedError::Decoding
             | HeedError::InvalidDatabaseTyping
@@ -431,6 +453,17 @@ impl ErrorCode for HeedError {
     }
 }

+impl ErrorCode for io::Error {
+    fn error_code(&self) -> Code {
+        match self.raw_os_error() {
+            Some(5) => Code::IoError,
+            Some(24) => Code::TooManyOpenFiles,
+            Some(28) => Code::NoSpaceLeftOnDevice,
+            _ => Code::Internal,
+        }
+    }
+}
+
 #[cfg(feature = "test-traits")]
 mod strategy {
     use proptest::strategy::Strategy;
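
Editor's note: the new `impl ErrorCode for io::Error` is what all the delegation in this diff bottoms out in. `raw_os_error()` exposes the underlying errno, so EIO (5), EMFILE (24), and ENOSPC (28) each get a dedicated user-facing code instead of the old catch-all `Internal`. A standalone sketch of that mapping (errno values as on Linux; `Code` is reduced to the relevant variants):

```rust
use std::io;

#[derive(Debug, PartialEq)]
enum Code {
    IoError,
    TooManyOpenFiles,
    NoSpaceLeftOnDevice,
    Internal,
}

fn code_for(e: &io::Error) -> Code {
    // raw_os_error returns the OS errno when the error came from a syscall.
    match e.raw_os_error() {
        Some(5) => Code::IoError,              // EIO
        Some(24) => Code::TooManyOpenFiles,    // EMFILE
        Some(28) => Code::NoSpaceLeftOnDevice, // ENOSPC
        _ => Code::Internal,
    }
}

fn main() {
    let enospc = io::Error::from_raw_os_error(28);
    assert_eq!(code_for(&enospc), Code::NoSpaceLeftOnDevice);
    // Errors built without an OS errno still fall back to Internal.
    let custom = io::Error::new(io::ErrorKind::Other, "oh no");
    assert_eq!(code_for(&custom), Code::Internal);
}
```
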
diff --git a/meilisearch-types/src/versioning.rs b/meilisearch-types/src/versioning.rs
index bf1efe1ad..f0979f669 100644
--- a/meilisearch-types/src/versioning.rs
+++ b/meilisearch-types/src/versioning.rs
@@ -19,7 +19,7 @@ pub fn create_version_file(db_path: &Path) -> io::Result<()> {
 pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
     let version_path = db_path.join(VERSION_FILE_NAME);

-    match fs::read_to_string(&version_path) {
+    match fs::read_to_string(version_path) {
         Ok(version) => {
             let version_components = version.split('.').collect::<Vec<_>>();
             let (major, minor, patch) = match &version_components[..] {
diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs
index 46c4b2090..1effe6692 100644
--- a/meilisearch/src/analytics/mod.rs
+++ b/meilisearch/src/analytics/mod.rs
@@ -51,7 +51,7 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
 fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
     fs::read_to_string(db_path.join("instance-uid"))
         .ok()
-        .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
+        .or_else(|| fs::read_to_string(config_user_id_path(db_path)?).ok())
         .and_then(|uid| InstanceUid::from_str(&uid).ok())
 }
diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs
index 6d966aab1..1dc40ac76 100644
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -108,75 +108,43 @@ pub fn create_app(
         .wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
 }

-// TODO: TAMO: Finish setting up things
+enum OnFailure {
+    RemoveDb,
+    KeepDb,
+}
+
 pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
-    // we don't want to create anything in the data.ms yet, thus we
-    // wrap our two builders in a closure that'll be executed later.
-    let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key);
-    let index_scheduler_builder = || {
-        IndexScheduler::new(IndexSchedulerOptions {
-            version_file_path: opt.db_path.join(VERSION_FILE_NAME),
-            auth_path: opt.db_path.join("auth"),
-            tasks_path: opt.db_path.join("tasks"),
-            update_file_path: opt.db_path.join("update_files"),
-            indexes_path: opt.db_path.join("indexes"),
-            snapshots_path: opt.snapshot_dir.clone(),
-            dumps_path: opt.dump_dir.clone(),
-            task_db_size: opt.max_task_db_size.get_bytes() as usize,
-            index_size: opt.max_index_size.get_bytes() as usize,
-            indexer_config: (&opt.indexer_options).try_into()?,
-            autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
-        })
-    };
-
-    enum OnFailure {
-        RemoveDb,
-        KeepDb,
-    }
-
-    let meilisearch_builder = |on_failure: OnFailure| -> anyhow::Result<_> {
-        // if anything wrong happens we delete the `data.ms` entirely.
-        match (
-            index_scheduler_builder().map_err(anyhow::Error::from),
-            auth_controller_builder().map_err(anyhow::Error::from),
-            create_version_file(&opt.db_path).map_err(anyhow::Error::from),
-        ) {
-            (Ok(i), Ok(a), Ok(())) => Ok((i, a)),
-            (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
-                if matches!(on_failure, OnFailure::RemoveDb) {
-                    std::fs::remove_dir_all(&opt.db_path)?;
-                }
-                Err(e)
-            }
-        }
-    };
-
     let empty_db = is_empty_db(&opt.db_path);

     let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
         let snapshot_path_exists = snapshot_path.exists();
+        // the db is empty and the snapshot exists, import it
         if empty_db && snapshot_path_exists {
             match compression::from_tar_gz(snapshot_path, &opt.db_path) {
-                Ok(()) => meilisearch_builder(OnFailure::RemoveDb)?,
+                Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
                 Err(e) => {
                     std::fs::remove_dir_all(&opt.db_path)?;
                     return Err(e);
                 }
             }
+        // the db already exists and we should not ignore the snapshot => throw an error
         } else if !empty_db && !opt.ignore_snapshot_if_db_exists {
             bail!(
                 "database already exists at {:?}, try to delete it or rename it",
                 opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
             )
+        // the snapshot doesn't exist and we can't ignore it => throw an error
         } else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
             bail!("snapshot doesn't exist at {}", snapshot_path.display())
+        // the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
         } else {
-            meilisearch_builder(OnFailure::RemoveDb)?
+            open_or_create_database(opt, empty_db)?
         }
     } else if let Some(ref path) = opt.import_dump {
         let src_path_exists = path.exists();
+        // the db is empty and the dump exists, import it
         if empty_db && src_path_exists {
             let (mut index_scheduler, mut auth_controller) =
-                meilisearch_builder(OnFailure::RemoveDb)?;
+                open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
             match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
                 Ok(()) => (index_scheduler, auth_controller),
                 Err(e) => {
@@ -184,29 +152,22 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
                     return Err(e);
                 }
             }
+        // the db already exists and we should not ignore the dump option => throw an error
         } else if !empty_db && !opt.ignore_dump_if_db_exists {
             bail!(
                 "database already exists at {:?}, try to delete it or rename it",
                 opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
             )
+        // the dump doesn't exist and we can't ignore it => throw an error
         } else if !src_path_exists && !opt.ignore_missing_dump {
             bail!("dump doesn't exist at {:?}", path)
+        // the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
+        // or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
         } else {
-            let (mut index_scheduler, mut auth_controller) =
-                meilisearch_builder(OnFailure::RemoveDb)?;
-            match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
-                Ok(()) => (index_scheduler, auth_controller),
-                Err(e) => {
-                    std::fs::remove_dir_all(&opt.db_path)?;
-                    return Err(e);
-                }
-            }
+            open_or_create_database(opt, empty_db)?
         }
     } else {
-        if !empty_db {
-            check_version_file(&opt.db_path)?;
-        }
-        meilisearch_builder(OnFailure::KeepDb)?
+        open_or_create_database(opt, empty_db)?
     };

     // We create a loop in a thread that registers snapshotCreation tasks
@@ -228,6 +189,57 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
     Ok((index_scheduler, auth_controller))
 }

+/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
+fn open_or_create_database_unchecked(
+    opt: &Opt,
+    on_failure: OnFailure,
+) -> anyhow::Result<(IndexScheduler, AuthController)> {
+    // we don't want to create anything in the data.ms yet, thus we
+    // wrap our two builders in a closure that'll be executed later.
+    let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
+    let index_scheduler_builder = || -> anyhow::Result<_> {
+        Ok(IndexScheduler::new(IndexSchedulerOptions {
+            version_file_path: opt.db_path.join(VERSION_FILE_NAME),
+            auth_path: opt.db_path.join("auth"),
+            tasks_path: opt.db_path.join("tasks"),
+            update_file_path: opt.db_path.join("update_files"),
+            indexes_path: opt.db_path.join("indexes"),
+            snapshots_path: opt.snapshot_dir.clone(),
+            dumps_path: opt.dump_dir.clone(),
+            task_db_size: opt.max_task_db_size.get_bytes() as usize,
+            index_size: opt.max_index_size.get_bytes() as usize,
+            indexer_config: (&opt.indexer_options).try_into()?,
+            autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
+        })?)
+    };
+
+    match (
+        index_scheduler_builder(),
+        auth_controller.map_err(anyhow::Error::from),
+        create_version_file(&opt.db_path).map_err(anyhow::Error::from),
+    ) {
+        (Ok(i), Ok(a), Ok(())) => Ok((i, a)),
+        (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
+            if matches!(on_failure, OnFailure::RemoveDb) {
+                std::fs::remove_dir_all(&opt.db_path)?;
+            }
+            Err(e)
+        }
+    }
+}
+
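Editor's note: `open_or_create_database_unchecked` above preserves the old cleanup contract: every builder runs, and on the first failure the freshly created `data.ms` directory is removed only when the caller passed `OnFailure::RemoveDb`, i.e. when importing into a fresh directory. A reduced, runnable sketch of that contract, with hypothetical `build_scheduler`/`build_auth` functions standing in for the real builders:

```rust
use std::fs;
use std::io;
use std::path::Path;

enum OnFailure {
    RemoveDb,
    KeepDb,
}

// Hypothetical stand-ins for the real IndexScheduler/AuthController builders.
fn build_scheduler(db_path: &Path) -> io::Result<&'static str> {
    fs::create_dir_all(db_path.join("tasks"))?;
    Ok("index-scheduler")
}

fn build_auth(db_path: &Path) -> io::Result<&'static str> {
    fs::create_dir_all(db_path.join("auth"))?;
    Ok("auth-controller")
}

fn open_unchecked(
    db_path: &Path,
    on_failure: OnFailure,
) -> io::Result<(&'static str, &'static str)> {
    match (build_scheduler(db_path), build_auth(db_path)) {
        (Ok(s), Ok(a)) => Ok((s, a)),
        (Err(e), _) | (_, Err(e)) => {
            // A half-initialized data.ms is deleted only when the caller is
            // importing into a fresh directory (OnFailure::RemoveDb); an
            // existing database (OnFailure::KeepDb) is left untouched.
            if matches!(on_failure, OnFailure::RemoveDb) {
                let _ = fs::remove_dir_all(db_path);
            }
            Err(e)
        }
    }
}

fn main() -> io::Result<()> {
    let dir = std::env::temp_dir().join("sketch-data.ms");
    let (scheduler, auth) = open_unchecked(&dir, OnFailure::RemoveDb)?;
    println!("opened: {scheduler}, {auth}");
    fs::remove_dir_all(&dir)
}
```
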
+/// Ensure the database is in a valid state, then open the IndexScheduler and AuthController.
+fn open_or_create_database(
+    opt: &Opt,
+    empty_db: bool,
+) -> anyhow::Result<(IndexScheduler, AuthController)> {
+    if !empty_db {
+        check_version_file(&opt.db_path)?;
+    }
+
+    open_or_create_database_unchecked(opt, OnFailure::KeepDb)
+}
+
 fn import_dump(
     db_path: &Path,
     dump_path: &Path,