From 84f49d76cd2869cc4af08d835d6182bfc2c9e042 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Sun, 7 Jan 2024 21:27:29 +0100 Subject: [PATCH 1/7] Add cuda feature --- Cargo.lock | 21 +++++++++++++++++++++ milli/Cargo.toml | 3 +++ milli/src/vector/hf.rs | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index bf3851db5..f1fc93b1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -706,6 +706,8 @@ version = "0.3.3" source = "git+https://github.com/huggingface/candle.git#5270224f407502b82fe90bc2622894ce3871b002" dependencies = [ "byteorder", + "candle-kernels", + "cudarc", "gemm", "half 2.3.1", "memmap2 0.9.3", @@ -720,6 +722,16 @@ dependencies = [ "zip", ] +[[package]] +name = "candle-kernels" +version = "0.3.1" +source = "git+https://github.com/huggingface/candle.git#f4fcf6090045ac44122fd5f0a7e46db6e3e16528" +dependencies = [ + "anyhow", + "glob", + "rayon", +] + [[package]] name = "candle-nn" version = "0.3.3" @@ -1163,6 +1175,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "cudarc" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1871a911a2b9a3f66a285896a719159985683bf9903aa2cf89e0c9f53e14552" +dependencies = [ + "half 2.3.1", +] + [[package]] name = "darling" version = "0.14.4" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index ec27b5f39..047a30e35 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -137,3 +137,6 @@ greek = ["charabia/greek"] # allow khmer specialized tokenization khmer = ["charabia/khmer"] + +# allow CUDA support +cuda = ["candle-core/cuda"] diff --git a/milli/src/vector/hf.rs b/milli/src/vector/hf.rs index 0a6bcbe93..3a3949e77 100644 --- a/milli/src/vector/hf.rs +++ b/milli/src/vector/hf.rs @@ -70,7 +70,7 @@ impl std::fmt::Debug for Embedder { impl Embedder { pub fn new(options: EmbedderOptions) -> std::result::Result { - let device = candle_core::Device::Cpu; + let device = candle_core::Device::cuda_if_available(0).unwrap(); let repo = 
match options.revision.clone() { Some(revision) => Repo::with_revision(options.model.clone(), RepoType::Model, revision), None => Repo::model(options.model.clone()), From 4b4e8ea2a4820a2d1b4eaf1fafb95df4114b2c34 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 9 Jan 2024 13:24:33 +0100 Subject: [PATCH 2/7] Add binary to list features --- Cargo.toml | 1 + xtask/Cargo.toml | 15 +++++++++++++++ xtask/src/main.rs | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 xtask/Cargo.toml create mode 100644 xtask/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index d7b3cca82..bb8d7d787 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "json-depth-checker", "benchmarks", "fuzzers", + "xtask", ] [workspace.package] diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml new file mode 100644 index 000000000..af9ecc7b5 --- /dev/null +++ b/xtask/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "xtask" +version.workspace = true +authors.workspace = true +description.workspace = true +homepage.workspace = true +readme.workspace = true +edition.workspace = true +license.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +cargo_metadata = "0.18.1" +clap = { version = "4.4.14", features = ["derive"] } diff --git a/xtask/src/main.rs b/xtask/src/main.rs new file mode 100644 index 000000000..6570dc67b --- /dev/null +++ b/xtask/src/main.rs @@ -0,0 +1,41 @@ +use std::collections::HashSet; + +use clap::Parser; + +/// List features available in the workspace +#[derive(Parser, Debug)] +struct ListFeaturesDeriveArgs { + /// Feature to exclude from the list. 
Repeat the argument to exclude multiple features + #[arg(short, long)] + exclude_feature: Vec<String>, +} + +/// Utilitary commands +#[derive(Parser, Debug)] +#[command(author, version, about, long_about)] +#[command(name = "cargo xtask")] +#[command(bin_name = "cargo xtask")] +enum Command { + ListFeatures(ListFeaturesDeriveArgs), +} + +fn main() { + let args = Command::parse(); + match args { + Command::ListFeatures(args) => list_features(args), + } +} + +fn list_features(args: ListFeaturesDeriveArgs) { + let exclude_features: HashSet<_> = args.exclude_feature.into_iter().collect(); + let metadata = cargo_metadata::MetadataCommand::new().no_deps().exec().unwrap(); + let features: Vec<String> = metadata + .packages + .iter() + .flat_map(|package| package.features.keys()) + .filter(|feature| !exclude_features.contains(feature.as_str())) + .map(|s| s.to_owned()) + .collect(); + let features = features.join(" "); + println!("{features}") +} From 4aa4a15dc9bf3896d7280889f305e8331f2e1db8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 9 Jan 2024 13:25:47 +0100 Subject: [PATCH 3/7] Add to Cargo.lock --- Cargo.lock | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index f1fc93b1d..f3938264d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -700,6 +700,15 @@ dependencies = [ "displaydoc", ] +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +dependencies = [ + "serde", +] + [[package]] name = "candle-core" version = "0.3.3" @@ -764,6 +773,29 @@ dependencies = [ "wav", ] +[[package]] +name = "cargo-platform" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ceed8ef69d8518a5dda55c07425450b58a4e1946f4951eab6d7191ee86c2443d" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.18.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror", +] + [[package]] name = "cargo_toml" version = "0.18.0" @@ -4858,6 +4890,9 @@ name = "semver" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +dependencies = [ + "serde", +] [[package]] name = "seq-macro" @@ -6205,6 +6240,14 @@ dependencies = [ "libc", ] +[[package]] +name = "xtask" +version = "1.6.0" +dependencies = [ + "cargo_metadata", + "clap", +] + [[package]] name = "yada" version = "0.5.0" From 66ae81a909bc22afd4bdbb089696aba896b14e6b Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 9 Jan 2024 13:25:06 +0100 Subject: [PATCH 4/7] Make it so binary can be used with `cargo xtask` --- .cargo/config.toml | 2 ++ CONTRIBUTING.md | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 .cargo/config.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 000000000..35049cbcb --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[alias] +xtask = "run --package xtask --" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7b0cb24e9..073da7031 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -75,6 +75,12 @@ If you get a "Too many open files" error you might want to increase the open fil ulimit -Sn 3000 ``` +#### Build tools + +Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools. + +Run `cargo xtask --help` from the root of the repository to find out what is available. 
+ ## Git Guidelines ### Git Branches From 1b90778bf52bfdeecd0c3cc0eae0841d316bc8cb Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 9 Jan 2024 13:25:31 +0100 Subject: [PATCH 5/7] Change CI --- .github/workflows/test-suite.yml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 68e5a4678..18e9fc48a 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -82,7 +82,7 @@ jobs: args: --locked --release --all test-all-features: - name: Tests all features + name: Tests almost all features runs-on: ubuntu-latest container: # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations @@ -98,16 +98,12 @@ jobs: with: toolchain: stable override: true - - name: Run cargo build with all features - uses: actions-rs/cargo@v1 - with: - command: build - args: --workspace --locked --release --all-features - - name: Run cargo test with all features - uses: actions-rs/cargo@v1 - with: - command: test - args: --workspace --locked --release --all-features + - name: Run cargo build with almost all features + run: | + cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" + - name: Run cargo test with almost all features + run: | + cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" test-disabled-tokenization: name: Test disabled tokenization From f692021bfcbd98eec2b8833d164d287e83a2322f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jan 2024 10:13:27 +0100 Subject: [PATCH 6/7] Implement PR comments --- milli/Cargo.toml | 2 +- milli/src/vector/hf.rs | 8 +++++++- xtask/Cargo.toml | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 047a30e35..dc2b992e0 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -138,5 +138,5 @@ greek = 
["charabia/greek"] # allow khmer specialized tokenization khmer = ["charabia/khmer"] -# allow CUDA support +# allow CUDA support, see cuda = ["candle-core/cuda"] diff --git a/milli/src/vector/hf.rs b/milli/src/vector/hf.rs index 3a3949e77..7acb09aa8 100644 --- a/milli/src/vector/hf.rs +++ b/milli/src/vector/hf.rs @@ -70,7 +70,13 @@ impl std::fmt::Debug for Embedder { impl Embedder { pub fn new(options: EmbedderOptions) -> std::result::Result { - let device = candle_core::Device::cuda_if_available(0).unwrap(); + let device = match candle_core::Device::cuda_if_available(0) { + Ok(device) => device, + Err(error) => { + log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error); + candle_core::Device::Cpu + } + }; let repo = match options.revision.clone() { Some(revision) => Repo::with_revision(options.model.clone(), RepoType::Model, revision), None => Repo::model(options.model.clone()), diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index af9ecc7b5..07271ea09 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -2,7 +2,7 @@ name = "xtask" version.workspace = true authors.workspace = true -description.workspace = true +description = "Workspace automation tool following the xtask pattern " homepage.workspace = true readme.workspace = true edition.workspace = true From d35fe43fd532fbddd730fbbfd36cf4c6ec12eeb5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jan 2024 10:49:17 +0100 Subject: [PATCH 7/7] Update lock file --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f3938264d..512f69c53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1209,9 +1209,9 @@ dependencies = [ [[package]] name = "cudarc" -version = "0.9.15" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1871a911a2b9a3f66a285896a719159985683bf9903aa2cf89e0c9f53e14552" +checksum = 
"9395df0cab995685664e79cc35ad6302bf08fb9c5d82301875a183affe1278b1" dependencies = [ "half 2.3.1", ]