diff --git a/Cargo.toml b/Cargo.toml index b532389f1..79e5e3a14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] edition = "2018" -name = "raptor" +name = "pentium" version = "0.1.0" authors = ["Kerollmops "] diff --git a/README.md b/README.md index a7aec19f3..361d183af 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,31 @@ -# raptor-rs -Raptor, the new RISE +# pentium + +A search engine based on the [blog post series](https://blog.algolia.com/inside-the-algolia-engine-part-1-indexing-vs-search/) of the great Algolia company. + +This is a library, which means that binaries are not part of this repository, +but since I'm still nice I have made some examples for you in the `examples/` folder. ## Usage -First you need to generate the index files. +Pentium works with an index like most of the search engines. +So to test the library you can create one by indexing a simple csv file. ```bash -$ cargo build --release -$ time ./target/release/raptor-cli index csv --stop-words stop-words.txt the-csv-file.csv +cargo build --release --example csv-indexer +time ./target/release/examples/csv-indexer --stop-words misc/en.stopwords.txt misc/kaggle.csv ``` -The `stop-words.txt` file here is a simple file that contains one stop word by line. +The `en.stopwords.txt` file here is a simple file that contains one stop word per line (e.g. or, and...). Once the command finished indexing you will have 3 files that compose the index: - The `xxx.map` represent the fst map. - The `xxx.idx` represent the doc indexes matching the words in the map. - The `xxx.sst` is a file that contains all the fields and the values asociated with it, it is passed to the internal RocksDB. -Now you can easily use `raptor server console` or `raptor serve http` with the name of the dump. (e.g. relaxed-colden). +Now you can easily run the `serve-console` or `serve-http` examples with the name of the dump. (e.g. relaxed-colden). 
```bash -$ cargo build --release --default-features --features serve-console -$ ./target/release/raptor-cli serve console --stop-words stop-words.txt relaxed-colden +cargo build --release --example serve-console +./target/release/examples/serve-console relaxed-colden ``` -Note: If you have performance issues run the searcher in release mode (i.e. `--release`). diff --git a/examples/csv-indexer.rs b/examples/csv-indexer.rs index 940a3b69e..c9dc83c8a 100644 --- a/examples/csv-indexer.rs +++ b/examples/csv-indexer.rs @@ -6,7 +6,7 @@ use std::fs::File; use std::io; use csv::ReaderBuilder; -use raptor::{MetadataBuilder, DocIndex, Tokenizer, CommonWords}; +use pentium::{MetadataBuilder, DocIndex, Tokenizer, CommonWords}; use rocksdb::{SstFileWriter, EnvOptions, ColumnFamilyOptions}; use structopt::StructOpt; diff --git a/examples/json-lines-indexer.rs b/examples/json-lines-indexer.rs index 0a25f4e26..27314d2af 100644 --- a/examples/json-lines-indexer.rs +++ b/examples/json-lines-indexer.rs @@ -7,7 +7,7 @@ use std::path::PathBuf; use serde_json::from_str; use rocksdb::{SstFileWriter, EnvOptions, ColumnFamilyOptions}; -use raptor::{MetadataBuilder, DocIndex, Tokenizer, CommonWords}; +use pentium::{MetadataBuilder, DocIndex, Tokenizer, CommonWords}; use structopt::StructOpt; #[derive(Debug, StructOpt)] diff --git a/examples/serve-console.rs b/examples/serve-console.rs index 36e0b3cda..408a4b3b5 100644 --- a/examples/serve-console.rs +++ b/examples/serve-console.rs @@ -5,8 +5,8 @@ use std::path::PathBuf; use elapsed::measure_time; use rocksdb::{DB, DBOptions, IngestExternalFileOptions}; -use raptor::rank::{criterion, Config, RankedStream, Document}; -use raptor::{automaton, DocumentId, Metadata, CommonWords}; +use pentium::rank::{criterion, Config, RankedStream}; +use pentium::{automaton, DocumentId, Metadata}; #[derive(Debug, StructOpt)] pub struct CommandConsole { diff --git a/examples/serve-http.rs b/examples/serve-http.rs index 4b882e0a8..e37d094d1 100644 --- 
a/examples/serve-http.rs +++ b/examples/serve-http.rs @@ -7,8 +7,8 @@ use std::path::PathBuf; use std::error::Error; use std::sync::Arc; -use raptor::rank::{criterion, Config, RankedStream}; -use raptor::{automaton, Metadata, CommonWords}; +use pentium::rank::{criterion, Config, RankedStream}; +use pentium::{automaton, Metadata}; use rocksdb::{DB, DBOptions, IngestExternalFileOptions}; use warp::Filter; use structopt::StructOpt;