diff --git a/examples/create-database.rs b/examples/create-database.rs
new file mode 100644
index 000000000..a26fe05a9
--- /dev/null
+++ b/examples/create-database.rs
@@ -0,0 +1,130 @@
+use std::collections::hash_map::DefaultHasher;
+use std::path::{Path, PathBuf};
+use std::hash::{Hash, Hasher};
+use std::error::Error;
+
+use serde_derive::{Serialize, Deserialize};
+use structopt::StructOpt;
+
+use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};
+use meilidb::database::update::PositiveUpdateBuilder;
+use meilidb::tokenizer::DefaultBuilder;
+use meilidb::database::Database;
+
+// Command-line arguments of the `create-database` example.
+#[derive(Debug, StructOpt)]
+pub struct Opt {
+    /// The destination where the database must be created
+    #[structopt(parse(from_os_str))]
+    pub database_path: PathBuf,
+
+    /// The csv file to index.
+    #[structopt(parse(from_os_str))]
+    pub csv_data_path: PathBuf,
+}
+
+/// One record of the CSV file, borrowing its text from the raw record.
+///
+/// Records are deserialized against the CSV header row, so the field names
+/// are the column names; that is why `non_snake_case` is allowed here.
+#[derive(Debug, Serialize, Deserialize)]
+#[allow(non_snake_case)]
+struct Document<'a> {
+    skuId: &'a str,
+    fr_FR_commercialName: &'a str,
+    en_GB_commercialName: &'a str,
+    maketingColorInternalName: &'a str,
+    materialInternalName: &'a str,
+    fr_FR_description: &'a str,
+    fr_FR_detailedDescription: &'a str,
+    fr_FR_Price: &'a str,
+    fr_FR_images_url: &'a str,
+    en_GB_description: &'a str,
+    en_GB_detailedDescription: &'a str,
+    en_GB_Price: &'a str,
+}
+
+/// Hashes `t` with a fresh `DefaultHasher` and returns the 64-bit result.
+fn calculate_hash<T: Hash>(t: &T) -> u64 {
+    let mut s = DefaultHasher::new();
+    t.hash(&mut s);
+    s.finish()
+}
+
+/// Declares every `Document` attribute together with its `STORED` / `INDEXED` flags.
+fn create_schema() -> Schema {
+    let mut schema = SchemaBuilder::new();
+    schema.new_attribute("skuId", STORED | INDEXED);
+    schema.new_attribute("fr_FR_commercialName", STORED | INDEXED);
+    schema.new_attribute("en_GB_commercialName", STORED | INDEXED);
+    schema.new_attribute("maketingColorInternalName", STORED | INDEXED);
+    schema.new_attribute("materialInternalName", STORED | INDEXED);
+    schema.new_attribute("fr_FR_description", STORED | INDEXED);
+    schema.new_attribute("fr_FR_detailedDescription", STORED);
+    schema.new_attribute("fr_FR_Price", STORED);
+    schema.new_attribute("fr_FR_images_url", STORED);
+    schema.new_attribute("en_GB_description", STORED | INDEXED);
+    schema.new_attribute("en_GB_detailedDescription", STORED);
+    schema.new_attribute("en_GB_Price", STORED);
+    schema.build()
+}
+
+/// Creates the database at `database_path` and fills it from the CSV file.
+///
+/// Records that fail to deserialize are reported on stderr and skipped.
+/// Every other failure is returned to the caller.
+fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<Database, Box<dyn Error>> {
+    let database = Database::create(database_path, schema.clone())?;
+
+    println!("start indexing...");
+
+    let tokenizer_builder = DefaultBuilder::new();
+    let update_path = tempfile::NamedTempFile::new()?;
+    let mut update = PositiveUpdateBuilder::new(update_path.path(), schema, tokenizer_builder);
+
+    let mut rdr = csv::Reader::from_path(csv_data_path)?;
+    let mut raw_record = csv::StringRecord::new();
+    let headers = rdr.headers()?.clone();
+
+    while rdr.read_record(&mut raw_record)? {
+        let document: Document = match raw_record.deserialize(Some(&headers)) {
+            Ok(document) => document,
+            Err(e) => {
+                eprintln!("{:?}", e);
+                continue;
+            }
+        };
+
+        // The document id is the hash of the sku.
+        let document_id = calculate_hash(&document.skuId);
+
+        // Report a failed update through the returned error instead of panicking.
+        update
+            .update(document_id, &document)
+            .map_err(|e| format!("{:?}", e))?;
+    }
+
+    let mut update = update.build()?;
+
+    update.set_move(true);
+    database.ingest_update_file(update)?;
+
+    Ok(database)
+}
+
+/// Parses the arguments, builds the database and prints how long it took.
+fn main() -> Result<(), Box<dyn Error>> {
+    let opt = Opt::from_args();
+
+    let schema = create_schema();
+
+    let (elapsed, result) = elapsed::measure_time(|| {
+        index(schema, &opt.database_path, &opt.csv_data_path)
+    });
+
+    let _ = result?;
+
+    println!("database created in {} at: {:?}", elapsed, opt.database_path);
+
+    Ok(())
+}
diff --git a/examples/query-database.rs b/examples/query-database.rs
new file mode 100644
index 000000000..09f0f883f
--- /dev/null
+++ b/examples/query-database.rs
@@ -0,0 +1,82 @@
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
+use std::io::{self, Write};
+use std::path::PathBuf;
+use std::error::Error;
+
+use serde_derive::{Serialize, Deserialize};
+use structopt::StructOpt;
+
+use meilidb::database::Database;
+
+// Command-line arguments of the `query-database` example.
+#[derive(Debug, StructOpt)]
+pub struct Opt {
+    /// The path of the database to open
+    #[structopt(parse(from_os_str))]
+    pub database_path: PathBuf,
+}
+
+/// Attributes read back from the database for each search hit.
+#[derive(Debug, Serialize, Deserialize)]
+#[allow(non_snake_case)]
+struct Document {
+    skuId: String,
+    fr_FR_commercialName: String,
+    en_GB_commercialName: String,
+    maketingColorInternalName: String,
+    materialInternalName: String,
+    fr_FR_description: String,
+    en_GB_description: String,
+}
+
+/// Hashes `t` with a fresh `DefaultHasher` and returns the 64-bit result.
+fn calculate_hash<T: Hash>(t: &T) -> u64 {
+    let mut s = DefaultHasher::new();
+    t.hash(&mut s);
+    s.finish()
+}
+
+/// Opens the database, then answers queries read line by line from stdin until end of input.
+fn main() -> Result<(), Box<dyn Error>> {
+    let opt = Opt::from_args();
+
+    let (elapsed, result) = elapsed::measure_time(|| Database::open(&opt.database_path));
+    let database = result?;
+    println!("database opened in {}", elapsed);
+
+    let mut buffer = String::new();
+    let input = io::stdin();
+
+    loop {
+        print!("Search: ");
+        io::stdout().flush()?;
+
+        // A read of zero bytes means end of input: leave the loop.
+        if input.read_line(&mut buffer)? == 0 { break }
+
+        let view = database.view();
+
+        let (elapsed, documents) = elapsed::measure_time(|| {
+            let builder = view.query_builder().unwrap();
+            builder.query(&buffer, 10)
+        });
+
+        let mut full_documents = Vec::with_capacity(documents.len());
+
+        for document in documents {
+            match view.retrieve_document::<Document>(document.id) {
+                Ok(document) => full_documents.push(document),
+                Err(e) => eprintln!("{}", e),
+            }
+        }
+
+        println!("{:#?}", full_documents);
+        println!("{}", elapsed);
+
+        // `read_line` appends, so the buffer is emptied before the next query.
+        buffer.clear();
+    }
+
+    Ok(())
+}