2018-11-22 22:44:51 +08:00
|
|
|
use std::fs;
|
2018-11-20 18:37:19 +08:00
|
|
|
use std::path::Path;
|
|
|
|
use std::error::Error;
|
|
|
|
use std::path::PathBuf;
|
|
|
|
|
|
|
|
use elapsed::measure_time;
|
|
|
|
use moby_name_gen::random_name;
|
|
|
|
use structopt::StructOpt;
|
|
|
|
|
2018-11-22 22:44:51 +08:00
|
|
|
use pentium::index::schema::{Schema, SchemaBuilder, STORED, INDEXED};
|
|
|
|
use pentium::index::update::{Update, PositiveUpdateBuilder};
|
|
|
|
use pentium::tokenizer::DefaultBuilder;
|
2018-11-20 18:37:19 +08:00
|
|
|
use pentium::index::Index;
|
|
|
|
|
|
|
|
#[derive(Debug, StructOpt)]
|
|
|
|
pub struct Cmd {
|
|
|
|
/// csv file to index
|
|
|
|
#[structopt(parse(from_os_str))]
|
|
|
|
pub csv_file: PathBuf,
|
|
|
|
}
|
|
|
|
|
2018-11-22 22:44:51 +08:00
|
|
|
fn generate_update_from_csv(path: &Path) -> Result<(Schema, Update), Box<Error>> {
|
|
|
|
let mut csv = csv::Reader::from_path(path)?;
|
|
|
|
|
|
|
|
let mut attributes = Vec::new();
|
|
|
|
let (schema, id_attr_index) = {
|
|
|
|
let mut id_attr_index = None;
|
|
|
|
let mut builder = SchemaBuilder::new();
|
|
|
|
|
|
|
|
for (i, header_name) in csv.headers()?.iter().enumerate() {
|
|
|
|
// FIXME this does not disallow multiple "id" fields
|
|
|
|
if header_name == "id" { id_attr_index = Some(i) };
|
|
|
|
|
|
|
|
let field = builder.new_attribute(header_name, STORED | INDEXED);
|
|
|
|
attributes.push(field);
|
|
|
|
}
|
|
|
|
|
|
|
|
let id = match id_attr_index {
|
|
|
|
Some(index) => index,
|
|
|
|
None => return Err(String::from("No \"id\" field found which is mandatory").into()),
|
|
|
|
};
|
|
|
|
|
|
|
|
(builder.build(), id)
|
|
|
|
};
|
|
|
|
|
|
|
|
let update_path = PathBuf::from("./positive-update-xxx.sst");
|
|
|
|
let tokenizer_builder = DefaultBuilder::new();
|
|
|
|
let mut builder = PositiveUpdateBuilder::new(&update_path, schema.clone(), tokenizer_builder);
|
|
|
|
|
|
|
|
for record in csv.records() {
|
|
|
|
let record = match record {
|
|
|
|
Ok(x) => x,
|
|
|
|
Err(e) => { eprintln!("{:?}", e); continue }
|
|
|
|
};
|
|
|
|
|
|
|
|
let id = record.into_iter().nth(id_attr_index).unwrap().parse()?;
|
|
|
|
for (value, attr) in record.into_iter().zip(&attributes) {
|
|
|
|
builder.update_field(id, *attr, value.to_string());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
builder.build().map(|update| (schema, update))
|
2018-11-20 18:37:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
fn main() -> Result<(), Box<Error>> {
|
|
|
|
let command = Cmd::from_args();
|
|
|
|
|
2018-12-02 19:43:48 +08:00
|
|
|
let path = random_name() + ".rdb";
|
2018-11-20 18:37:19 +08:00
|
|
|
|
|
|
|
println!("generating the update...");
|
2018-11-22 22:44:51 +08:00
|
|
|
let (schema, update) = generate_update_from_csv(&command.csv_file)?;
|
2018-11-20 18:37:19 +08:00
|
|
|
|
|
|
|
println!("creating the index");
|
2018-11-22 22:44:51 +08:00
|
|
|
let index = Index::create(&path, schema)?;
|
2018-11-20 18:37:19 +08:00
|
|
|
|
|
|
|
println!("ingesting the changes in the index");
|
|
|
|
index.ingest_update(update)?;
|
|
|
|
|
2018-11-22 22:44:51 +08:00
|
|
|
// FIXME this is really ugly !!!!
|
|
|
|
// the index does not support moving update files
|
|
|
|
// so we must remove it by hand
|
|
|
|
fs::remove_file("./positive-update-xxx.sst")?;
|
|
|
|
|
2018-11-20 18:37:19 +08:00
|
|
|
println!("the index {:?} has been created!", path);
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|