diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml
index e6fca8c66..e2744a962 100644
--- a/meilidb-data/Cargo.toml
+++ b/meilidb-data/Cargo.toml
@@ -16,7 +16,7 @@ ordered-float = { version = "1.0.2", features = ["serde"] }
 sdset = "0.3.1"
 serde = { version = "1.0.90", features = ["derive"] }
 serde_json = { version = "1.0.39", features = ["preserve_order"] }
-sled = "0.22.1"
+sled = "0.23.0"
 toml = { version = "0.5.0", features = ["preserve_order"] }
 
 [dependencies.rmp-serde]
diff --git a/meilidb-data/src/database.rs b/meilidb-data/src/database.rs
index 0e71e0f5b..357693ad8 100644
--- a/meilidb-data/src/database.rs
+++ b/meilidb-data/src/database.rs
@@ -3,6 +3,7 @@ use std::io::{self, Cursor, BufRead};
 use std::iter::FromIterator;
 use std::path::Path;
 use std::sync::Arc;
+use std::{error, fmt};
 
 use arc_swap::{ArcSwap, Lease};
 use byteorder::{ReadBytesExt, BigEndian};
@@ -50,6 +51,23 @@ impl From for Error {
     }
 }
 
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Error::*;
+        match self {
+            SchemaDiffer => write!(f, "schemas differ"),
+            SchemaMissing => write!(f, "this index does not have a schema"),
+            WordIndexMissing => write!(f, "this index does not have a word index"),
+            MissingDocumentId => write!(f, "document id is missing"),
+            SledError(e) => write!(f, "sled error; {}", e),
+            BincodeError(e) => write!(f, "bincode error; {}", e),
+            SerializerError(e) => write!(f, "serializer error; {}", e),
+        }
+    }
+}
+
+impl error::Error for Error { }
+
 fn index_name(name: &str) -> Vec<u8> {
     format!("index-{}", name).into_bytes()
 }
@@ -96,13 +114,6 @@ fn extract_document_key(key: Vec<u8>) -> io::Result<(DocumentId, SchemaAttr)> {
     Ok((document_id, schema_attr))
 }
 
-fn ivec_into_arc(ivec: IVec) -> Arc<[u8]> {
-    match ivec {
-        IVec::Inline(len, bytes) => Arc::from(&bytes[..len as usize]),
-        IVec::Remote { buf } => buf,
-    }
-}
-
 #[derive(Clone)]
 pub struct Database {
     opened: Arc>>,
@@ -185,7 +196,7 @@ impl RawIndex {
         let bytes = bytes.ok_or(Error::WordIndexMissing)?;
         let word_index = {
             let len = bytes.len();
-            let bytes = ivec_into_arc(bytes);
+            let bytes: Arc<[u8]> = Into::into(bytes);
             let mut cursor = SharedDataCursor::from_shared_bytes(bytes, 0, len);
 
             // TODO must handle this error
@@ -399,7 +410,6 @@ impl DocumentsAddition {
         Ok(())
     }
 
-
     pub fn finalize(self) -> sled::Result<()> {
         let delta_index = self.indexer.build();
 
diff --git a/meilidb-data/src/serde/indexer.rs b/meilidb-data/src/serde/indexer.rs
index c6e0d0c75..8eb0b2c67 100644
--- a/meilidb-data/src/serde/indexer.rs
+++ b/meilidb-data/src/serde/indexer.rs
@@ -237,7 +237,7 @@ impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
         Ok(())
     }
 
-    fn end(mut self) -> Result {
+    fn end(self) -> Result {
         let texts = self.texts.iter().map(String::as_str);
         self.indexer.index_text_seq(self.document_id, self.attribute, texts);
         Ok(())
diff --git a/meilidb/Cargo.toml b/meilidb/Cargo.toml
index e8cdb8d56..c2f4ad0fc 100644
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@@ -15,12 +15,13 @@ i128 = ["meilidb-core/i128"]
 nightly = ["meilidb-core/nightly"]
 
 [dev-dependencies]
-csv = "1.0.5"
-env_logger = "0.6.0"
+csv = "1.0.7"
+env_logger = "0.6.1"
 jemallocator = "0.1.9"
 quickcheck = "0.8.2"
 rand = "0.6.5"
 rand_xorshift = "0.1.1"
-structopt = "0.2.14"
+serde = { version = "1.0.90", features = ["derive"] }
+structopt = "0.2.15"
 tempfile = "3.0.7"
 termcolor = "1.0.4"
diff --git a/meilidb/examples/create-database.rs b/meilidb/examples/create-database.rs
index e5d9c403a..b0bfa1127 100644
--- a/meilidb/examples/create-database.rs
+++ b/meilidb/examples/create-database.rs
@@ -9,10 +9,10 @@ use std::error::Error;
 use std::borrow::Cow;
 use std::fs::File;
 
-use serde_derive::{Serialize, Deserialize};
+use serde::{Serialize, Deserialize};
 use structopt::StructOpt;
 
-use meilidb::database::{Database, Schema};
+use meilidb_data::{Database, Schema};
 
 #[derive(Debug, StructOpt)]
 pub struct Opt {
@@ -50,9 +50,9 @@ fn index(
     stop_words: &HashSet,
 ) -> Result>
 {
-    let database = Database::create(database_path)?;
+    let database = Database::start_default(database_path)?;
 
-    database.create_index("default", &schema)?;
+    let index = database.create_index("default".to_string(), schema.clone())?;
 
     let mut rdr = csv::Reader::from_path(csv_data_path)?;
     let mut raw_record = csv::StringRecord::new();
@@ -62,7 +62,7 @@
     let mut end_of_file = false;
 
     while !end_of_file {
-        let mut update = database.start_update("default")?;
+        let mut update = index.documents_addition();
 
         loop {
            end_of_file = !rdr.read_record(&mut raw_record)?;
@@ -76,7 +76,7 @@
                 }
             };
 
-            update.update_document(&document, &stop_words)?;
+            update.update_document(&document)?;
 
             print!("\rindexing document {}", i);
             i += 1;
@@ -89,7 +89,7 @@
 
         println!();
         println!("committing update...");
-        database.commit_update(update)?;
+        update.finalize()?;
     }
 
     Ok(database)
diff --git a/meilidb/examples/query-database.rs b/meilidb/examples/query-database.rs
index 2689ffe0f..f9a3bf8e5 100644
--- a/meilidb/examples/query-database.rs
+++ b/meilidb/examples/query-database.rs
@@ -2,19 +2,19 @@
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
 use std::collections::btree_map::{BTreeMap, Entry};
+use std::collections::{HashMap, HashSet};
 use std::iter::FromIterator;
 use std::io::{self, Write};
 use std::time::Instant;
 use std::path::PathBuf;
 use std::error::Error;
 
-use hashbrown::{HashMap, HashSet};
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use structopt::StructOpt;
 
 use meilidb_core::Match;
-use meilidb::database::schema::SchemaAttr;
-use meilidb::database::Database;
+use meilidb_data::schema::SchemaAttr;
+use meilidb_data::Database;
 
 #[derive(Debug, StructOpt)]
 pub struct Opt {
@@ -138,12 +138,19 @@ fn main() -> Result<(), Box> {
     let opt = Opt::from_args();
 
     let start = Instant::now();
-    let database = Database::open(&opt.database_path)?;
-    println!("database prepared for you in {:.2?}", start.elapsed());
+    let database = Database::start_default(&opt.database_path)?;
 
     let mut buffer = String::new();
     let input = io::stdin();
 
+    let index = database.open_index("default")?.unwrap();
+    let schema = index.schema();
+
+    println!("database prepared for you in {:.2?}", start.elapsed());
+
+    let fields = opt.displayed_fields.iter().map(String::as_str);
+    let fields = HashSet::from_iter(fields);
+
     loop {
         print!("Searching for: ");
         io::stdout().flush()?;
@@ -151,12 +158,9 @@
         if input.read_line(&mut buffer)? == 0 { break }
         let query = buffer.trim_end_matches('\n');
 
-        let view = database.view("default")?;
-        let schema = view.schema();
-
         let start = Instant::now();
 
-        let builder = view.query_builder();
+        let builder = index.query_builder();
         let documents = builder.query(query, 0..opt.number_results);
 
         let number_of_documents = documents.len();
@@ -164,19 +168,12 @@
 
             doc.matches.sort_unstable_by_key(|m| (m.char_index, m.char_index));
 
-            match view.document_by_id::(doc.id) {
-                Ok(document) => {
-                    for name in &opt.displayed_fields {
-                        let attr = match schema.attribute(name) {
-                            Some(attr) => attr,
-                            None => continue,
-                        };
-                        let text = match document.get(name) {
-                            Some(text) => text,
-                            None => continue,
-                        };
-
+            match index.document::(Some(&fields), doc.id) {
+                Ok(Some(document)) => {
+                    for (name, text) in document {
                         print!("{}: ", name);
+
+                        let attr = schema.attribute(&name).unwrap();
                         let matches = doc.matches.iter()
                             .filter(|m| SchemaAttr::new(m.attribute) == attr)
                             .cloned();
@@ -186,6 +183,7 @@
                     println!();
                 }
             },
+            Ok(None) => eprintln!("missing document"),
             Err(e) => eprintln!("{}", e),
         }
 
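
For reference, here is a minimal sketch of the indexing-and-query flow the two examples migrate to, using only the calls that appear in this diff (`Database::start_default`, `create_index`, `documents_addition`, `update_document`, `finalize`, `query_builder`). The `Document` struct, the `example` function, the hard-coded query string, and the exact trait bounds on documents are illustrative assumptions, not part of this change:

```rust
use std::error::Error;
use std::path::Path;

use serde::{Serialize, Deserialize};
use meilidb_data::{Database, Schema};

// Hypothetical document type; any Serialize/Deserialize struct whose
// fields match the schema attributes is assumed to work the same way.
#[derive(Serialize, Deserialize)]
struct Document {
    id: String,
    title: String,
    description: String,
}

fn example(path: &Path, schema: Schema, docs: &[Document]) -> Result<(), Box<dyn Error>> {
    // Open (or create) the sled-backed database and a named index.
    let database = Database::start_default(path)?;
    let index = database.create_index("default".to_string(), schema.clone())?;

    // Stage documents in a documents addition, then commit them in one go;
    // this replaces the old start_update/commit_update pair.
    let mut addition = index.documents_addition();
    for doc in docs {
        addition.update_document(doc)?;
    }
    addition.finalize()?;

    // Query through the index directly; there is no per-query "view" anymore.
    let builder = index.query_builder();
    let documents = builder.query("hello", 0..10);
    println!("{} documents found", documents.len());

    Ok(())
}
```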