From 20b5a6a06e4b897313e83e24fe1e1e47c660bfe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <renault.cle@gmail.com>
Date: Sat, 29 Dec 2018 12:26:33 +0100
Subject: [PATCH] doc: Add examples for runtime defined data and Schema

---
 Cargo.toml                   |  2 +-
 examples/create-database.rs  | 40 +++++++++----------
 examples/query-database.rs   | 76 ++++++++++++++++++++++++------------
 examples/schema-example.toml | 19 +++++++++
 src/lib.rs                   |  4 ++
 5 files changed, 94 insertions(+), 47 deletions(-)
 create mode 100644 examples/schema-example.toml
diff --git a/Cargo.toml b/Cargo.toml
index ae85ae7c5..e15fbb6cf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,7 @@ bincode = "1.0"
 byteorder = "1.2"
 crossbeam = "0.6"
 fst = "0.3"
-hashbrown = "0.1"
+hashbrown = { version = "0.1", features = ["serde"] }
 lazy_static = "1.1"
 levenshtein_automata = { version = "0.1", features = ["fst_automaton"] }
 linked-hash-map = { version = "0.5", features = ["serde_impl"] }
diff --git a/examples/create-database.rs b/examples/create-database.rs
index 07ffeb931..9a2784586 100644
--- a/examples/create-database.rs
+++ b/examples/create-database.rs
@@ -1,41 +1,35 @@
 use std::path::{Path, PathBuf};
 use std::error::Error;
+use std::borrow::Cow;
+use std::fs::File;
 
+use hashbrown::HashMap;
 use serde_derive::{Serialize, Deserialize};
 use structopt::StructOpt;
 
-use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};
-use meilidb::database::UpdateBuilder;
+use meilidb::database::{Database, Schema, UpdateBuilder};
 use meilidb::tokenizer::DefaultBuilder;
-use meilidb::database::Database;
 
 #[derive(Debug, StructOpt)]
 pub struct Opt {
-    /// The destination where the database must be created
+    /// The destination where the database must be created.
     #[structopt(parse(from_os_str))]
     pub database_path: PathBuf,
 
     /// The csv file to index.
     #[structopt(parse(from_os_str))]
     pub csv_data_path: PathBuf,
+
+    /// The path to the schema.
+    #[structopt(long = "schema", parse(from_os_str))]
+    pub schema_path: PathBuf,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-struct Document<'a> {
-    id: &'a str,
-    title: &'a str,
-    description: &'a str,
-    image: &'a str,
-}
-
-fn create_schema() -> Schema {
-    let mut schema = SchemaBuilder::with_identifier("id");
-    schema.new_attribute("id", STORED);
-    schema.new_attribute("title", STORED | INDEXED);
-    schema.new_attribute("description", STORED | INDEXED);
-    schema.new_attribute("image", STORED);
-    schema.build()
-}
+#[derive(Serialize, Deserialize)]
+struct Document<'a> (
+    #[serde(borrow)]
+    HashMap<Cow<'a, str>, Cow<'a, str>>
+);
 
 fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<Database, Box<Error>> {
     let database = Database::create(database_path, schema.clone())?;
@@ -71,7 +65,10 @@ fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<D
 fn main() -> Result<(), Box<Error>> {
     let opt = Opt::from_args();
 
-    let schema = create_schema();
+    let schema = {
+        let file = File::open(&opt.schema_path)?;
+        Schema::from_toml(file)?
+    };
 
     let (elapsed, result) = elapsed::measure_time(|| {
         index(schema, &opt.database_path, &opt.csv_data_path)
@@ -82,6 +79,5 @@ fn main() -> Result<(), Box<Error>> {
     }
 
     println!("database created in {} at: {:?}", elapsed, opt.database_path);
-
     Ok(())
 }
diff --git a/examples/query-database.rs b/examples/query-database.rs
index e6fb6ee93..4571d242e 100644
--- a/examples/query-database.rs
+++ b/examples/query-database.rs
@@ -1,11 +1,14 @@
+use std::collections::btree_map::{BTreeMap, Entry};
+use std::iter::FromIterator;
 use std::io::{self, Write};
 use std::path::PathBuf;
 use std::error::Error;
 
+use hashbrown::{HashMap, HashSet};
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
-use serde_derive::{Serialize, Deserialize};
 use structopt::StructOpt;
 
+use meilidb::database::schema::SchemaAttr;
 use meilidb::database::Database;
 use meilidb::Match;
 
@@ -15,18 +18,15 @@ pub struct Opt {
     #[structopt(parse(from_os_str))]
     pub database_path: PathBuf,
 
+    /// Fields that must be displayed.
+    pub displayed_fields: Vec<String>,
+
     /// The number of returned results
     #[structopt(short = "n", long = "number-results", default_value = "10")]
     pub number_results: usize,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-struct Document {
-    id: String,
-    title: String,
-    description: String,
-    image: String,
-}
+type Document = HashMap<String, String>;
 
 fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
     let mut stdout = StandardStream::stdout(ColorChoice::Always);
@@ -45,20 +45,30 @@ fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
     Ok(())
 }
 
-fn create_highlight_areas(text: &str, matches: &[Match], attribute: u16) -> Vec<usize> {
-    let mut title_areas = Vec::new();
+fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) -> Vec<usize> {
+    let mut byte_indexes = BTreeMap::new();
 
-    title_areas.push(0);
     for match_ in matches {
-        if match_.attribute.attribute() == attribute {
+        let match_attribute = match_.attribute.attribute();
+        if SchemaAttr::new(match_attribute) == attribute {
             let word_area = match_.word_area;
             let byte_index = word_area.byte_index() as usize;
             let length = word_area.length() as usize;
-            title_areas.push(byte_index);
-            title_areas.push(byte_index + length);
+            match byte_indexes.entry(byte_index) {
+                Entry::Vacant(entry) => { entry.insert(length); },
+                Entry::Occupied(mut entry) => if *entry.get() < length { entry.insert(length); },
+            }
         }
     }
+
+    let mut title_areas = Vec::new();
+    title_areas.push(0);
+    for (byte_index, length) in byte_indexes {
+        title_areas.push(byte_index);
+        title_areas.push(byte_index + length);
+    }
     title_areas.push(text.len());
+    title_areas.sort_unstable();
     title_areas
 }
 
@@ -80,6 +90,7 @@ fn main() -> Result<(), Box<Error>> {
         let query = buffer.trim_end_matches('\n');
 
         let view = database.view();
+        let schema = view.schema();
 
         let (elapsed, documents) = elapsed::measure_time(|| {
             let builder = view.query_builder().unwrap();
@@ -90,22 +101,39 @@ fn main() -> Result<(), Box<Error>> {
         for doc in documents {
             match view.document_by_id::<Document>(doc.id) {
                 Ok(document) => {
+                    for name in &opt.displayed_fields {
+                        let attr = match schema.attribute(name) {
+                            Some(attr) => attr,
+                            None => continue,
+                        };
+                        let text = match document.get(name) {
+                            Some(text) => text,
+                            None => continue,
+                        };
 
-                    print!("title: ");
-                    let title_areas = create_highlight_areas(&document.title, &doc.matches, 1);
-                    display_highlights(&document.title, &title_areas)?;
-                    println!();
-
-                    print!("description: ");
-                    let description_areas = create_highlight_areas(&document.description, &doc.matches, 2);
-                    display_highlights(&document.description, &description_areas)?;
-                    println!();
+                        print!("{}: ", name);
+                        let areas = create_highlight_areas(&text, &doc.matches, attr);
+                        display_highlights(&text, &areas)?;
+                        println!();
+                    }
                 },
                 Err(e) => eprintln!("{}", e),
             }
+
+            let mut matching_attributes = HashSet::new();
+            for _match in doc.matches {
+                let attr = SchemaAttr::new(_match.attribute.attribute());
+                let name = schema.attribute_name(attr);
+                matching_attributes.insert(name);
+            }
+
+            let matching_attributes = Vec::from_iter(matching_attributes);
+            println!("matching in: {:?}", matching_attributes);
+
+            println!();
         }
 
-        println!("Found {} results in {}", number_of_documents, elapsed);
+        println!("===== Found {} results in {} =====", number_of_documents, elapsed);
         buffer.clear();
     }
 
diff --git a/examples/schema-example.toml b/examples/schema-example.toml
new file mode 100644
index 000000000..fcf2685e9
--- /dev/null
+++ b/examples/schema-example.toml
@@ -0,0 +1,19 @@
+# This schema has been generated ...
+# The order in which the attributes are declared is important,
+# it specifies the attribute xxx...
+
+identifier = "id"
+
+[attributes.id]
+stored = true
+
+[attributes.title]
+stored = true
+indexed = true
+
+[attributes.description]
+stored = true
+indexed = true
+
+[attributes.image]
+stored = true
diff --git a/src/lib.rs b/src/lib.rs
index b43d8d506..ab291afa5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -63,10 +63,12 @@ impl Attribute {
         }
     }
 
+    #[inline]
     pub fn attribute(&self) -> u16 {
         (self.0 >> 22) as u16
     }
 
+    #[inline]
     pub fn word_index(&self) -> u32 {
         self.0 & 0b0000_0000_0011_1111_1111_1111_1111
     }
@@ -129,10 +131,12 @@ impl WordArea {
         }
     }
 
+    #[inline]
     pub fn byte_index(&self) -> u32 {
         self.0 >> 10
     }
 
+    #[inline]
     pub fn length(&self) -> u16 {
         (self.0 & 0b0000_0000_0000_0000_0011_1111_1111) as u16
     }