Merge pull request #147 from meilisearch/moving-to-sled

Make the repository a workspace and move to sled
2024-11-26 20:15:07 +08:00 · 2019-04-29 15:21:02 +02:00 · 2019-04-29 15:21:02 +02:00 · d7ce6d016b
commit d7ce6d016b
parent 3056b351fa 9023a12ad4
60 changed files with 2314 additions and 3226 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
-/rocksdb
 /target
 /Cargo.lock
+meilidb/Cargo.lock
+meilidb-core/Cargo.lock
 **/*.rs.bk
 **/*.csv
 **/*.json_lines
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,55 +1,10 @@
-[package]
-edition = "2018"
-name = "meilidb"
-version = "0.3.2"
-authors = ["Kerollmops <renault.cle@gmail.com>"]
-
-[dependencies]
-arc-swap = "0.3.7"
-bincode = "1.1.2"
-byteorder = "1.3.1"
-fst = "0.3.3"
-hashbrown = { version = "0.1.8", features = ["serde"] }
-lazy_static = "1.2.0"
-levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
-linked-hash-map = { version = "0.5.1", features = ["serde_impl"] }
-lockfree = "0.5.1"
-log = "0.4.6"
-rayon = "1.0.3"
-sdset = "0.3.1"
-serde = "1.0.88"
-serde_derive = "1.0.88"
-serde_json = { version = "1.0.38", features = ["preserve_order"] }
-size_format = "1.0.2"
-slice-group-by = "0.2.4"
-unidecode = "0.3.0"
-
-[dependencies.toml]
-git = "https://github.com/Kerollmops/toml-rs.git"
-features = ["preserve_order"]
-rev = "0372ba6"
-
-[dependencies.rocksdb]
-git = "https://github.com/pingcap/rust-rocksdb.git"
-rev = "306e201"
-
-[features]
-default = ["simd"]
-i128 = ["bincode/i128", "byteorder/i128"]
-portable = ["rocksdb/portable"]
-simd = ["rocksdb/sse"]
-nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
-
-[dev-dependencies]
-csv = "1.0.5"
-env_logger = "0.6.0"
-jemallocator = "0.1.9"
-quickcheck = "0.8.2"
-rand = "0.6.5"
-rand_xorshift = "0.1.1"
-structopt = "0.2.14"
-tempfile = "3.0.7"
-termcolor = "1.0.4"
+[workspace]
+members = [
+    "meilidb",
+    "meilidb-core",
+    "meilidb-data",
+    "meilidb-tokenizer",
+]

 [profile.release]
 debug = true
--- a/examples/ebay/schema-example.toml
+++ b/examples/ebay/schema-example.toml
@ -1,19 +0,0 @@
-# This schema has been generated ...
-# The order in which the attributes are declared is important,
-# it specify the attribute xxx...
-
-identifier = "id"
-
-[attributes.id]
-stored = true
-
-[attributes.title]
-stored = true
-indexed = true
-
-[attributes.description]
-stored = true
-indexed = true
-
-[attributes.image]
-stored = true
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@ -0,0 +1,29 @@
+[package]
+name = "meilidb-core"
+version = "0.1.0"
+authors = ["Kerollmops <renault.cle@gmail.com>"]
+edition = "2018"
+
+[dependencies]
+byteorder = "1.3.1"
+hashbrown = "0.2.2"
+lazy_static = "1.2.0"
+log = "0.4.6"
+meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
+rayon = "1.0.3"
+sdset = "0.3.1"
+serde = { version = "1.0.88", features = ["derive"] }
+slice-group-by = "0.2.4"
+
+[dependencies.fst]
+git = "https://github.com/Kerollmops/fst.git"
+branch = "arc-byte-slice"
+
+[dependencies.levenshtein_automata]
+git = "https://github.com/Kerollmops/levenshtein-automata.git"
+branch = "arc-byte-slice"
+features = ["fst_automaton"]
+
+[features]
+i128 = ["byteorder/i128"]
+nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
--- a/meilidb-core/src/automaton.rs
+++ b/meilidb-core/src/automaton.rs
--- a/meilidb-core/src/criterion/document_id.rs
+++ b/meilidb-core/src/criterion/document_id.rs
@ -1,7 +1,6 @@
 use std::cmp::Ordering;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[derive(Debug, Clone, Copy)]
 pub struct DocumentId;
--- a/meilidb-core/src/criterion/exact.rs
+++ b/meilidb-core/src/criterion/exact.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
--- a/meilidb-core/src/criterion/mod.rs
+++ b/meilidb-core/src/criterion/mod.rs
@ -4,11 +4,10 @@ mod words_proximity;
 mod sum_of_words_attribute;
 mod sum_of_words_position;
 mod exact;
-mod sort_by_attr;
 mod document_id;

 use std::cmp::Ordering;
-use crate::rank::RawDocument;
+use crate::RawDocument;

 pub use self::{
    sum_of_typos::SumOfTypos,
@ -17,7 +16,6 @@ pub use self::{
    sum_of_words_attribute::SumOfWordsAttribute,
    sum_of_words_position::SumOfWordsPosition,
    exact::Exact,
-    sort_by_attr::SortByAttr,
    document_id::DocumentId,
 };

--- a/meilidb-core/src/criterion/number_of_words.rs
+++ b/meilidb-core/src/criterion/number_of_words.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn number_of_query_words(query_index: &[u32]) -> usize {
--- a/meilidb-core/src/criterion/sum_of_typos.rs
+++ b/meilidb-core/src/criterion/sum_of_typos.rs
@ -2,8 +2,8 @@ use std::cmp::Ordering;

 use slice_group_by::GroupBy;

-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 // This function is a wrong logarithmic 10 function.
 // It is safe to panic on input number higher than 3,
--- a/meilidb-core/src/criterion/sum_of_words_attribute.rs
+++ b/meilidb-core/src/criterion/sum_of_words_attribute.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
--- a/meilidb-core/src/criterion/sum_of_words_position.rs
+++ b/meilidb-core/src/criterion/sum_of_words_position.rs
@ -1,9 +1,7 @@
 use std::cmp::Ordering;
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 #[inline]
 fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
--- a/meilidb-core/src/criterion/words_proximity.rs
+++ b/meilidb-core/src/criterion/words_proximity.rs
@ -1,9 +1,7 @@
 use std::cmp::{self, Ordering};
-
 use slice_group_by::GroupBy;
-
-use crate::rank::criterion::Criterion;
-use crate::rank::RawDocument;
+use crate::criterion::Criterion;
+use crate::RawDocument;

 const MAX_DISTANCE: u16 = 8;

--- a/meilidb-core/src/data/doc_ids.rs
+++ b/meilidb-core/src/data/doc_ids.rs
--- a/meilidb-core/src/data/doc_indexes.rs
+++ b/meilidb-core/src/data/doc_indexes.rs
--- a/meilidb-core/src/data/mod.rs
+++ b/meilidb-core/src/data/mod.rs
--- a/meilidb-core/src/data/shared_data.rs
+++ b/meilidb-core/src/data/shared_data.rs
@ -1,9 +1,9 @@
 use std::sync::Arc;
 use std::ops::Deref;

-#[derive(Default, Clone)]
+#[derive(Clone)]
 pub struct SharedData {
-    pub bytes: Arc<Vec<u8>>,
+    pub bytes: Arc<[u8]>,
    pub offset: usize,
    pub len: usize,
 }
@ -15,7 +15,7 @@ impl SharedData {
        SharedData::new(bytes, 0, len)
    }

-    pub fn new(bytes: Arc<Vec<u8>>, offset: usize, len: usize) -> SharedData {
+    pub fn new(bytes: Arc<[u8]>, offset: usize, len: usize) -> SharedData {
        SharedData { bytes, offset, len }
    }

@ -33,6 +33,16 @@ impl SharedData {
    }
 }

+impl Default for SharedData {
+    /// Builds an empty `SharedData`: a zero-length byte buffer with
+    /// `offset` and `len` both set to `0`.
+    fn default() -> SharedData {
+        let empty: Arc<[u8]> = Arc::from(Vec::new());
+        SharedData::new(empty, 0, 0)
+    }
+}
+
+
 impl Deref for SharedData {
    type Target = [u8];

--- a/meilidb-core/src/distinct_map.rs
+++ b/meilidb-core/src/distinct_map.rs
--- a/meilidb-core/src/index.rs
+++ b/meilidb-core/src/index.rs
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@ -1,15 +1,117 @@
 pub mod criterion;
+pub mod data;
+mod index;
+mod automaton;
 mod query_builder;
 mod distinct_map;

+pub mod shared_data_cursor;
+pub mod write_to_bytes;
+
 use std::sync::Arc;
+use serde::{Serialize, Deserialize};

 use slice_group_by::GroupBy;
 use rayon::slice::ParallelSliceMut;

-use crate::{Match, DocumentId};
+pub use self::index::{Index, IndexBuilder};
+pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder};

-pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
+/// Represents an internally generated unique document identifier.
+///
+/// It tells the database which document you want to deserialize.
+/// Helpful for custom ranking.
+#[derive(Serialize, Deserialize)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
+pub struct DocumentId(pub u64);
+
+/// This structure represents the position of a word
+/// in a document and its attributes.
+///
+/// This is stored in the map, generated at index time,
+/// extracted and interpreted at search time.
+///
+/// The layout is `#[repr(C)]`; a unit test in this file checks that
+/// the structure stays 16 bytes wide.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[repr(C)]
+pub struct DocIndex {
+    /// The document identifier where the word was found.
+    pub document_id: DocumentId,
+
+    /// The attribute in the document where the word was found
+    /// along with the index in it.
+    pub attribute: u16,
+    pub word_index: u16,
+
+    /// The position in bytes where the word was found
+    /// along with the length of it.
+    ///
+    /// It informs on the original word area in the text indexed
+    /// without needing to run the tokenizer again.
+    // NOTE(review): the fields are named `char_*` while the comment above
+    // speaks about bytes; confirm the actual unit against the tokenizer.
+    pub char_index: u16,
+    pub char_length: u16,
+}
+
+/// This structure represents a matching word with information
+/// on the location of the word in the document.
+///
+/// The order of the fields is important because it defines
+/// the way these structures are ordered between themselves
+/// (the comparison traits are derived).
+///
+/// The word in itself is not important.
+// TODO do data oriented programming ? very arrays ?
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Match {
+    /// The word index in the query sentence.
+    /// Same as the `attribute_index` but for the query words.
+    ///
+    /// Used to retrieve the automaton that matches this word.
+    pub query_index: u32,
+
+    /// The distance the word has with the query word
+    /// (i.e. the Levenshtein distance).
+    pub distance: u8,
+
+    /// The attribute in the document where the word was found
+    /// along with the index in it.
+    pub attribute: u16,
+    pub word_index: u16,
+
+    /// Whether the word that matches is an exact match or a prefix.
+    pub is_exact: bool,
+
+    /// The position in bytes where the word was found
+    /// along with the length of it.
+    ///
+    /// It informs on the original word area in the text indexed
+    /// without needing to run the tokenizer again.
+    pub char_index: u16,
+    pub char_length: u16,
+}
+
+impl Match {
+    /// Returns a `Match` whose numeric fields are all `0` and whose
+    /// `is_exact` flag is `false`.
+    pub fn zero() -> Self {
+        Match {
+            query_index: 0,
+            distance: 0,
+            attribute: 0,
+            word_index: 0,
+            is_exact: false,
+            char_index: 0,
+            char_length: 0,
+        }
+    }
+
+    /// Returns a `Match` whose numeric fields all hold the maximum value
+    /// of their type and whose `is_exact` flag is `true`.
+    pub fn max() -> Self {
+        Match {
+            query_index: u32::max_value(),
+            distance: u8::max_value(),
+            attribute: u16::max_value(),
+            word_index: u16::max_value(),
+            is_exact: true,
+            char_index: u16::max_value(),
+            char_length: u16::max_value(),
+        }
+    }
+}

 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Document {
@ -181,3 +283,15 @@ impl Matches {
        }
    }
 }
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::mem;
+
+    // Guards the in-memory size of `DocIndex`: one `u64` document id
+    // followed by four `u16` fields must stay 16 bytes.
+    #[test]
+    fn docindex_mem_size() {
+        assert_eq!(mem::size_of::<DocIndex>(), 16);
+    }
+}
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@ -1,53 +1,27 @@
-use std::{cmp, mem};
-use std::ops::Range;
-use std::time::Instant;
 use std::hash::Hash;
+use std::ops::{Range, Deref};
 use std::rc::Rc;
+use std::time::Instant;
+use std::{cmp, mem};

 use rayon::slice::ParallelSliceMut;
-use slice_group_by::{GroupByMut, LinearStrGroupBy};
+use slice_group_by::GroupByMut;
+use meilidb_tokenizer::{is_cjk, split_query_string};
 use hashbrown::{HashMap, HashSet};
 use fst::Streamer;
 use log::info;

 use crate::automaton::{self, DfaExt, AutomatonExt};
-use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap};
-use crate::rank::criterion::Criteria;
-use crate::database::Index;
-use crate::rank::{raw_documents_from_matches, RawDocument, Document};
-use crate::{is_cjk, Match, DocumentId};
+use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
+use crate::criterion::Criteria;
+use crate::{raw_documents_from_matches, RawDocument, Document};
+use crate::{Index, Match, DocumentId};

-#[derive(Debug, PartialEq, Eq)]
-enum CharCategory {
-    Space,
-    Cjk,
-    Other,
-}
-
-fn classify_char(c: char) -> CharCategory {
-    if c.is_whitespace() { CharCategory::Space }
-    else if is_cjk(c) { CharCategory::Cjk }
-    else { CharCategory::Other }
-}
-
-fn is_word(s: &&str) -> bool {
-    !s.chars().any(char::is_whitespace)
-}
-
-fn same_group_category(a: char, b: char) -> bool {
-    let ca = classify_char(a);
-    let cb = classify_char(b);
-    if ca == CharCategory::Cjk || cb == CharCategory::Cjk { false } else { ca == cb }
-}
-
-fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {
+fn generate_automatons(query: &str) -> Vec<DfaExt> {
    let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
-    let mut groups = LinearStrGroupBy::new(query, same_group_category)
-                        .filter(is_word)
-                        .map(str::to_lowercase)
-                        .peekable();
-
+    let mut groups = split_query_string(query).map(str::to_lowercase).peekable();
    let mut automatons = Vec::new();
+
    while let Some(word) = groups.next() {
        let has_following_word = groups.peek().is_some();
        let lev = if has_following_word || has_end_whitespace || word.chars().all(is_cjk) {
@ -61,28 +35,26 @@ fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {
    automatons
 }

-pub type FilterFunc = fn(DocumentId) -> bool;
-
-pub struct QueryBuilder<'i, 'c, FI> {
-    index: &'i Index,
+pub struct QueryBuilder<'c, I, FI = fn(DocumentId) -> bool> {
+    index: I,
    criteria: Criteria<'c>,
    searchable_attrs: Option<HashSet<u16>>,
    filter: Option<FI>,
 }

-impl<'i, 'c> QueryBuilder<'i, 'c, FilterFunc> {
-    pub fn new(index: &'i Index) -> Self {
+impl<'c, I> QueryBuilder<'c, I, fn(DocumentId) -> bool> {
+    pub fn new(index: I) -> Self {
        QueryBuilder::with_criteria(index, Criteria::default())
    }

-    pub fn with_criteria(index: &'i Index, criteria: Criteria<'c>) -> Self {
+    pub fn with_criteria(index: I, criteria: Criteria<'c>) -> Self {
        QueryBuilder { index, criteria, searchable_attrs: None, filter: None }
    }
 }

-impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
+impl<'c, I, FI> QueryBuilder<'c, I, FI>
 {
-    pub fn with_filter<F>(self, function: F) -> QueryBuilder<'i, 'c, F>
+    pub fn with_filter<F>(self, function: F) -> QueryBuilder<'c, I, F>
    where F: Fn(DocumentId) -> bool,
    {
        QueryBuilder {
@ -93,7 +65,7 @@ impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
        }
    }

-    pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'i, 'c, FI, F>
+    pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'c, I, FI, F>
    where F: Fn(DocumentId) -> Option<K>,
          K: Hash + Eq,
    {
@ -108,9 +80,13 @@ impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
        let attributes = self.searchable_attrs.get_or_insert_with(HashSet::new);
        attributes.insert(attribute);
    }
+}

+impl<'c, I, FI> QueryBuilder<'c, I, FI>
+where I: Deref<Target=Index>,
+{
    fn query_all(&self, query: &str) -> Vec<RawDocument> {
-        let automatons = split_whitespace_automatons(query);
+        let automatons = generate_automatons(query);

        let mut stream = {
            let mut op_builder = fst::map::OpBuilder::new();
@ -118,7 +94,7 @@ impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
                let stream = self.index.map.search(automaton);
                op_builder.push(stream);
            }
-            op_builder.union()
+            op_builder.r#union()
        };

        let mut matches = Vec::new();
@ -159,8 +135,9 @@ impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
    }
 }

-impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
-where FI: Fn(DocumentId) -> bool,
+impl<'c, I, FI> QueryBuilder<'c, I, FI>
+where I: Deref<Target=Index>,
+      FI: Fn(DocumentId) -> bool,
 {
    pub fn query(self, query: &str, range: Range<usize>) -> Vec<Document> {
        // We delegate the filter work to the distinct query builder,
@ -212,15 +189,15 @@ where FI: Fn(DocumentId) -> bool,
    }
 }

-pub struct DistinctQueryBuilder<'i, 'c, FI, FD> {
-    inner: QueryBuilder<'i, 'c, FI>,
+pub struct DistinctQueryBuilder<'c, I, FI, FD> {
+    inner: QueryBuilder<'c, I, FI>,
    function: FD,
    size: usize,
 }

-impl<'i, 'c, FI, FD> DistinctQueryBuilder<'i, 'c, FI, FD>
+impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD>
 {
-    pub fn with_filter<F>(self, function: F) -> DistinctQueryBuilder<'i, 'c, F, FD>
+    pub fn with_filter<F>(self, function: F) -> DistinctQueryBuilder<'c, I, F, FD>
    where F: Fn(DocumentId) -> bool,
    {
        DistinctQueryBuilder {
@ -235,8 +212,9 @@ impl<'i, 'c, FI, FD> DistinctQueryBuilder<'i, 'c, FI, FD>
    }
 }

-impl<'i, 'c, FI, FD, K> DistinctQueryBuilder<'i, 'c, FI, FD>
-where FI: Fn(DocumentId) -> bool,
+impl<'c, I, FI, FD, K> DistinctQueryBuilder<'c, I, FI, FD>
+where I: Deref<Target=Index>,
+      FI: Fn(DocumentId) -> bool,
      FD: Fn(DocumentId) -> Option<K>,
      K: Hash + Eq,
 {
--- a/meilidb-core/src/shared_data_cursor.rs
+++ b/meilidb-core/src/shared_data_cursor.rs
@ -7,12 +7,12 @@ pub struct SharedDataCursor(Cursor<SharedData>);
 impl SharedDataCursor {
    pub fn from_bytes(bytes: Vec<u8>) -> SharedDataCursor {
        let len = bytes.len();
-        let bytes = Arc::new(bytes);
+        let bytes = Arc::from(bytes);

        SharedDataCursor::from_shared_bytes(bytes, 0, len)
    }

-    pub fn from_shared_bytes(bytes: Arc<Vec<u8>>, offset: usize, len: usize) -> SharedDataCursor {
+    pub fn from_shared_bytes(bytes: Arc<[u8]>, offset: usize, len: usize) -> SharedDataCursor {
        let data = SharedData::new(bytes, offset, len);
        let cursor = Cursor::new(data);

--- a/meilidb-core/src/write_to_bytes.rs
+++ b/meilidb-core/src/write_to_bytes.rs
--- a/meilidb-data/Cargo.toml
+++ b/meilidb-data/Cargo.toml
@ -0,0 +1,25 @@
+[package]
+name = "meilidb-data"
+version = "0.1.0"
+authors = ["Kerollmops <renault.cle@gmail.com>"]
+edition = "2018"
+
+[dependencies]
+arc-swap = "0.3.11"
+bincode = "1.1.2"
+byteorder = "1.3.1"
+hashbrown = { version = "0.2.2", features = ["serde"] }
+linked-hash-map = { version = "0.5.2", features = ["serde_impl"] }
+meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
+meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
+ordered-float = { version = "1.0.2", features = ["serde"] }
+sdset = "0.3.1"
+serde = { version = "1.0.90", features = ["derive"] }
+serde_json = { version = "1.0.39", features = ["preserve_order"] }
+sled = "0.23.0"
+toml = { version = "0.5.0", features = ["preserve_order"] }
+deunicode = "1.0.0"
+
+[dependencies.rmp-serde]
+git = "https://github.com/3Hren/msgpack-rust.git"
+rev = "40b3d48"
--- a/meilidb-data/src/database.rs
+++ b/meilidb-data/src/database.rs
@ -0,0 +1,464 @@
+use std::collections::HashSet;
+use std::io::{self, Cursor, BufRead};
+use std::iter::FromIterator;
+use std::path::Path;
+use std::sync::Arc;
+use std::{error, fmt};
+
+use arc_swap::{ArcSwap, Lease};
+use byteorder::{ReadBytesExt, BigEndian};
+use hashbrown::HashMap;
+use meilidb_core::criterion::Criteria;
+use meilidb_core::QueryBuilder;
+use meilidb_core::shared_data_cursor::{FromSharedDataCursor, SharedDataCursor};
+use meilidb_core::write_to_bytes::WriteToBytes;
+use meilidb_core::{DocumentId, Index as WordIndex};
+use rmp_serde::decode::{Error as RmpError};
+use sdset::SetBuf;
+use serde::de;
+use sled::IVec;
+
+use crate::{Schema, SchemaAttr, RankedMap};
+use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
+use crate::indexer::Indexer;
+
+/// Errors returned by the database and index operations of this module.
+#[derive(Debug)]
+pub enum Error {
+    /// An index with this name already exists but with a different schema.
+    SchemaDiffer,
+    /// The index tree does not contain a `"schema"` entry.
+    SchemaMissing,
+    /// The index tree does not contain a `"word-index"` entry.
+    WordIndexMissing,
+    /// No document identifier could be extracted from the document.
+    MissingDocumentId,
+    /// An error reported by sled.
+    SledError(sled::Error),
+    /// An error reported by bincode.
+    BincodeError(bincode::Error),
+    /// An error reported by the document serializer.
+    SerializerError(SerializerError),
+}
+
+/// Wraps a sled error into `Error::SledError` so that `?` can convert it.
+impl From<sled::Error> for Error {
+    fn from(error: sled::Error) -> Error {
+        Error::SledError(error)
+    }
+}
+
+/// Wraps a bincode error into `Error::BincodeError` so that `?` can convert it.
+impl From<bincode::Error> for Error {
+    fn from(error: bincode::Error) -> Error {
+        Error::BincodeError(error)
+    }
+}
+
+/// Wraps a serializer error into `Error::SerializerError` so that `?` can convert it.
+impl From<SerializerError> for Error {
+    fn from(error: SerializerError) -> Error {
+        Error::SerializerError(error)
+    }
+}
+
+impl fmt::Display for Error {
+    /// Writes a short, human readable description of the error; wrapped
+    /// errors are appended after a `; ` separator.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Error::SchemaDiffer => f.write_str("schemas differ"),
+            Error::SchemaMissing => f.write_str("this index does not have a schema"),
+            Error::WordIndexMissing => f.write_str("this index does not have a word index"),
+            Error::MissingDocumentId => f.write_str("document id is missing"),
+            Error::SledError(inner) => write!(f, "sled error; {}", inner),
+            Error::BincodeError(inner) => write!(f, "bincode error; {}", inner),
+            Error::SerializerError(inner) => write!(f, "serializer error; {}", inner),
+        }
+    }
+}
+
+impl error::Error for Error { }
+
+/// Returns the name of the sled tree that stores the index called `name`:
+/// the bytes of `name` prefixed by `index-`.
+fn index_name(name: &str) -> Vec<u8> {
+    let mut tree_name = String::from("index-");
+    tree_name.push_str(name);
+    tree_name.into_bytes()
+}
+
+/// Builds the key under which the attribute `attr` of the document `id`
+/// is stored.
+///
+/// The key is the `document-` prefix, followed by the big-endian bytes of
+/// the document id and then the big-endian bytes of the attribute.
+fn document_key(id: DocumentId, attr: SchemaAttr) -> Vec<u8> {
+    let id_bytes = id.0.to_be_bytes();
+    let attr_bytes = attr.0.to_be_bytes();
+
+    let mut key = Vec::new();
+    key.extend_from_slice(b"document-");
+    key.extend_from_slice(&id_bytes);
+    key.extend_from_slice(&attr_bytes);
+    key
+}
+
+/// Extension methods for `Cursor`, used to parse raw keys.
+trait CursorExt {
+    /// Advances the cursor past `needle` when the unread bytes start with it.
+    ///
+    /// Returns `true` if `needle` was consumed; otherwise returns `false`
+    /// and leaves the cursor position untouched.
+    fn consume_if_eq(&mut self, needle: &[u8]) -> bool;
+}
+
+impl<T: AsRef<[u8]>> CursorExt for Cursor<T> {
+    fn consume_if_eq(&mut self, needle: &[u8]) -> bool {
+        let position = self.position() as usize;
+
+        // A cursor can be positioned past the end of its buffer (for example
+        // through `set_position`). `get` returns `None` in that case, where
+        // slicing with `[position..]` would panic.
+        let matches = self.get_ref().as_ref()
+            .get(position..)
+            .map_or(false, |unread| unread.starts_with(needle));
+
+        if matches {
+            self.consume(needle.len());
+        }
+
+        matches
+    }
+}
+
+/// Parses a key built by `document_key` back into its document id and attribute.
+///
+/// Fails with `InvalidData` when the key does not start with `document-`,
+/// and with the underlying read error when the big-endian `u64` id or
+/// `u16` attribute cannot be read after the prefix.
+fn extract_document_key(key: Vec<u8>) -> io::Result<(DocumentId, SchemaAttr)> {
+    let mut cursor = Cursor::new(key);
+
+    if cursor.consume_if_eq(b"document-") {
+        let id = cursor.read_u64::<BigEndian>()?;
+        let attr = cursor.read_u16::<BigEndian>()?;
+        Ok((DocumentId(id), SchemaAttr(attr)))
+    } else {
+        Err(io::Error::from(io::ErrorKind::InvalidData))
+    }
+}
+
+/// Handle on a sled database holding several named indexes.
+///
+/// Indexes that were already opened are kept in `opened`, keyed by name,
+/// so that subsequent `open_index` calls return a clone of the same `RawIndex`.
+#[derive(Clone)]
+pub struct Database {
+    opened: Arc<ArcSwap<HashMap<String, RawIndex>>>,
+    inner: sled::Db,
+}
+
+impl Database {
+    /// Opens the sled database located at `path` with `sled::Db::start_default`,
+    /// starting with an empty set of opened indexes.
+    pub fn start_default<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
+        let inner = sled::Db::start_default(path)?;
+        let opened = Arc::new(ArcSwap::new(Arc::new(HashMap::new())));
+        Ok(Database { opened, inner })
+    }
+
+    /// Opens the index called `name`.
+    ///
+    /// Returns `Ok(None)` when the database has no tree for this index,
+    /// and an error when the tree exists but cannot be loaded
+    /// (see `RawIndex::from_raw`).
+    pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
+        // check if the index was already opened
+        if let Some(raw_index) = self.opened.lease().get(name) {
+            return Ok(Some(Index(raw_index.clone())))
+        }
+
+        let raw_name = index_name(name);
+        if self.inner.tree_names().into_iter().any(|tn| tn == raw_name) {
+            let tree = self.inner.open_tree(raw_name)?;
+            let raw_index = RawIndex::from_raw(tree)?;
+
+            // publish a copy of the map that also contains this index
+            self.opened.rcu(|opened| {
+                let mut opened = HashMap::clone(opened);
+                opened.insert(name.to_string(), raw_index.clone());
+                opened
+            });
+
+            return Ok(Some(Index(raw_index)))
+        }
+
+        Ok(None)
+    }
+
+    /// Returns the index called `name`, creating it with `schema` when it
+    /// does not exist yet.
+    ///
+    /// When the index already exists its schema must be equal to `schema`,
+    /// otherwise `Error::SchemaDiffer` is returned.
+    pub fn create_index(&self, name: String, schema: Schema) -> Result<Index, Error> {
+        match self.open_index(&name)? {
+            Some(index) => {
+                if index.schema() != &schema {
+                    return Err(Error::SchemaDiffer);
+                }
+
+                Ok(index)
+            },
+            None => {
+                let raw_name = index_name(&name);
+                let tree = self.inner.open_tree(raw_name)?;
+                let raw_index = RawIndex::new_from_raw(tree, schema)?;
+
+                // publish a copy of the map that also contains this index
+                self.opened.rcu(|opened| {
+                    let mut opened = HashMap::clone(opened);
+                    opened.insert(name.clone(), raw_index.clone());
+                    opened
+                });
+
+                Ok(Index(raw_index))
+            },
+        }
+    }
+}
+
+/// Low level access to one index: its schema, the in-memory word index and
+/// ranked map, and the sled tree in which everything is stored.
+///
+/// The tree holds the `"schema"`, `"word-index"` and `"ranked-map"` entries
+/// next to the document attributes (see `document_key`).
+#[derive(Clone)]
+pub struct RawIndex {
+    schema: Schema,
+    word_index: Arc<ArcSwap<WordIndex>>,
+    ranked_map: Arc<ArcSwap<RankedMap>>,
+    inner: Arc<sled::Tree>,
+}
+
+impl RawIndex {
+    /// Loads an existing index from its tree.
+    ///
+    /// The `"schema"` and `"word-index"` entries are required
+    /// (`Error::SchemaMissing` / `Error::WordIndexMissing` otherwise);
+    /// a missing `"ranked-map"` entry is replaced by `RankedMap::default()`.
+    fn from_raw(inner: Arc<sled::Tree>) -> Result<RawIndex, Error> {
+        let schema = {
+            let bytes = inner.get("schema")?;
+            let bytes = bytes.ok_or(Error::SchemaMissing)?;
+            Schema::read_from_bin(bytes.as_ref())?
+        };
+
+        let bytes = inner.get("word-index")?;
+        let bytes = bytes.ok_or(Error::WordIndexMissing)?;
+        let word_index = {
+            let len = bytes.len();
+            let bytes: Arc<[u8]> = Into::into(bytes);
+            let mut cursor = SharedDataCursor::from_shared_bytes(bytes, 0, len);
+
+            // TODO must handle this error
+            // NOTE(review): until then, invalid word index bytes make this panic.
+            let word_index = WordIndex::from_shared_data_cursor(&mut cursor).unwrap();
+
+            Arc::new(ArcSwap::new(Arc::new(word_index)))
+        };
+
+        let ranked_map = {
+            let map = match inner.get("ranked-map")? {
+                Some(bytes) => bincode::deserialize(bytes.as_ref())?,
+                None => RankedMap::default(),
+            };
+
+            Arc::new(ArcSwap::new(Arc::new(map)))
+        };
+
+        Ok(RawIndex { schema, word_index, ranked_map, inner })
+    }
+
+    /// Initializes a new index in `inner`: stores `schema` and a default
+    /// word index in the tree. The ranked map starts as
+    /// `RankedMap::default()` and is not written to the tree here.
+    fn new_from_raw(inner: Arc<sled::Tree>, schema: Schema) -> Result<RawIndex, Error> {
+        let mut schema_bytes = Vec::new();
+        schema.write_to_bin(&mut schema_bytes)?;
+        inner.set("schema", schema_bytes)?;
+
+        let word_index = WordIndex::default();
+        inner.set("word-index", word_index.into_bytes())?;
+        let word_index = Arc::new(ArcSwap::new(Arc::new(word_index)));
+
+        let ranked_map = Arc::new(ArcSwap::new(Arc::new(RankedMap::default())));
+
+        Ok(RawIndex { schema, word_index, ranked_map, inner })
+    }
+
+    /// Returns the schema of this index.
+    pub fn schema(&self) -> &Schema {
+        &self.schema
+    }
+
+    /// Returns a lease on the current in-memory word index.
+    pub fn word_index(&self) -> Lease<Arc<WordIndex>> {
+        self.word_index.lease()
+    }
+
+    /// Returns a lease on the current in-memory ranked map.
+    pub fn ranked_map(&self) -> Lease<Arc<RankedMap>> {
+        self.ranked_map.lease()
+    }
+
+    /// Stores `word_index` under the `"word-index"` entry of the tree and,
+    /// once that succeeded, makes it the current in-memory word index.
+    pub fn update_word_index(&self, word_index: Arc<WordIndex>) -> sled::Result<()> {
+        let data = word_index.into_bytes();
+        self.inner.set("word-index", data).map(drop)?;
+        self.word_index.store(word_index);
+
+        Ok(())
+    }
+
+    /// Stores `ranked_map` under the `"ranked-map"` entry of the tree and,
+    /// once that succeeded, makes it the current in-memory ranked map.
+    ///
+    /// # Panics
+    ///
+    /// Panics if bincode fails to serialize the ranked map.
+    pub fn update_ranked_map(&self, ranked_map: Arc<RankedMap>) -> sled::Result<()> {
+        let data = bincode::serialize(ranked_map.as_ref()).unwrap();
+        self.inner.set("ranked-map", data).map(drop)?;
+        self.ranked_map.store(ranked_map);
+
+        Ok(())
+    }
+
+    /// Stores `value` as the attribute `attr` of the document `id`,
+    /// forwarding the result of the underlying `set` call.
+    pub fn set_document_attribute<V>(
+        &self,
+        id: DocumentId,
+        attr: SchemaAttr,
+        value: V,
+    ) -> Result<Option<IVec>, sled::Error>
+    where IVec: From<V>,
+    {
+        let key = document_key(id, attr);
+        Ok(self.inner.set(key, value)?)
+    }
+
+    /// Returns the stored value of the attribute `attr` of the document `id`,
+    /// or `None` when there is no such entry.
+    pub fn get_document_attribute(
+        &self,
+        id: DocumentId,
+        attr: SchemaAttr
+    ) -> Result<Option<IVec>, sled::Error>
+    {
+        let key = document_key(id, attr);
+        Ok(self.inner.get(key)?)
+    }
+
+    /// Returns an iterator over the stored attributes of the document `id`.
+    ///
+    /// It scans the inclusive key range going from the `SchemaAttr::min()`
+    /// key to the `SchemaAttr::max()` key of this document.
+    pub fn get_document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
+        let start = document_key(id, SchemaAttr::min());
+        let end = document_key(id, SchemaAttr::max());
+        DocumentFieldsIter(self.inner.range(start..=end))
+    }
+
+    /// Deletes the attribute `attr` of the document `id`, forwarding the
+    /// result of the underlying `del` call.
+    pub fn del_document_attribute(
+        &self,
+        id: DocumentId,
+        attr: SchemaAttr
+    ) -> Result<Option<IVec>, sled::Error>
+    {
+        let key = document_key(id, attr);
+        Ok(self.inner.del(key)?)
+    }
+}
+
+/// Iterator over the stored fields of one document, as returned by
+/// `RawIndex::get_document_fields`.
+pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
+
+impl<'a> Iterator for DocumentFieldsIter<'a> {
+    type Item = Result<(DocumentId, SchemaAttr, IVec), Error>;
+
+    /// Yields the next `(document id, attribute, value)` entry, or the sled
+    /// error reported by the underlying iterator.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a key of the range cannot be parsed as a document key.
+    fn next(&mut self) -> Option<Self::Item> {
+        let entry = self.0.next()?;
+
+        let item = match entry {
+            Ok((key, value)) => {
+                let (id, attr) = extract_document_key(key).unwrap();
+                Ok((id, attr, value))
+            },
+            Err(e) => Err(Error::SledError(e)),
+        };
+
+        Some(item)
+    }
+}
+
+/// High level handle on an index, wrapping a `RawIndex`.
+#[derive(Clone)]
+pub struct Index(RawIndex);
+
+impl Index {
+    /// Returns a query builder working on the current word index,
+    /// built with `QueryBuilder::new`.
+    pub fn query_builder(&self) -> QueryBuilder<Lease<Arc<WordIndex>>> {
+        let word_index = self.word_index();
+        QueryBuilder::new(word_index)
+    }
+
+    /// Returns a query builder working on the current word index and
+    /// using the given `criteria`.
+    pub fn query_builder_with_criteria<'c>(
+        &self,
+        criteria: Criteria<'c>,
+    ) -> QueryBuilder<'c, Lease<Arc<WordIndex>>>
+    {
+        let word_index = self.word_index();
+        QueryBuilder::with_criteria(word_index, criteria)
+    }
+
+    /// Returns the schema of this index.
+    pub fn schema(&self) -> &Schema {
+        self.0.schema()
+    }
+
+    /// Returns a lease on the current word index.
+    pub fn word_index(&self) -> Lease<Arc<WordIndex>> {
+        self.0.word_index()
+    }
+
+    /// Returns a lease on the current ranked map.
+    pub fn ranked_map(&self) -> Lease<Arc<RankedMap>> {
+        self.0.ranked_map()
+    }
+
+    /// Starts a batch of document additions, seeded with a clone of the
+    /// current ranked map.
+    pub fn documents_addition(&self) -> DocumentsAddition {
+        let index = self.0.clone();
+        let ranked_map = self.0.ranked_map().clone();
+        DocumentsAddition::from_raw(index, ranked_map)
+    }
+
+    /// Starts a batch of document deletions.
+    pub fn documents_deletion(&self) -> DocumentsDeletion {
+        let index = self.0.clone();
+        DocumentsDeletion::from_raw(index)
+    }
+
+    /// Deserializes the document `id` into a `T`.
+    ///
+    /// When `fields` is `Some`, only the listed attribute names are handed
+    /// to the deserializer; names for which the schema returns no attribute
+    /// are dropped. When it is `None` no restriction is applied.
+    pub fn document<T>(
+        &self,
+        fields: Option<&HashSet<&str>>,
+        id: DocumentId,
+    ) -> Result<Option<T>, RmpError>
+    where T: de::DeserializeOwned,
+    {
+        // translate the attribute names into schema attributes
+        let fields = match fields {
+            Some(fields) => {
+                let iter = fields.iter().filter_map(|n| self.0.schema().attribute(n));
+                Some(HashSet::from_iter(iter))
+            },
+            None => None,
+        };
+
+        let mut deserializer = Deserializer {
+            document_id: id,
+            raw_index: &self.0,
+            fields: fields.as_ref(),
+        };
+
+        // TODO: currently we return an error if all document fields are missing,
+        //       returning None would have been better
+        T::deserialize(&mut deserializer).map(Some)
+    }
+}
+
+/// A batch of documents to add to (or update in) an index.
+///
+/// Documents are serialized one at a time with `update_document`; the
+/// resulting word index and ranked map are published by `finalize`.
+pub struct DocumentsAddition {
+    inner: RawIndex,
+    indexer: Indexer,
+    ranked_map: RankedMap,
+}
+
+impl DocumentsAddition {
+    /// Creates an empty batch working on `inner`, starting from `ranked_map`.
+    pub fn from_raw(inner: RawIndex, ranked_map: RankedMap) -> DocumentsAddition {
+        DocumentsAddition { inner, indexer: Indexer::new(), ranked_map }
+    }
+
+    /// Serializes `document` into the index.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::MissingDocumentId` when no identifier can be extracted
+    /// from the document using the schema identifier name, and forwards any
+    /// error raised by the serializer.
+    pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
+    where D: serde::Serialize,
+    {
+        let schema = self.inner.schema();
+        let identifier = schema.identifier_name();
+
+        let document_id = match extract_document_id(identifier, &document)? {
+            Some(id) => id,
+            None => return Err(Error::MissingDocumentId),
+        };
+
+        let serializer = Serializer {
+            schema,
+            index: &self.inner,
+            indexer: &mut self.indexer,
+            ranked_map: &mut self.ranked_map,
+            document_id,
+        };
+
+        document.serialize(serializer)?;
+
+        Ok(())
+    }
+
+    /// Publishes the batch: merges the words indexed by this batch into the
+    /// current word index and stores the updated ranked map.
+    pub fn finalize(self) -> sled::Result<()> {
+        let delta_index = self.indexer.build();
+
+        let index = self.inner.word_index();
+        let new_index = index.r#union(&delta_index);
+
+        let new_index = Arc::from(new_index);
+        self.inner.update_word_index(new_index)?;
+
+        // The ranked map was filled while serializing the documents; it must
+        // be persisted too, otherwise it is dropped together with `self`.
+        self.inner.update_ranked_map(Arc::new(self.ranked_map))?;
+
+        Ok(())
+    }
+}
+
+/// A batch of documents to remove from the word index of an index.
+pub struct DocumentsDeletion {
+    inner: RawIndex,
+    documents: Vec<DocumentId>,
+}
+
+impl DocumentsDeletion {
+    /// Creates an empty batch working on `inner`.
+    pub fn from_raw(inner: RawIndex) -> DocumentsDeletion {
+        let documents = Vec::new();
+        DocumentsDeletion { inner, documents }
+    }
+
+    /// Registers the document `id` for deletion; nothing is removed until
+    /// `finalize` is called.
+    pub fn delete_document(&mut self, id: DocumentId) {
+        self.documents.push(id);
+    }
+
+    /// Removes every registered document from the current word index and
+    /// stores the resulting word index.
+    pub fn finalize(mut self) -> Result<(), Error> {
+        // the ids are sorted and deduplicated before being handed to
+        // `SetBuf::new_unchecked`, which performs no check itself
+        self.documents.sort_unstable();
+        self.documents.dedup();
+        let removed_ids = SetBuf::new_unchecked(self.documents);
+
+        let current_index = self.inner.word_index();
+        let updated_index = Arc::from(current_index.remove_documents(&removed_ids));
+        self.inner.update_word_index(updated_index)?;
+
+        Ok(())
+    }
+}
--- a/meilidb-data/src/index_event.rs
+++ b/meilidb-data/src/index_event.rs
@ -0,0 +1,45 @@
+use std::error::Error;
+
+use byteorder::{ReadBytesExt, WriteBytesExt};
+
+use meilidb_core::{Index as WordIndex};
+use meilidb_core::data::DocIds;
+use meilidb_core::write_to_bytes::WriteToBytes;
+use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
+
+/// A borrowed index event, written as one tag byte followed by its payload.
+enum NewIndexEvent<'a> {
+    RemovedDocuments(&'a DocIds),
+    UpdatedDocuments(&'a WordIndex),
+}
+
+impl<'a> WriteToBytes for NewIndexEvent<'a> {
+    /// Appends the tag byte (`0` for removed documents, `1` for updated
+    /// documents) and then the payload's own byte representation to `bytes`.
+    fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
+        let tag: u8 = match self {
+            NewIndexEvent::RemovedDocuments(_) => 0,
+            NewIndexEvent::UpdatedDocuments(_) => 1,
+        };
+
+        // the result of writing the tag is deliberately ignored
+        let _ = bytes.write_u8(tag);
+
+        match self {
+            NewIndexEvent::RemovedDocuments(doc_ids) => doc_ids.write_to_bytes(bytes),
+            NewIndexEvent::UpdatedDocuments(index) => index.write_to_bytes(bytes),
+        }
+    }
+}
+
+/// An owned index event, as read back from a `SharedDataCursor`.
+enum IndexEvent {
+    RemovedDocuments(DocIds),
+    UpdatedDocuments(WordIndex),
+}
+
+impl FromSharedDataCursor for IndexEvent {
+    // `dyn` makes the trait object explicit; the bare `Box<Error>` form is deprecated
+    type Error = Box<dyn Error>;
+
+    /// Reads one tag byte, then the matching payload: `0` is followed by the
+    /// removed `DocIds`, `1` by the updated `WordIndex`.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the tag byte cannot be read, when the tag is neither `0`
+    /// nor `1`, or when the payload itself cannot be read.
+    fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> {
+        match cursor.read_u8()? {
+            0 => DocIds::from_shared_data_cursor(cursor).map(IndexEvent::RemovedDocuments),
+            1 => WordIndex::from_shared_data_cursor(cursor).map(IndexEvent::UpdatedDocuments),
+            _ => Err("invalid index event type".into()),
+        }
+    }
+}
--- a/meilidb-data/src/indexer.rs
+++ b/meilidb-data/src/indexer.rs
@ -0,0 +1,117 @@
+use std::collections::BTreeMap;
+use std::convert::TryFrom;
+
+use deunicode::deunicode_with_tofu;
+use meilidb_core::{DocumentId, DocIndex};
+use meilidb_core::{Index as WordIndex, IndexBuilder as WordIndexBuilder};
+use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
+use sdset::Set;
+
+use crate::SchemaAttr;
+
+type Word = Vec<u8>; // TODO make it be a SmallVec
+
+/// Collects the `DocIndex` entries of every indexed word until `build` turns
+/// them into a `WordIndex`.
+pub struct Indexer {
+    word_limit: usize, // the maximum number of indexed words
+    indexed: BTreeMap<Word, Vec<DocIndex>>,
+}
+
+impl Indexer {
+    /// Creates an empty indexer with a word limit of 1000.
+    pub fn new() -> Indexer {
+        Indexer::with_word_limit(1000)
+    }
+
+    /// Creates an empty indexer that stops indexing a text as soon as a
+    /// token's word index reaches `limit`.
+    pub fn with_word_limit(limit: usize) -> Indexer {
+        Indexer { word_limit: limit, indexed: BTreeMap::new() }
+    }
+
+    /// Tokenizes `text` and indexes its tokens for document `id` under
+    /// attribute `attr`, stopping early when `index_token` asks to.
+    pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
+        for token in Tokenizer::new(text) {
+            if !index_token(token, id, attr, self.word_limit, &mut self.indexed) {
+                break;
+            }
+        }
+    }
+
+    /// Same as `index_text`, but the tokens come from a `SeqTokenizer` fed
+    /// with the strings yielded by `iter`.
+    pub fn index_text_seq<'a, I>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
+    where I: IntoIterator<Item=&'a str>,
+    {
+        for token in SeqTokenizer::new(iter.into_iter()) {
+            if !index_token(token, id, attr, self.word_limit, &mut self.indexed) {
+                break;
+            }
+        }
+    }
+
+    /// Consumes the indexer and builds the `WordIndex` of everything indexed.
+    ///
+    /// Panics if the builder rejects an insertion.
+    pub fn build(self) -> WordIndex {
+        let mut builder = WordIndexBuilder::new();
+
+        for (word, mut docindexes) in self.indexed {
+            // sorted and deduplicated before being viewed as a `Set`,
+            // since `new_unchecked` performs no check of its own
+            docindexes.sort_unstable();
+            docindexes.dedup();
+
+            let set = Set::new_unchecked(&docindexes);
+            builder.insert(word, set).unwrap();
+        }
+
+        builder.build()
+    }
+}
+
+/// Records `token` for document `id` and attribute `attr` into `indexed`.
+///
+/// The word is indexed lowercased and, when it contains no CJK character and
+/// its `deunicode` transliteration differs from it, a second time under that
+/// transliteration.
+///
+/// Returns `false` when the caller must stop indexing the current text: the
+/// token's word index reached `word_limit`, or `token_to_docindex` could not
+/// build a `DocIndex` for the token.
+fn index_token(
+    token: Token,
+    id: DocumentId,
+    attr: SchemaAttr,
+    word_limit: usize,
+    indexed: &mut BTreeMap<Word, Vec<DocIndex>>,
+) -> bool
+{
+    if token.word_index >= word_limit { return false }
+
+    let lower = token.word.to_lowercase();
+    let token = Token { word: &lower, ..token };
+    let docindex = match token_to_docindex(id, attr, token) {
+        Some(docindex) => docindex,
+        None => return false,
+    };
+    indexed.entry(Vec::from(token.word)).or_insert_with(Vec::new).push(docindex);
+
+    // words containing CJK characters are not transliterated
+    if lower.contains(is_cjk) { return true }
+
+    // nothing more to index when the transliteration is the word itself
+    let unidecoded = deunicode_with_tofu(&lower, "");
+    if unidecoded == lower { return true }
+
+    let token = Token { word: &unidecoded, ..token };
+    match token_to_docindex(id, attr, token) {
+        Some(docindex) => {
+            indexed.entry(Vec::from(token.word)).or_insert_with(Vec::new).push(docindex);
+            true
+        },
+        None => false,
+    }
+}
+
+/// Builds the `DocIndex` of `token` for document `id` and attribute `attr`.
+///
+/// Returns `None` when the word index, the char index or the number of chars
+/// of the word does not fit in a `u16`.
+fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<DocIndex> {
+    Some(DocIndex {
+        document_id: id,
+        attribute: attr.0,
+        word_index: u16::try_from(token.word_index).ok()?,
+        char_index: u16::try_from(token.char_index).ok()?,
+        char_length: u16::try_from(token.word.chars().count()).ok()?,
+    })
+}
--- a/meilidb-data/src/lib.rs
+++ b/meilidb-data/src/lib.rs
@ -0,0 +1,12 @@
+mod database;
+mod index_event;
+mod indexer;
+mod number;
+mod ranked_map;
+mod serde;
+pub mod schema;
+
+pub use self::database::{Database, Index};
+pub use self::number::Number;
+pub use self::ranked_map::RankedMap;
+pub use self::schema::{Schema, SchemaAttr};
--- a/meilidb-data/src/number.rs
+++ b/meilidb-data/src/number.rs
@ -0,0 +1,55 @@
+use std::num::{ParseIntError, ParseFloatError};
+use std::str::FromStr;
+use std::fmt;
+
+use ordered_float::OrderedFloat;
+use serde::{Serialize, Deserialize};
+
+/// A numeric value that is either an unsigned integer, a signed integer or a
+/// float.
+///
+/// The derived `PartialOrd`/`Ord` compare the variant first, in declaration
+/// order, and only then the inner value: any `Unsigned` sorts before any
+/// `Signed`, which sorts before any `Float`.
+#[derive(Serialize, Deserialize)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Number {
+    Unsigned(u64),
+    Signed(i64),
+    // wrapped in `OrderedFloat` so that `Eq`, `Ord` and `Hash` can be derived
+    Float(OrderedFloat<f64>),
+}
+
+impl FromStr for Number {
+    type Err = ParseNumberError;
+
+    /// Parses `s` as a `u64`, then as an `i64`, then as an `f64`; the first
+    /// successful parse decides the variant.
+    ///
+    /// When all three fail, the returned error carries the three parse errors.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        u64::from_str(s).map(Number::Unsigned).or_else(|uint_error| {
+            i64::from_str(s).map(Number::Signed).or_else(|int_error| {
+                f64::from_str(s)
+                    .map(|float| Number::Float(OrderedFloat(float)))
+                    .map_err(|float_error| ParseNumberError { uint_error, int_error, float_error })
+            })
+        })
+    }
+}
+
+/// The error returned when a string could be parsed neither as an unsigned
+/// integer, nor as a signed integer, nor as a float.
+///
+/// It keeps the error of each of the three attempts.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ParseNumberError {
+    uint_error: ParseIntError,
+    int_error: ParseIntError,
+    float_error: ParseFloatError,
+}
+
+impl fmt::Display for ParseNumberError {
+    /// Writes the underlying parse errors after a common prefix; the signed
+    /// integer error is left out when it equals the unsigned one.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let ParseNumberError { uint_error, int_error, float_error } = self;
+
+        if uint_error != int_error {
+            return write!(f, "can not parse number: {}, {}, {}",
+                uint_error, int_error, float_error);
+        }
+
+        write!(f, "can not parse number: {}, {}", uint_error, float_error)
+    }
+}
--- a/meilidb-data/src/ranked_map.rs
+++ b/meilidb-data/src/ranked_map.rs
@ -0,0 +1,5 @@
+use hashbrown::HashMap;
+use meilidb_core::DocumentId;
+use crate::{SchemaAttr, Number};
+
+/// Associates a `(DocumentId, SchemaAttr)` pair with a `Number`.
+pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>;
--- a/meilidb-data/src/schema.rs
+++ b/meilidb-data/src/schema.rs
@ -5,13 +5,9 @@ use std::{fmt, u16};
 use std::ops::BitOr;
 use std::sync::Arc;

-use serde_derive::{Serialize, Deserialize};
+use serde::{Serialize, Deserialize};
 use linked_hash_map::LinkedHashMap;

-use crate::database::serde::find_id::FindDocumentIdSerializer;
-use crate::database::serde::SerializerError;
-use crate::DocumentId;
-
 pub const STORED: SchemaProps  = SchemaProps { stored: true,  indexed: false, ranked: false };
 pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true,  ranked: false };
 pub const RANKED: SchemaProps  = SchemaProps { stored: false, indexed: false, ranked: true  };
@ -166,14 +162,6 @@ impl Schema {
        attributes
    }

-    pub fn document_id<T>(&self, document: T) -> Result<DocumentId, SerializerError>
-    where T: serde::Serialize,
-    {
-        let id_attribute_name = &self.inner.identifier;
-        let serializer = FindDocumentIdSerializer { id_attribute_name };
-        document.serialize(serializer)
-    }
-
    pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
        let (_, props) = self.inner.props[attr.0 as usize];
        props
--- a/meilidb-data/src/serde/convert_to_number.rs
+++ b/meilidb-data/src/serde/convert_to_number.rs
@ -1,12 +1,16 @@
-use serde::Serialize;
+use std::str::FromStr;
+
+use ordered_float::OrderedFloat;
 use serde::ser;
+use serde::Serialize;

-use crate::database::serde::SerializerError;
+use super::SerializerError;
+use crate::Number;

-pub struct KeyToStringSerializer;
+pub struct ConvertToNumber;

-impl ser::Serializer for KeyToStringSerializer {
-    type Ok = String;
+impl ser::Serializer for ConvertToNumber {
+    type Ok = Number;
    type Error = SerializerError;
    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
@ -16,48 +20,78 @@ impl ser::Serializer for KeyToStringSerializer {
    type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

-    forward_to_unserializable_type! {
-        bool => serialize_bool,
-        char => serialize_char,
+    fn serialize_bool(self, value: bool) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Unsigned(u64::from(value)))
+    }

-        i8  => serialize_i8,
-        i16 => serialize_i16,
-        i32 => serialize_i32,
-        i64 => serialize_i64,
+    /// A `char` is rejected as unrankable; the value itself is never read.
+    fn serialize_char(self, _value: char) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnrankableType { type_name: "char" })
+    }

-        u8  => serialize_u8,
-        u16 => serialize_u16,
-        u32 => serialize_u32,
-        u64 => serialize_u64,
+    fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Signed(i64::from(value)))
+    }

-        f32 => serialize_f32,
-        f64 => serialize_f64,
+    fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Signed(i64::from(value)))
+    }
+
+    fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Signed(i64::from(value)))
+    }
+
+    fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Signed(value))
+    }
+
+    fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Unsigned(u64::from(value)))
+    }
+
+    fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Unsigned(u64::from(value)))
+    }
+
+    fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Unsigned(u64::from(value)))
+    }
+
+    fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Unsigned(value))
+    }
+
+    fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Float(OrderedFloat(value as f64)))
+    }
+
+    fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
+        Ok(Number::Float(OrderedFloat(value)))
    }

    fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
-        Ok(value.to_string())
+        Ok(Number::from_str(value)?)
    }

    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "&[u8]" })
+        Err(SerializerError::UnrankableType { type_name: "&[u8]" })
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "Option" })
+        Err(SerializerError::UnrankableType { type_name: "Option" })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
-        Err(SerializerError::UnserializableType { name: "Option" })
+        Err(SerializerError::UnrankableType { type_name: "Option" })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "()" })
+        Err(SerializerError::UnrankableType { type_name: "()" })
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "unit struct" })
+        Err(SerializerError::UnrankableType { type_name: "unit struct" })
    }

    fn serialize_unit_variant(
@ -67,7 +101,7 @@ impl ser::Serializer for KeyToStringSerializer {
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "unit variant" })
+        Err(SerializerError::UnrankableType { type_name: "unit variant" })
    }

    fn serialize_newtype_struct<T: ?Sized>(
@ -89,15 +123,15 @@ impl ser::Serializer for KeyToStringSerializer {
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
-        Err(SerializerError::UnserializableType { name: "newtype variant" })
+        Err(SerializerError::UnrankableType { type_name: "newtype variant" })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "sequence" })
+        Err(SerializerError::UnrankableType { type_name: "sequence" })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "tuple" })
+        Err(SerializerError::UnrankableType { type_name: "tuple" })
    }

    fn serialize_tuple_struct(
@ -106,7 +140,7 @@ impl ser::Serializer for KeyToStringSerializer {
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "tuple struct" })
+        Err(SerializerError::UnrankableType { type_name: "tuple struct" })
    }

    fn serialize_tuple_variant(
@ -117,11 +151,11 @@ impl ser::Serializer for KeyToStringSerializer {
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "tuple variant" })
+        Err(SerializerError::UnrankableType { type_name: "tuple variant" })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "map" })
+        Err(SerializerError::UnrankableType { type_name: "map" })
    }

    fn serialize_struct(
@ -130,7 +164,7 @@ impl ser::Serializer for KeyToStringSerializer {
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "struct" })
+        Err(SerializerError::UnrankableType { type_name: "struct" })
    }

    fn serialize_struct_variant(
@ -141,6 +175,6 @@ impl ser::Serializer for KeyToStringSerializer {
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "struct variant" })
+        Err(SerializerError::UnrankableType { type_name: "struct variant" })
    }
 }
--- a/meilidb-data/src/serde/convert_to_string.rs
+++ b/meilidb-data/src/serde/convert_to_string.rs
@ -1,15 +1,12 @@
-use std::str::FromStr;
-
 use serde::Serialize;
-use serde::{ser, ser::Error};
+use serde::ser;

-use crate::database::serde::SerializerError;
-use crate::database::Number;
+use super::SerializerError;

-pub struct ValueToNumberSerializer;
+pub struct ConvertToString;

-impl ser::Serializer for ValueToNumberSerializer {
-    type Ok = Number;
+impl ser::Serializer for ConvertToString {
+    type Ok = String;
    type Error = SerializerError;
    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
@ -19,75 +16,78 @@ impl ser::Serializer for ValueToNumberSerializer {
    type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

-    forward_to_unserializable_type! {
-        bool => serialize_bool,
-        char => serialize_char,
+    /// A boolean is rejected as unserializable; the value itself is never read.
+    fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "boolean" })
+    }
+
+    fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
+        Ok(value.to_string())
    }

    fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Signed(value as i64))
+        Ok(value.to_string())
    }

    fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Signed(value as i64))
+        Ok(value.to_string())
    }

    fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Signed(value as i64))
+        Ok(value.to_string())
    }

    fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Signed(value as i64))
+        Ok(value.to_string())
    }

    fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Unsigned(value as u64))
+        Ok(value.to_string())
    }

    fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Unsigned(value as u64))
+        Ok(value.to_string())
    }

    fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Unsigned(value as u64))
+        Ok(value.to_string())
    }

    fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Unsigned(value as u64))
+        Ok(value.to_string())
    }

    fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Float(value as f64))
+        Ok(value.to_string())
    }

    fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
-        Ok(Number::Float(value))
+        Ok(value.to_string())
    }

    fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
-        Number::from_str(value).map_err(SerializerError::custom)
+        Ok(value.to_string())
    }

    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "&[u8]" })
+        Err(SerializerError::UnserializableType { type_name: "&[u8]" })
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "Option" })
+        Err(SerializerError::UnserializableType { type_name: "Option" })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
-        Err(SerializerError::UnserializableType { name: "Option" })
+        Err(SerializerError::UnserializableType { type_name: "Option" })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "()" })
+        Err(SerializerError::UnserializableType { type_name: "()" })
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "unit struct" })
+        Err(SerializerError::UnserializableType { type_name: "unit struct" })
    }

    fn serialize_unit_variant(
@ -97,7 +97,7 @@ impl ser::Serializer for ValueToNumberSerializer {
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "unit variant" })
+        Err(SerializerError::UnserializableType { type_name: "unit variant" })
    }

    fn serialize_newtype_struct<T: ?Sized>(
@ -119,15 +119,15 @@ impl ser::Serializer for ValueToNumberSerializer {
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
-        Err(SerializerError::UnserializableType { name: "newtype variant" })
+        Err(SerializerError::UnserializableType { type_name: "newtype variant" })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "sequence" })
+        Err(SerializerError::UnserializableType { type_name: "sequence" })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "tuple" })
+        Err(SerializerError::UnserializableType { type_name: "tuple" })
    }

    fn serialize_tuple_struct(
@ -136,7 +136,7 @@ impl ser::Serializer for ValueToNumberSerializer {
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "tuple struct" })
+        Err(SerializerError::UnserializableType { type_name: "tuple struct" })
    }

    fn serialize_tuple_variant(
@ -147,11 +147,11 @@ impl ser::Serializer for ValueToNumberSerializer {
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "tuple variant" })
+        Err(SerializerError::UnserializableType { type_name: "tuple variant" })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "map" })
+        Err(SerializerError::UnserializableType { type_name: "map" })
    }

    fn serialize_struct(
@ -160,7 +160,7 @@ impl ser::Serializer for ValueToNumberSerializer {
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "struct" })
+        Err(SerializerError::UnserializableType { type_name: "struct" })
    }

    fn serialize_struct_variant(
@ -171,6 +171,6 @@ impl ser::Serializer for ValueToNumberSerializer {
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "struct variant" })
+        Err(SerializerError::UnserializableType { type_name: "struct variant" })
    }
 }
--- a/meilidb-data/src/serde/deserializer.rs
+++ b/meilidb-data/src/serde/deserializer.rs
@ -0,0 +1,97 @@
+use std::collections::HashSet;
+use std::io::Cursor;
+
+use meilidb_core::DocumentId;
+use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
+use rmp_serde::decode::{Error as RmpError};
+use serde::{de, forward_to_deserialize_any};
+
+use crate::database::RawIndex;
+use crate::SchemaAttr;
+
+/// Deserializes the stored fields of one document as a map from attribute
+/// name to value.
+pub struct Deserializer<'a> {
+    pub document_id: DocumentId,
+    pub raw_index: &'a RawIndex,
+    /// When `Some`, only the attributes contained in the set are yielded.
+    pub fields: Option<&'a HashSet<SchemaAttr>>,
+}
+
+impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
+{
+    type Error = RmpError;
+
+    /// A document is always presented as a map, whatever the visitor asks for.
+    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: de::Visitor<'de>
+    {
+        self.deserialize_map(visitor)
+    }
+
+    forward_to_deserialize_any! {
+        bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
+        bytes byte_buf unit_struct tuple_struct
+        identifier tuple ignored_any option newtype_struct enum struct
+    }
+
+    /// Visits the fields stored for `document_id`, keyed by attribute name.
+    ///
+    /// Fields whose retrieval failed are skipped, as are the attributes that
+    /// are not part of `fields` when a set is given.
+    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: de::Visitor<'de>
+    {
+        let wanted = self.fields;
+        let raw_index = self.raw_index;
+
+        let entries = raw_index
+            .get_document_fields(self.document_id)
+            // TODO: must log the error instead of silently dropping it
+            // error!("sled iter error; {}", e);
+            .filter_map(|result| result.ok())
+            .filter(|(_, attr, _)| wanted.map_or(true, |set| set.contains(attr)))
+            .map(|(_, attr, value)| {
+                let attribute_name = raw_index.schema().attribute_name(attr);
+                (attribute_name, Value::new(value))
+            });
+
+        visitor.visit_map(de::value::MapDeserializer::new(entries))
+    }
+}
+
+/// The raw bytes of one stored field, wrapped in an `rmp_serde` deserializer.
+struct Value<A>(RmpDeserializer<ReadReader<Cursor<A>>>) where A: AsRef<[u8]>;
+
+impl<A> Value<A>
+where A: AsRef<[u8]>,
+{
+    /// Wraps `value` in a cursor and hands it to a new `rmp_serde` deserializer.
+    fn new(value: A) -> Value<A> {
+        let reader = Cursor::new(value);
+        Value(RmpDeserializer::new(reader))
+    }
+}
+
+impl<'de, A> de::IntoDeserializer<'de, RmpError> for Value<A>
+where A: AsRef<[u8]>,
+{
+    type Deserializer = Self;
+
+    /// A `Value` already is a deserializer, so it is returned unchanged.
+    fn into_deserializer(self) -> Self::Deserializer {
+        self
+    }
+}
+
+impl<'de, A> de::Deserializer<'de> for Value<A>
+where A: AsRef<[u8]>,
+{
+    type Error = RmpError;
+
+    /// Delegates to the wrapped `rmp_serde` deserializer.
+    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: de::Visitor<'de>
+    {
+        let Value(mut inner) = self;
+        inner.deserialize_any(visitor)
+    }
+
+    forward_to_deserialize_any! {
+        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
+        bytes byte_buf option unit unit_struct newtype_struct seq tuple
+        tuple_struct map struct enum identifier ignored_any
+    }
+}
--- a/meilidb-data/src/serde/extract_document_id.rs
+++ b/meilidb-data/src/serde/extract_document_id.rs
@ -1,23 +1,41 @@
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
+
+use meilidb_core::DocumentId;
 use serde::Serialize;
 use serde::ser;

-use crate::database::serde::key_to_string::KeyToStringSerializer;
-use crate::database::serde::{SerializerError, calculate_hash};
-use crate::DocumentId;
+use super::{SerializerError, ConvertToString};

-pub struct FindDocumentIdSerializer<'a> {
-    pub id_attribute_name: &'a str,
+pub fn extract_document_id<D>(
+    identifier: &str,
+    document: &D,
+) -> Result<Option<DocumentId>, SerializerError>
+where D: serde::Serialize,
+{
+    let serializer = ExtractDocumentId { identifier };
+    document.serialize(serializer)
 }

-impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
-    type Ok = DocumentId;
+/// Hashes `t` with the standard library's `DefaultHasher`.
+///
+/// NOTE(review): the std documentation does not guarantee that
+/// `DefaultHasher` yields the same hashes across Rust releases; confirm this
+/// is acceptable wherever the result is persisted as a document id.
+fn calculate_hash<T: Hash>(t: &T) -> u64 {
+    let mut hasher = DefaultHasher::new();
+    t.hash(&mut hasher);
+    hasher.finish()
+}
+
+struct ExtractDocumentId<'a> {
+    identifier: &'a str,
+}
+
+impl<'a> ser::Serializer for ExtractDocumentId<'a> {
+    type Ok = Option<DocumentId>;
    type Error = SerializerError;
    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeMap = FindDocumentIdMapSerializer<'a>;
-    type SerializeStruct = FindDocumentIdStructSerializer<'a>;
+    type SerializeMap = ExtractDocumentIdMapSerializer<'a>;
+    type SerializeStruct = ExtractDocumentIdStructSerializer<'a>;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    forward_to_unserializable_type! {
@ -38,30 +56,30 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
        f64 => serialize_f64,
    }

-    fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "str" })
+    /// A bare string is rejected as unserializable; the value itself is never read.
+    fn serialize_str(self, _value: &str) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "str" })
    }

    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "&[u8]" })
+        Err(SerializerError::UnserializableType { type_name: "&[u8]" })
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "Option" })
+        Err(SerializerError::UnserializableType { type_name: "Option" })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
-        Err(SerializerError::UnserializableType { name: "Option" })
+        Err(SerializerError::UnserializableType { type_name: "Option" })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "()" })
+        Err(SerializerError::UnserializableType { type_name: "()" })
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "unit struct" })
+        Err(SerializerError::UnserializableType { type_name: "unit struct" })
    }

    fn serialize_unit_variant(
@ -71,7 +89,7 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "unit variant" })
+        Err(SerializerError::UnserializableType { type_name: "unit variant" })
    }

    fn serialize_newtype_struct<T: ?Sized>(
@ -93,15 +111,15 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
-        Err(SerializerError::UnserializableType { name: "newtype variant" })
+        Err(SerializerError::UnserializableType { type_name: "newtype variant" })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "sequence" })
+        Err(SerializerError::UnserializableType { type_name: "sequence" })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "tuple" })
+        Err(SerializerError::UnserializableType { type_name: "tuple" })
    }

    fn serialize_tuple_struct(
@ -110,7 +128,7 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "tuple struct" })
+        Err(SerializerError::UnserializableType { type_name: "tuple struct" })
    }

    fn serialize_tuple_variant(
@ -121,15 +139,17 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "tuple variant" })
+        Err(SerializerError::UnserializableType { type_name: "tuple variant" })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
-        Ok(FindDocumentIdMapSerializer {
-            id_attribute_name: self.id_attribute_name,
+        let serializer = ExtractDocumentIdMapSerializer {
+            identifier: self.identifier,
            document_id: None,
            current_key_name: None,
-        })
+        };
+
+        Ok(serializer)
    }

    fn serialize_struct(
@ -138,10 +158,12 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
-        Ok(FindDocumentIdStructSerializer {
-            id_attribute_name: self.id_attribute_name,
+        let serializer = ExtractDocumentIdStructSerializer {
+            identifier: self.identifier,
            document_id: None,
-        })
+        };
+
+        Ok(serializer)
    }

    fn serialize_struct_variant(
@ -152,24 +174,24 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
-        Err(SerializerError::UnserializableType { name: "struct variant" })
+        Err(SerializerError::UnserializableType { type_name: "struct variant" })
    }
 }

-pub struct FindDocumentIdMapSerializer<'a> {
-    id_attribute_name: &'a str,
+pub struct ExtractDocumentIdMapSerializer<'a> {
+    identifier: &'a str,
    document_id: Option<DocumentId>,
    current_key_name: Option<String>,
 }

-impl<'a> ser::SerializeMap for FindDocumentIdMapSerializer<'a> {
-    type Ok = DocumentId;
+impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
+    type Ok = Option<DocumentId>;
    type Error = SerializerError;

    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
    where T: Serialize,
    {
-        let key = key.serialize(KeyToStringSerializer)?;
+        let key = key.serialize(ConvertToString)?;
        self.current_key_name = Some(key);
        Ok(())
    }
@ -188,9 +210,9 @@ impl<'a> ser::SerializeMap for FindDocumentIdMapSerializer<'a> {
    ) -> Result<(), Self::Error>
    where K: Serialize, V: Serialize,
    {
-        let key = key.serialize(KeyToStringSerializer)?;
+        let key = key.serialize(ConvertToString)?;

-        if self.id_attribute_name == key {
+        if self.identifier == key {
            // TODO is it possible to have multiple ids?
            let id = bincode::serialize(value).unwrap();
            let hash = calculate_hash(&id);
@ -201,20 +223,17 @@ impl<'a> ser::SerializeMap for FindDocumentIdMapSerializer<'a> {
    }

    fn end(self) -> Result<Self::Ok, Self::Error> {
-        match self.document_id {
-            Some(document_id) => Ok(document_id),
-            None => Err(SerializerError::DocumentIdNotFound)
-        }
+        Ok(self.document_id)
    }
 }

-pub struct FindDocumentIdStructSerializer<'a> {
-    id_attribute_name: &'a str,
+pub struct ExtractDocumentIdStructSerializer<'a> {
+    identifier: &'a str,
    document_id: Option<DocumentId>,
 }

-impl<'a> ser::SerializeStruct for FindDocumentIdStructSerializer<'a> {
-    type Ok = DocumentId;
+impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
+    type Ok = Option<DocumentId>;
    type Error = SerializerError;

    fn serialize_field<T: ?Sized>(
@ -224,7 +243,7 @@ impl<'a> ser::SerializeStruct for FindDocumentIdStructSerializer<'a> {
    ) -> Result<(), Self::Error>
    where T: Serialize,
    {
-        if self.id_attribute_name == key {
+        if self.identifier == key {
            // TODO can it be possible to have multiple ids?
            let id = bincode::serialize(value).unwrap();
            let hash = calculate_hash(&id);
@ -235,9 +254,6 @@ impl<'a> ser::SerializeStruct for FindDocumentIdStructSerializer<'a> {
    }

    fn end(self) -> Result<Self::Ok, Self::Error> {
-        match self.document_id {
-            Some(document_id) => Ok(document_id),
-            None => Err(SerializerError::DocumentIdNotFound)
-        }
+        Ok(self.document_id)
    }
 }
--- a/meilidb-data/src/serde/indexer.rs
+++ b/meilidb-data/src/serde/indexer.rs
@ -0,0 +1,337 @@
+use meilidb_core::DocumentId;
+use serde::ser;
+use serde::Serialize;
+
+use crate::database::RawIndex;
+use crate::indexer::Indexer as RawIndexer;
+use crate::schema::SchemaAttr;
+use super::{SerializerError, ConvertToString};
+
+/// Serde serializer that feeds the text of a single document attribute
+/// into the raw indexer.
+///
+/// Numbers, chars and strings are indexed as text; sequences, tuples and
+/// maps are indexed as a sequence of texts; every other kind of value is
+/// rejected with `SerializerError::UnindexableType`.
+pub struct Indexer<'a> {
+    /// Schema attribute the indexed text belongs to.
+    pub attribute: SchemaAttr,
+    /// Raw indexer receiving the text.
+    pub indexer: &'a mut RawIndexer,
+    /// Document the indexed text belongs to.
+    pub document_id: DocumentId,
+}
+
+impl<'a> ser::Serializer for Indexer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+    type SerializeSeq = SeqIndexer<'a>;
+    type SerializeTuple = TupleIndexer<'a>;
+    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeMap = MapIndexer<'a>;
+    // NOTE(review): `serialize_struct` below always returns an error, so this
+    // associated type is never constructed by this serializer.
+    type SerializeStruct = StructSerializer<'a>;
+    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
+
+    /// Booleans are not indexable.
+    fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnindexableType { type_name: "boolean" })
+    }
+
+    // Chars and numbers are first converted to their string representation
+    // and then indexed like any other text.
+
+    fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
+        let text = value.serialize(ConvertToString)?;
+        self.serialize_str(&text)
+    }
+
+    /// Indexes `text` for the current document and attribute.
+    fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
+        self.indexer.index_text(self.document_id, self.attribute, text);
+        Ok(())
+    }
+
+    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnindexableType { type_name: "&[u8]" })
+    }
+
+    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnindexableType { type_name: "Option" })
+    }
+
+    /// Converts the wrapped value to a string and indexes it.
+    fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        let text = value.serialize(ConvertToString)?;
+        self.indexer.index_text(self.document_id, self.attribute, &text);
+        Ok(())
+    }
+
+    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnindexableType { type_name: "()" })
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnindexableType { type_name: "unit struct" })
+    }
+
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str
+    ) -> Result<Self::Ok, Self::Error>
+    {
+        Err(SerializerError::UnindexableType { type_name: "unit variant" })
+    }
+
+    /// Newtype structs are transparent: the wrapped value is indexed.
+    fn serialize_newtype_struct<T: ?Sized>(
+        self,
+        _name: &'static str,
+        value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_newtype_variant<T: ?Sized>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        Err(SerializerError::UnindexableType { type_name: "newtype variant" })
+    }
+
+    /// Starts collecting the elements of a sequence; they are indexed
+    /// together once the sequence ends.
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
+        let indexer = SeqIndexer {
+            attribute: self.attribute,
+            document_id: self.document_id,
+            indexer: self.indexer,
+            texts: Vec::new(),
+        };
+
+        Ok(indexer)
+    }
+
+    /// Starts collecting the elements of a tuple; they are indexed
+    /// together once the tuple ends.
+    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
+        let indexer = TupleIndexer {
+            attribute: self.attribute,
+            document_id: self.document_id,
+            indexer: self.indexer,
+            texts: Vec::new(),
+        };
+
+        Ok(indexer)
+    }
+
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleStruct, Self::Error>
+    {
+        Err(SerializerError::UnindexableType { type_name: "tuple struct" })
+    }
+
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleVariant, Self::Error>
+    {
+        Err(SerializerError::UnindexableType { type_name: "tuple variant" })
+    }
+
+    /// Starts collecting the keys and values of a map; they are indexed
+    /// together once the map ends.
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
+        let indexer = MapIndexer {
+            attribute: self.attribute,
+            document_id: self.document_id,
+            indexer: self.indexer,
+            texts: Vec::new(),
+        };
+
+        Ok(indexer)
+    }
+
+    fn serialize_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStruct, Self::Error>
+    {
+        Err(SerializerError::UnindexableType { type_name: "struct" })
+    }
+
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStructVariant, Self::Error>
+    {
+        Err(SerializerError::UnindexableType { type_name: "struct variant" })
+    }
+}
+
+/// Accumulates the string form of every element of a sequence and
+/// indexes them all at once when the sequence ends.
+pub struct SeqIndexer<'a> {
+    attribute: SchemaAttr,
+    document_id: DocumentId,
+    indexer: &'a mut RawIndexer,
+    texts: Vec<String>,
+}
+
+impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Converts the element to a string and keeps it for `end`.
+    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where T: ser::Serialize
+    {
+        self.texts.push(value.serialize(ConvertToString)?);
+        Ok(())
+    }
+
+    /// Hands every collected text to the raw indexer.
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        let SeqIndexer { attribute, document_id, indexer, texts } = self;
+        indexer.index_text_seq(document_id, attribute, texts.iter().map(String::as_str));
+        Ok(())
+    }
+}
+
+/// Accumulates the string form of every key and value of a map, in the
+/// order they are serialized, and indexes them all at once when the map ends.
+pub struct MapIndexer<'a> {
+    attribute: SchemaAttr,
+    document_id: DocumentId,
+    indexer: &'a mut RawIndexer,
+    texts: Vec<String>,
+}
+
+impl<'a> ser::SerializeMap for MapIndexer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Converts the key to a string and keeps it for `end`.
+    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        self.texts.push(key.serialize(ConvertToString)?);
+        Ok(())
+    }
+
+    /// Converts the value to a string and keeps it for `end`.
+    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        self.texts.push(value.serialize(ConvertToString)?);
+        Ok(())
+    }
+
+    /// Hands every collected text to the raw indexer.
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        let MapIndexer { attribute, document_id, indexer, texts } = self;
+        indexer.index_text_seq(document_id, attribute, texts.iter().map(String::as_str));
+        Ok(())
+    }
+}
+
+/// Accumulates every field name followed by the string form of its value
+/// and indexes them all at once when the struct ends.
+pub struct StructSerializer<'a> {
+    attribute: SchemaAttr,
+    document_id: DocumentId,
+    indexer: &'a mut RawIndexer,
+    texts: Vec<String>,
+}
+
+impl<'a> ser::SerializeStruct for StructSerializer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Keeps the field name and the string form of its value for `end`.
+    fn serialize_field<T: ?Sized>(
+        &mut self,
+        key: &'static str,
+        value: &T,
+    ) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        // convert the value first so that nothing is recorded on failure
+        let value_text = value.serialize(ConvertToString)?;
+        self.texts.push(String::from(key));
+        self.texts.push(value_text);
+        Ok(())
+    }
+
+    /// Hands every collected text to the raw indexer.
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        let StructSerializer { attribute, document_id, indexer, texts } = self;
+        indexer.index_text_seq(document_id, attribute, texts.iter().map(String::as_str));
+        Ok(())
+    }
+}
+
+/// Accumulates the string form of every element of a tuple and indexes
+/// them all at once when the tuple ends.
+pub struct TupleIndexer<'a> {
+    attribute: SchemaAttr,
+    document_id: DocumentId,
+    indexer: &'a mut RawIndexer,
+    texts: Vec<String>,
+}
+
+impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Converts the element to a string and keeps it for `end`.
+    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where T: Serialize
+    {
+        self.texts.push(value.serialize(ConvertToString)?);
+        Ok(())
+    }
+
+    /// Hands every collected text to the raw indexer.
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        let TupleIndexer { attribute, document_id, indexer, texts } = self;
+        indexer.index_text_seq(document_id, attribute, texts.iter().map(String::as_str));
+        Ok(())
+    }
+}
--- a/meilidb-data/src/serde/mod.rs
+++ b/meilidb-data/src/serde/mod.rs
@ -0,0 +1,97 @@
+/// Generates `serde::Serializer` methods that reject the given primitive
+/// types with `SerializerError::UnserializableType`.
+///
+/// Each `type => method` pair expands to a `fn method(self, _v: type)` that
+/// returns an error carrying the name of the rejected type.
+macro_rules! forward_to_unserializable_type {
+    ($($ty:ident => $se_method:ident,)*) => {
+        $(
+            fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
+                // metavariables are not expanded inside string literals,
+                // `stringify!` is needed to get the real type name
+                Err(SerializerError::UnserializableType { type_name: stringify!($ty) })
+            }
+        )*
+    }
+}
+
+mod convert_to_number;
+mod convert_to_string;
+mod deserializer;
+mod extract_document_id;
+mod indexer;
+mod serializer;
+
+pub use self::deserializer::Deserializer;
+pub use self::extract_document_id::extract_document_id;
+pub use self::convert_to_string::ConvertToString;
+pub use self::convert_to_number::ConvertToNumber;
+pub use self::indexer::Indexer;
+pub use self::serializer::Serializer;
+
+use std::{fmt, error::Error};
+use rmp_serde::encode::Error as RmpError;
+use serde::ser;
+use crate::number::ParseNumberError;
+
+/// Errors that the serializers of this module can return.
+#[derive(Debug)]
+pub enum SerializerError {
+    /// The serialized document does not have an id according to the schema.
+    DocumentIdNotFound,
+    /// An error coming from the `rmp_serde` encoder.
+    RmpError(RmpError),
+    /// An error coming from `sled`.
+    SledError(sled::Error),
+    /// A value could not be parsed as a number.
+    ParseNumberError(ParseNumberError),
+    /// The named kind of value can not be serialized.
+    UnserializableType { type_name: &'static str },
+    /// The named kind of value can not be indexed.
+    UnindexableType { type_name: &'static str },
+    /// The named kind of value can not be used for ranking.
+    UnrankableType { type_name: &'static str },
+    /// A free-form error message.
+    Custom(String),
+}
+
+impl ser::Error for SerializerError {
+    /// Wraps the displayed form of `msg` into `SerializerError::Custom`.
+    fn custom<T: fmt::Display>(msg: T) -> Self {
+        SerializerError::Custom(msg.to_string())
+    }
+}
+
+impl fmt::Display for SerializerError {
+    /// Writes a human readable description of the error.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            SerializerError::DocumentIdNotFound => {
+                f.write_str("serialized document does not have an id according to the schema")
+            },
+            SerializerError::RmpError(err) => {
+                write!(f, "rmp serde related error: {}", err)
+            },
+            SerializerError::SledError(err) => {
+                write!(f, "sled related error: {}", err)
+            },
+            SerializerError::ParseNumberError(err) => {
+                write!(f, "error while trying to parse a number: {}", err)
+            },
+            SerializerError::UnserializableType { type_name } => {
+                write!(f, "{} are not a serializable type", type_name)
+            },
+            SerializerError::UnindexableType { type_name } => {
+                write!(f, "{} are not an indexable type", type_name)
+            },
+            SerializerError::UnrankableType { type_name } => {
+                write!(f, "{} types can not be used for ranking", type_name)
+            },
+            SerializerError::Custom(message) => f.write_str(message),
+        }
+    }
+}
+
+impl Error for SerializerError {}
+
+// The conversions below allow the `?` operator to turn these values
+// into a `SerializerError`.
+
+impl From<String> for SerializerError {
+    /// Uses the string as a custom error message.
+    fn from(value: String) -> SerializerError {
+        SerializerError::Custom(value)
+    }
+}
+
+impl From<RmpError> for SerializerError {
+    fn from(error: RmpError) -> SerializerError {
+        SerializerError::RmpError(error)
+    }
+}
+
+impl From<sled::Error> for SerializerError {
+    fn from(error: sled::Error) -> SerializerError {
+        SerializerError::SledError(error)
+    }
+}
+
+impl From<ParseNumberError> for SerializerError {
+    fn from(error: ParseNumberError) -> SerializerError {
+        SerializerError::ParseNumberError(error)
+    }
+}
--- a/meilidb-data/src/serde/serializer.rs
+++ b/meilidb-data/src/serde/serializer.rs
@ -0,0 +1,295 @@
+use meilidb_core::DocumentId;
+use serde::ser;
+
+use crate::database::RawIndex;
+use crate::ranked_map::RankedMap;
+use crate::indexer::Indexer as RawIndexer;
+use crate::schema::{Schema, SchemaAttr};
+use super::{SerializerError, ConvertToString, ConvertToNumber, Indexer};
+
+/// Serde serializer for a whole document.
+///
+/// Only maps and structs (and newtype structs wrapping them) are accepted,
+/// every other kind of value is rejected with
+/// `SerializerError::UnserializableType`. Each entry or field is handed to
+/// `serialize_value` together with the schema, index, indexer and ranked map.
+pub struct Serializer<'a> {
+    pub schema: &'a Schema,
+    pub index: &'a RawIndex,
+    pub indexer: &'a mut RawIndexer,
+    pub ranked_map: &'a mut RankedMap,
+    pub document_id: DocumentId,
+}
+
+impl<'a> ser::Serializer for Serializer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeMap = MapSerializer<'a>;
+    type SerializeStruct = StructSerializer<'a>;
+    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
+
+    // a document can not be a primitive value,
+    // all of these methods return an `UnserializableType` error
+    forward_to_unserializable_type! {
+        bool => serialize_bool,
+        char => serialize_char,
+
+        i8  => serialize_i8,
+        i16 => serialize_i16,
+        i32 => serialize_i32,
+        i64 => serialize_i64,
+
+        u8  => serialize_u8,
+        u16 => serialize_u16,
+        u32 => serialize_u32,
+        u64 => serialize_u64,
+
+        f32 => serialize_f32,
+        f64 => serialize_f64,
+    }
+
+    fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "str" })
+    }
+
+    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "&[u8]" })
+    }
+
+    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "Option" })
+    }
+
+    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        Err(SerializerError::UnserializableType { type_name: "Option" })
+    }
+
+    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "()" })
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "unit struct" })
+    }
+
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str
+    ) -> Result<Self::Ok, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { type_name: "unit variant" })
+    }
+
+    /// Newtype structs are transparent: the wrapped value is serialized.
+    fn serialize_newtype_struct<T: ?Sized>(
+        self,
+        _name: &'static str,
+        value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_newtype_variant<T: ?Sized>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        Err(SerializerError::UnserializableType { type_name: "newtype variant" })
+    }
+
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "sequence" })
+    }
+
+    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
+        Err(SerializerError::UnserializableType { type_name: "tuple" })
+    }
+
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleStruct, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { type_name: "tuple struct" })
+    }
+
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleVariant, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { type_name: "tuple variant" })
+    }
+
+    /// Starts serializing a document given as a map.
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
+        Ok(MapSerializer {
+            schema: self.schema,
+            document_id: self.document_id,
+            index: self.index,
+            indexer: self.indexer,
+            ranked_map: self.ranked_map,
+            current_key_name: None,
+        })
+    }
+
+    /// Starts serializing a document given as a struct.
+    fn serialize_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStruct, Self::Error>
+    {
+        Ok(StructSerializer {
+            schema: self.schema,
+            document_id: self.document_id,
+            index: self.index,
+            indexer: self.indexer,
+            ranked_map: self.ranked_map,
+        })
+    }
+
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStructVariant, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { type_name: "struct variant" })
+    }
+}
+
+/// Serializes the entries of a document given as a map.
+///
+/// Keys are converted to strings and every entry is handed to the
+/// `serialize_value` function of this module.
+pub struct MapSerializer<'a> {
+    schema: &'a Schema,
+    document_id: DocumentId,
+    index: &'a RawIndex,
+    indexer: &'a mut RawIndexer,
+    ranked_map: &'a mut RankedMap,
+    /// Key given to `serialize_key`, waiting for its value.
+    current_key_name: Option<String>,
+}
+
+impl<'a> ser::SerializeMap for MapSerializer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Remembers the key, as a string, until its value is serialized.
+    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        let key = key.serialize(ConvertToString)?;
+        self.current_key_name = Some(key);
+        Ok(())
+    }
+
+    /// Serializes the value associated to the last key given to `serialize_key`.
+    ///
+    /// Returns a `SerializerError::Custom` error, instead of panicking,
+    /// when no key is pending.
+    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        let key = self.current_key_name.take().ok_or_else(|| {
+            SerializerError::Custom("serialize_value called before serialize_key".to_string())
+        })?;
+        self.serialize_entry(&key, value)
+    }
+
+    /// Converts the key to a string and serializes the entry.
+    fn serialize_entry<K: ?Sized, V: ?Sized>(
+        &mut self,
+        key: &K,
+        value: &V,
+    ) -> Result<(), Self::Error>
+    where K: ser::Serialize, V: ser::Serialize,
+    {
+        let key = key.serialize(ConvertToString)?;
+
+        // this is the free function of this module, not the trait method
+        serialize_value(
+            self.schema,
+            self.document_id,
+            self.index,
+            self.indexer,
+            self.ranked_map,
+            &key,
+            value,
+        )
+    }
+
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        Ok(())
+    }
+}
+
+/// Serializes the fields of a document given as a struct.
+///
+/// Every field is handed to the `serialize_value` function of this module
+/// with the field name used as the attribute name.
+pub struct StructSerializer<'a> {
+    schema: &'a Schema,
+    document_id: DocumentId,
+    index: &'a RawIndex,
+    indexer: &'a mut RawIndexer,
+    ranked_map: &'a mut RankedMap,
+}
+
+impl<'a> ser::SerializeStruct for StructSerializer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    /// Serializes one field of the document.
+    fn serialize_field<T: ?Sized>(
+        &mut self,
+        key: &'static str,
+        value: &T,
+    ) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        serialize_value(
+            self.schema,
+            self.document_id,
+            self.index,
+            self.indexer,
+            self.ranked_map,
+            key,
+            value,
+        )
+    }
+
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        Ok(())
+    }
+}
+
+/// Processes one attribute of a document according to the schema.
+///
+/// Keys that are not attributes of the schema are ignored. Otherwise,
+/// depending on the properties of the attribute, the value is stored in the
+/// index (encoded with `rmp_serde`), indexed as text and/or converted to a
+/// number and inserted in the ranked map, in that order.
+fn serialize_value<T: ?Sized>(
+    schema: &Schema,
+    document_id: DocumentId,
+    index: &RawIndex,
+    indexer: &mut RawIndexer,
+    ranked_map: &mut RankedMap,
+    key: &str,
+    value: &T,
+) -> Result<(), SerializerError>
+where T: ser::Serialize,
+{
+    // nothing to do for keys the schema does not know about
+    let attribute = match schema.attribute(key) {
+        Some(attribute) => attribute,
+        None => return Ok(()),
+    };
+    let props = schema.props(attribute);
+
+    if props.is_stored() {
+        let bytes = rmp_serde::to_vec_named(value)?;
+        index.set_document_attribute(document_id, attribute, bytes)?;
+    }
+
+    if props.is_indexed() {
+        value.serialize(Indexer { attribute, indexer, document_id })?;
+    }
+
+    if props.is_ranked() {
+        let number = value.serialize(ConvertToNumber)?;
+        ranked_map.insert((document_id, attribute), number);
+    }
+
+    Ok(())
+}
--- a/meilidb-tokenizer/Cargo.toml
+++ b/meilidb-tokenizer/Cargo.toml
@ -0,0 +1,8 @@
+[package]
+name = "meilidb-tokenizer"
+version = "0.1.0"
+authors = ["Kerollmops <renault.cle@gmail.com>"]
+edition = "2018"
+
+[dependencies]
+slice-group-by = "0.2.4"
--- a/meilidb-tokenizer/src/lib.rs
+++ b/meilidb-tokenizer/src/lib.rs
@ -0,0 +1,295 @@
+use std::iter::Peekable;
+use slice_group_by::StrGroupBy;
+use self::SeparatorCategory::*;
+
+/// Returns `true` when `c` belongs to one of the CJK related
+/// Unicode ranges listed below.
+pub fn is_cjk(c: char) -> bool {
+    match c {
+        '\u{2e80}'..='\u{2eff}' // CJK Radicals Supplement
+        | '\u{2f00}'..='\u{2fdf}' // Kangxi Radicals
+        | '\u{3040}'..='\u{309f}' // Hiragana
+        | '\u{30a0}'..='\u{30ff}' // Katakana
+        | '\u{3100}'..='\u{312f}' // Bopomofo
+        | '\u{3200}'..='\u{32ff}' // Enclosed CJK Letters and Months
+        | '\u{3400}'..='\u{4dbf}' // CJK Unified Ideographs Extension A
+        | '\u{4e00}'..='\u{9fff}' // CJK Unified Ideographs
+        | '\u{f900}'..='\u{faff}' => true, // CJK Compatibility Ideographs
+        _ => false,
+    }
+}
+
+/// Strength of a separator found between two words.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum SeparatorCategory {
+    Soft,
+    Hard,
+}
+
+impl SeparatorCategory {
+    /// Combines two categories: the result is `Soft` only
+    /// when both of them are `Soft`.
+    fn merge(self, other: SeparatorCategory) -> SeparatorCategory {
+        match (self, other) {
+            (Soft, Soft) => Soft,
+            _ => Hard,
+        }
+    }
+
+    /// Number of word positions the separator accounts for:
+    /// 1 for `Soft` and 8 for `Hard`.
+    fn to_usize(self) -> usize {
+        if self == Soft { 1 } else { 8 }
+    }
+}
+
+/// Tells whether `c` is a separator, soft or hard.
+fn is_separator(c: char) -> bool {
+    match classify_separator(c) {
+        Some(_) => true,
+        None => false,
+    }
+}
+
+/// Returns the category of `c` when it is a separator, `None` otherwise.
+fn classify_separator(c: char) -> Option<SeparatorCategory> {
+    const SOFT_SEPARATORS: &[char] = &[' ', '\'', '"'];
+    const HARD_SEPARATORS: &[char] = &['.', ';', ',', '!', '?', '-', '(', ')'];
+
+    if SOFT_SEPARATORS.contains(&c) {
+        Some(Soft)
+    } else if HARD_SEPARATORS.contains(&c) {
+        Some(Hard)
+    } else {
+        None
+    }
+}
+
+/// Category of a char, used to group the chars of a text.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum CharCategory {
+    /// A soft or hard separator.
+    Separator(SeparatorCategory),
+    /// A char for which `is_cjk` returns `true`.
+    Cjk,
+    /// Any other char.
+    Other,
+}
+
+/// Returns the category of `c`; the separator check takes
+/// precedence over the CJK one.
+fn classify_char(c: char) -> CharCategory {
+    match classify_separator(c) {
+        Some(category) => CharCategory::Separator(category),
+        None if is_cjk(c) => CharCategory::Cjk,
+        None => CharCategory::Other,
+    }
+}
+
+/// A string is a word when none of its chars is a separator.
+fn is_str_word(s: &str) -> bool {
+    s.chars().all(|c| !is_separator(c))
+}
+
+/// Tells whether two consecutive chars belong to the same group.
+///
+/// Separators are grouped together whatever their strength and so are
+/// "other" chars; a CJK char is never grouped with anything, even with
+/// another CJK char.
+fn same_group_category(a: char, b: char) -> bool {
+    match (classify_char(a), classify_char(b)) {
+        (CharCategory::Separator(_), CharCategory::Separator(_)) => true,
+        (CharCategory::Other, CharCategory::Other) => true,
+        // a CJK char on either side, or a separator next to a non-separator
+        _ => false,
+    }
+}
+
+// fold step over `char_indices`: counts the chars seen so far
+// and keeps the byte index right after the last one
+fn chars_count_index(acc: (usize, usize), item: (usize, char)) -> (usize, usize) {
+    let (seen, _previous_end) = acc;
+    let (start, ch) = item;
+    (seen + 1, start + ch.len_utf8())
+}
+
+/// Returns an iterator over the words of `query`, as produced
+/// by the `Tokenizer`, without their positions.
+pub fn split_query_string(query: &str) -> impl Iterator<Item=&str> {
+    Tokenizer::new(query).map(|t| t.word)
+}
+
+/// A word extracted from a text along with its position.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub struct Token<'a> {
+    /// The word itself, a slice of the tokenized text.
+    pub word: &'a str,
+    /// Position of the word, separators account for 1 (soft) or 8 (hard).
+    pub word_index: usize,
+    /// Number of chars that precede the word in the text.
+    pub char_index: usize,
+}
+
+/// Iterator over the words of a text.
+pub struct Tokenizer<'a> {
+    /// Part of the text that remains to be tokenized.
+    inner: &'a str,
+    /// Word index given to the next token.
+    word_index: usize,
+    /// Char index given to the next token.
+    char_index: usize,
+}
+
+impl<'a> Tokenizer<'a> {
+    /// Creates a tokenizer over `string`.
+    ///
+    /// Leading separators are skipped right away: `char_index` starts
+    /// at the number of chars skipped while `word_index` starts at zero.
+    pub fn new(string: &str) -> Tokenizer {
+        let mut skipped_chars = 0;
+        let mut skipped_bytes = 0;
+
+        for c in string.chars().take_while(|c| is_separator(*c)) {
+            skipped_chars += 1;
+            skipped_bytes += c.len_utf8();
+        }
+
+        Tokenizer {
+            inner: &string[skipped_bytes..],
+            word_index: 0,
+            char_index: skipped_chars,
+        }
+    }
+}
+
+impl<'a> Iterator for Tokenizer<'a> {
+    type Item = Token<'a>;
+
+    /// Returns the next word of the text along with its word and char indexes.
+    fn next(&mut self) -> Option<Self::Item> {
+        // split what remains of the text into groups of chars of the same category
+        let mut iter = self.inner.linear_group_by(same_group_category).peekable();
+
+        while let (Some(string), next_string) = (iter.next(), iter.peek()) {
+            // number of chars and of bytes of this group
+            let (count, index) = string.char_indices().fold((0, 0), chars_count_index);
+
+            if !is_str_word(string) {
+                // a group of separators: the word index is advanced by 1 when
+                // all of them are soft and by 8 when at least one is hard
+                self.word_index += string.chars()
+                                         .filter_map(classify_separator)
+                                         .fold(Soft, |a, x| a.merge(x))
+                                         .to_usize();
+                self.char_index += count;
+                self.inner = &self.inner[index..];
+                continue;
+            }
+
+            let token = Token {
+                word: string,
+                word_index: self.word_index,
+                char_index: self.char_index,
+            };
+
+            // two words can directly follow each other without any
+            // separator, the word index must be advanced in this case too
+            if next_string.filter(|s| is_str_word(s)).is_some() {
+                self.word_index += 1;
+            }
+
+            // skip the word, the next call starts right after it
+            self.char_index += count;
+            self.inner = &self.inner[index..];
+
+            return Some(token);
+        }
+
+        // nothing left to tokenize
+        self.inner = "";
+        None
+    }
+}
+
+/// Iterator over the words of a sequence of texts, the positions of the
+/// tokens of a text are shifted to come after those of the previous texts.
+pub struct SeqTokenizer<'a, I>
+where I: Iterator<Item=&'a str>,
+{
+    /// Texts that remain to be tokenized.
+    inner: I,
+    /// Tokenizer of the text currently processed, if any.
+    current: Option<Peekable<Tokenizer<'a>>>,
+    /// Offset added to the word index of the tokens of the current text.
+    word_offset: usize,
+    /// Offset added to the char index of the tokens of the current text.
+    char_offset: usize,
+}
+
+impl<'a, I> SeqTokenizer<'a, I>
+where I: Iterator<Item=&'a str>,
+{
+    /// Creates a tokenizer over the texts yielded by `iter`,
+    /// the first text is pulled right away.
+    pub fn new(mut iter: I) -> SeqTokenizer<'a, I> {
+        let current = match iter.next() {
+            Some(text) => Some(Tokenizer::new(text).peekable()),
+            None => None,
+        };
+
+        SeqTokenizer { inner: iter, current, word_offset: 0, char_offset: 0 }
+    }
+}
+
+impl<'a, I> Iterator for SeqTokenizer<'a, I>
+where I: Iterator<Item=&'a str>,
+{
+    type Item = Token<'a>;
+
+    /// Returns the next token of the sequence of texts.
+    ///
+    /// Texts without any token are skipped with a loop, not by recursing,
+    /// so that the stack usage does not depend on the number of such texts.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            // no more texts available when there is no current tokenizer
+            let current = self.current.as_mut()?;
+
+            match current.next() {
+                Some(token) => {
+                    // we must apply the word and char offsets
+                    // to the token before returning it
+                    let token = Token {
+                        word: token.word,
+                        word_index: token.word_index + self.word_offset,
+                        char_index: token.char_index + self.char_offset,
+                    };
+
+                    // if this is the last iteration on this text
+                    // we must save the offsets for next texts
+                    if current.peek().is_none() {
+                        let hard_space = SeparatorCategory::Hard.to_usize();
+                        self.word_offset = token.word_index + hard_space;
+                        self.char_offset = token.char_index + hard_space;
+                    }
+
+                    return Some(token);
+                },
+                None => {
+                    // no more words in this text we must
+                    // start tokenizing the next text
+                    self.current = self.inner.next().map(|s| Tokenizer::new(s).peekable());
+                },
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Tokenizes `text` and checks that it yields exactly the `expected`
+    /// `(word, word_index, char_index)` triples, in order, and nothing more.
+    fn assert_tokens(text: &str, expected: &[(&str, usize, usize)]) {
+        let mut tokenizer = Tokenizer::new(text);
+
+        for &(word, word_index, char_index) in expected {
+            assert_eq!(tokenizer.next(), Some(Token { word, word_index, char_index }));
+        }
+
+        assert_eq!(tokenizer.next(), None);
+    }
+
+    #[test]
+    fn easy() {
+        assert_tokens("salut", &[("salut", 0, 0)]);
+        assert_tokens("yo    ", &[("yo", 0, 0)]);
+    }
+
+    #[test]
+    fn hard() {
+        assert_tokens(" .? yo lolo. aïe (ouch)", &[
+            ("yo", 0, 4),
+            ("lolo", 1, 7),
+            ("aïe", 9, 13),
+            ("ouch", 17, 18),
+        ]);
+
+        assert_tokens("yo ! lolo ? wtf - lol . aïe ,", &[
+            ("yo", 0, 0),
+            ("lolo", 8, 5),
+            ("wtf", 16, 12),
+            ("lol", 24, 18),
+            ("aïe", 32, 24),
+        ]);
+    }
+
+    #[test]
+    fn hard_long_chars() {
+        assert_tokens(" .? yo 😂. aïe", &[
+            ("yo", 0, 4),
+            ("😂", 1, 7),
+            ("aïe", 9, 10),
+        ]);
+
+        assert_tokens("yo ! lolo ? 😱 - lol . 😣 ,", &[
+            ("yo", 0, 0),
+            ("lolo", 8, 5),
+            ("😱", 16, 12),
+            ("lol", 24, 16),
+            ("😣", 32, 22),
+        ]);
+    }
+
+    #[test]
+    fn hard_kanjis() {
+        assert_tokens("\u{2ec4}lolilol\u{2ec7}", &[
+            ("\u{2ec4}", 0, 0),
+            ("lolilol", 1, 1),
+            ("\u{2ec7}", 2, 8),
+        ]);
+
+        assert_tokens("\u{2ec4}\u{2ed3}\u{2ef2} lolilol - hello    \u{2ec7}", &[
+            ("\u{2ec4}", 0, 0),
+            ("\u{2ed3}", 1, 1),
+            ("\u{2ef2}", 2, 2),
+            ("lolilol", 3, 4),
+            ("hello", 11, 14),
+            ("\u{2ec7}", 12, 23),
+        ]);
+    }
+}
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@ -0,0 +1,27 @@
+[package]
+edition = "2018"
+name = "meilidb"
+version = "0.3.1"
+authors = ["Kerollmops <renault.cle@gmail.com>"]
+
+[dependencies]
+meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
+meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
+meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
+
+[features]
+default = []
+i128 = ["meilidb-core/i128"]
+nightly = ["meilidb-core/nightly"]
+
+[dev-dependencies]
+csv = "1.0.7"
+env_logger = "0.6.1"
+jemallocator = "0.1.9"
+quickcheck = "0.8.2"
+rand = "0.6.5"
+rand_xorshift = "0.1.1"
+serde = { version = "1.0.90", features = ["derive"] }
+structopt = "0.2.15"
+tempfile = "3.0.7"
+termcolor = "1.0.4"
--- a/meilidb/examples/create-database.rs
+++ b/meilidb/examples/create-database.rs
@ -9,11 +9,10 @@ use std::error::Error;
 use std::borrow::Cow;
 use std::fs::File;

-use serde_derive::{Serialize, Deserialize};
+use serde::{Serialize, Deserialize};
 use structopt::StructOpt;

-use meilidb::database::{Database, Schema};
-use meilidb::tokenizer::DefaultBuilder;
+use meilidb_data::{Database, Schema};

 #[derive(Debug, StructOpt)]
 pub struct Opt {
@ -51,9 +50,9 @@ fn index(
    stop_words: &HashSet<String>,
 ) -> Result<Database, Box<Error>>
 {
-    let database = Database::create(database_path)?;
+    let database = Database::start_default(database_path)?;

-    database.create_index("default", &schema)?;
+    let index = database.create_index("default".to_string(), schema.clone())?;

    let mut rdr = csv::Reader::from_path(csv_data_path)?;
    let mut raw_record = csv::StringRecord::new();
@ -63,8 +62,7 @@ fn index(
    let mut end_of_file = false;

    while !end_of_file {
-        let tokenizer_builder = DefaultBuilder::new();
-        let mut update = database.start_update("default")?;
+        let mut update = index.documents_addition();

        loop {
            end_of_file = !rdr.read_record(&mut raw_record)?;
@ -78,7 +76,7 @@ fn index(
                }
            };

-            update.update_document(&document, &tokenizer_builder, &stop_words)?;
+            update.update_document(&document)?;

            print!("\rindexing document {}", i);
            i += 1;
@ -91,7 +89,7 @@ fn index(
        println!();

        println!("committing update...");
-        database.commit_update(update)?;
+        update.finalize()?;
    }

    Ok(database)
--- a/meilidb/examples/query-database.rs
+++ b/meilidb/examples/query-database.rs
@ -2,19 +2,19 @@
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

 use std::collections::btree_map::{BTreeMap, Entry};
+use std::collections::{HashMap, HashSet};
 use std::iter::FromIterator;
 use std::io::{self, Write};
-use std::time::Instant;
+use std::time::{Instant, Duration};
 use std::path::PathBuf;
 use std::error::Error;

-use hashbrown::{HashMap, HashSet};
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use structopt::StructOpt;
+use meilidb_core::Match;

-use meilidb::database::schema::SchemaAttr;
-use meilidb::database::Database;
-use meilidb::Match;
+use meilidb_data::schema::SchemaAttr;
+use meilidb_data::Database;

 #[derive(Debug, StructOpt)]
 pub struct Opt {
@ -138,12 +138,19 @@ fn main() -> Result<(), Box<Error>> {
    let opt = Opt::from_args();

    let start = Instant::now();
-    let database = Database::open(&opt.database_path)?;
-    println!("database prepared for you in {:.2?}", start.elapsed());
+    let database = Database::start_default(&opt.database_path)?;

    let mut buffer = String::new();
    let input = io::stdin();

+    let index = database.open_index("default")?.unwrap();
+    let schema = index.schema();
+
+    println!("database prepared for you in {:.2?}", start.elapsed());
+
+    let fields = opt.displayed_fields.iter().map(String::as_str);
+    let fields = HashSet::from_iter(fields);
+
    loop {
        print!("Searching for: ");
        io::stdout().flush()?;
@ -151,32 +158,28 @@ fn main() -> Result<(), Box<Error>> {
        if input.read_line(&mut buffer)? == 0 { break }
        let query = buffer.trim_end_matches('\n');

-        let view = database.view("default")?;
-        let schema = view.schema();
+        let start_total = Instant::now();

-        let start = Instant::now();
-
-        let builder = view.query_builder();
+        let builder = index.query_builder();
        let documents = builder.query(query, 0..opt.number_results);

+        let mut retrieve_duration = Duration::default();
+
        let number_of_documents = documents.len();
        for mut doc in documents {

            doc.matches.sort_unstable_by_key(|m| (m.char_index, m.char_index));

-            match view.document_by_id::<Document>(doc.id) {
-                Ok(document) => {
-                    for name in &opt.displayed_fields {
-                        let attr = match schema.attribute(name) {
-                            Some(attr) => attr,
-                            None => continue,
-                        };
-                        let text = match document.get(name) {
-                            Some(text) => text,
-                            None => continue,
-                        };
+            let start_retrieve = Instant::now();
+            let result = index.document::<Document>(Some(&fields), doc.id);
+            retrieve_duration += start_retrieve.elapsed();

+            match result {
+                Ok(Some(document)) => {
+                    for (name, text) in document {
                        print!("{}: ", name);
+
+                        let attr = schema.attribute(&name).unwrap();
                        let matches = doc.matches.iter()
                                        .filter(|m| SchemaAttr::new(m.attribute) == attr)
                                        .cloned();
@ -186,6 +189,7 @@ fn main() -> Result<(), Box<Error>> {
                        println!();
                    }
                },
+                Ok(None) => eprintln!("missing document"),
                Err(e) => eprintln!("{}", e),
            }

@ -202,7 +206,8 @@ fn main() -> Result<(), Box<Error>> {
            println!();
        }

-        eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start.elapsed());
+        eprintln!("document field retrieve took {:.2?}", retrieve_duration);
+        eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
        buffer.clear();
    }

--- a/meilidb/src/common_words.rs
+++ b/meilidb/src/common_words.rs
--- a/meilidb/src/lib.rs
+++ b/meilidb/src/lib.rs
@ -0,0 +1,7 @@
+#![cfg_attr(feature = "nightly", feature(test))]
+
+mod common_words;
+mod sort_by_attr;
+
+pub use self::sort_by_attr::SortByAttr;
+pub use self::common_words::CommonWords;
--- a/src/rank/criterion/sort_by_attr.rs
+++ b/src/rank/criterion/sort_by_attr.rs
@ -2,10 +2,9 @@ use std::cmp::Ordering;
 use std::error::Error;
 use std::fmt;

-use crate::database::schema::{Schema, SchemaAttr};
-use crate::rank::criterion::Criterion;
-use crate::database::RankedMap;
-use crate::rank::RawDocument;
+use meilidb_core::criterion::Criterion;
+use meilidb_core::RawDocument;
+use meilidb_data::{Schema, SchemaAttr, RankedMap};

 /// A helper struct that permits sorting documents by
 /// some of their stored attributes.
--- a/src/database/config.rs
+++ b/src/database/config.rs
@ -1,46 +0,0 @@
-use std::collections::{HashSet, HashMap};
-use serde_derive::{Serialize, Deserialize};
-
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum RankingOrdering {
-    Asc,
-    Dsc
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct AccessToken {
-    pub read_key: String,
-    pub write_key: String,
-    pub admin_key: String,
-}
-
-
-#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct Config {
-    pub stop_words: Option<HashSet<String>>,
-    pub ranking_order: Option<Vec<String>>,
-    pub distinct_field: Option<String>,
-    pub ranking_rules: Option<HashMap<String, RankingOrdering>>,
-    pub access_token: Option<AccessToken>,
-}
-
-impl Config {
-    pub fn update_with(&mut self, new: Config) {
-        if let Some(stop_words) = new.stop_words {
-            self.stop_words = Some(stop_words);
-        };
-        if let Some(ranking_order) = new.ranking_order {
-            self.ranking_order = Some(ranking_order);
-        };
-        if let Some(distinct_field) = new.distinct_field {
-            self.distinct_field = Some(distinct_field);
-        };
-        if let Some(ranking_rules) = new.ranking_rules {
-            self.ranking_rules = Some(ranking_rules);
-        };
-        if let Some(access_token) = new.access_token {
-            self.access_token = Some(access_token);
-        };
-    }
-}
--- a/src/database/document_key.rs
+++ b/src/database/document_key.rs
@ -1,149 +0,0 @@
-use std::io::{Cursor, Read, Write};
-use std::mem::size_of;
-use std::fmt;
-
-use byteorder::{BigEndian, WriteBytesExt, ReadBytesExt};
-
-use crate::database::schema::SchemaAttr;
-use crate::DocumentId;
-
-const DOC_KEY_LEN:      usize = 4 + size_of::<u64>();
-const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::<u16>();
-
-#[derive(Copy, Clone)]
-pub struct DocumentKey([u8; DOC_KEY_LEN]);
-
-impl DocumentKey {
-    pub fn new(id: DocumentId) -> DocumentKey {
-        let mut buffer = [0; DOC_KEY_LEN];
-
-        let mut wtr = Cursor::new(&mut buffer[..]);
-        wtr.write_all(b"doc-").unwrap();
-        wtr.write_u64::<BigEndian>(id.0).unwrap();
-
-        DocumentKey(buffer)
-    }
-
-    pub fn from_bytes(mut bytes: &[u8]) -> DocumentKey {
-        assert!(bytes.len() >= DOC_KEY_LEN);
-        assert_eq!(&bytes[..4], b"doc-");
-
-        let mut buffer = [0; DOC_KEY_LEN];
-        bytes.read_exact(&mut buffer).unwrap();
-
-        DocumentKey(buffer)
-    }
-
-    pub fn with_attribute(&self, attr: SchemaAttr) -> DocumentKeyAttr {
-        DocumentKeyAttr::new(self.document_id(), attr)
-    }
-
-    pub fn with_attribute_min(&self) -> DocumentKeyAttr {
-        DocumentKeyAttr::new(self.document_id(), SchemaAttr::min())
-    }
-
-    pub fn with_attribute_max(&self) -> DocumentKeyAttr {
-        DocumentKeyAttr::new(self.document_id(), SchemaAttr::max())
-    }
-
-    pub fn document_id(&self) -> DocumentId {
-        let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap();
-        DocumentId(id)
-    }
-}
-
-impl AsRef<[u8]> for DocumentKey {
-    fn as_ref(&self) -> &[u8] {
-        &self.0
-    }
-}
-
-impl fmt::Debug for DocumentKey {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_struct("DocumentKey")
-            .field("document_id", &self.document_id())
-            .finish()
-    }
-}
-
-#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
-pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]);
-
-impl DocumentKeyAttr {
-    pub fn new(id: DocumentId, attr: SchemaAttr) -> DocumentKeyAttr {
-        let mut buffer = [0; DOC_KEY_ATTR_LEN];
-        let DocumentKey(raw_key) = DocumentKey::new(id);
-
-        let mut wtr = Cursor::new(&mut buffer[..]);
-        wtr.write_all(&raw_key).unwrap();
-        wtr.write_all(b"-").unwrap();
-        wtr.write_u16::<BigEndian>(attr.0).unwrap();
-
-        DocumentKeyAttr(buffer)
-    }
-
-    pub fn with_attribute_min(id: DocumentId) -> DocumentKeyAttr {
-        DocumentKeyAttr::new(id, SchemaAttr::min())
-    }
-
-    pub fn with_attribute_max(id: DocumentId) -> DocumentKeyAttr {
-        DocumentKeyAttr::new(id, SchemaAttr::max())
-    }
-
-    pub fn from_bytes(mut bytes: &[u8]) -> DocumentKeyAttr {
-        assert!(bytes.len() >= DOC_KEY_ATTR_LEN);
-        assert_eq!(&bytes[..4], b"doc-");
-
-        let mut buffer = [0; DOC_KEY_ATTR_LEN];
-        bytes.read_exact(&mut buffer).unwrap();
-
-        DocumentKeyAttr(buffer)
-    }
-
-    pub fn document_id(&self) -> DocumentId {
-        let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap();
-        DocumentId(id)
-    }
-
-    pub fn attribute(&self) -> SchemaAttr {
-        let offset = 4 + size_of::<u64>() + 1;
-        let value = (&self.0[offset..]).read_u16::<BigEndian>().unwrap();
-        SchemaAttr::new(value)
-    }
-
-    pub fn into_document_key(self) -> DocumentKey {
-        DocumentKey::new(self.document_id())
-    }
-}
-
-impl AsRef<[u8]> for DocumentKeyAttr {
-    fn as_ref(&self) -> &[u8] {
-        &self.0
-    }
-}
-
-impl fmt::Debug for DocumentKeyAttr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_struct("DocumentKeyAttr")
-            .field("document_id", &self.document_id())
-            .field("attribute", &self.attribute().0)
-            .finish()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn keep_as_ref_order() {
-        for (a, b) in (0..).zip(1..).take(u16::max_value() as usize - 1) {
-            let id = DocumentId(0);
-            let a = DocumentKeyAttr::new(id, SchemaAttr(a));
-            let b = DocumentKeyAttr::new(id, SchemaAttr(b));
-
-            assert!(a < b);
-            assert!(a.as_ref() < b.as_ref());
-        }
-    }
-}
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@ -1,911 +0,0 @@
-use std::time::Instant;
-use std::error::Error;
-use std::ffi::OsStr;
-use std::sync::Arc;
-use std::fs;
-use std::path::{Path, PathBuf};
-use std::sync::atomic::{AtomicBool, Ordering};
-use std::ops::{Deref, DerefMut};
-
-use rocksdb::rocksdb_options::{DBOptions, ColumnFamilyOptions};
-use rocksdb::rocksdb::{Writable, Snapshot};
-use rocksdb::{DB, MergeOperands};
-use size_format::SizeFormatterBinary;
-use arc_swap::ArcSwap;
-use lockfree::map::Map;
-use hashbrown::HashMap;
-use log::{info, error, warn};
-
-use crate::database::schema::SchemaAttr;
-use crate::shared_data_cursor::FromSharedDataCursor;
-use crate::write_to_bytes::WriteToBytes;
-use crate::DocumentId;
-
-use self::update::{ReadIndexEvent, ReadRankedMapEvent};
-
-pub use self::config::Config;
-pub use self::document_key::{DocumentKey, DocumentKeyAttr};
-pub use self::view::{DatabaseView, DocumentIter};
-pub use self::update::Update;
-pub use self::serde::SerializerError;
-pub use self::schema::Schema;
-pub use self::index::Index;
-pub use self::number::{Number, ParseNumberError};
-
-pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>;
-
-const DATA_INDEX:      &[u8] = b"data-index";
-const DATA_RANKED_MAP: &[u8] = b"data-ranked-map";
-const DATA_SCHEMA:     &[u8] = b"data-schema";
-const CONFIG:          &[u8] = b"config";
-
-pub mod config;
-pub mod schema;
-pub(crate) mod index;
-mod number;
-mod document_key;
-mod serde;
-mod update;
-mod view;
-
-fn retrieve_data_schema<D>(snapshot: &Snapshot<D>) -> Result<Schema, Box<Error>>
-where D: Deref<Target=DB>
-{
-    match snapshot.get(DATA_SCHEMA)? {
-        Some(vector) => Ok(Schema::read_from_bin(&*vector)?),
-        None => Err(String::from("BUG: no schema found in the database").into()),
-    }
-}
-
-fn retrieve_data_index<D>(snapshot: &Snapshot<D>) -> Result<Index, Box<Error>>
-where D: Deref<Target=DB>
-{
-    let start = Instant::now();
-    let vector = snapshot.get(DATA_INDEX)?;
-    info!("loading index from kv-store took {:.2?}", start.elapsed());
-
-    match vector {
-        Some(vector) => {
-            let start = Instant::now();
-
-            let bytes = vector.as_ref().to_vec();
-            info!("index size is {}B", SizeFormatterBinary::new(bytes.len() as u64));
-
-            let event = ReadIndexEvent::from_bytes(bytes)?;
-            let index = event.updated_documents().expect("BUG: invalid event deserialized");
-
-            info!("loading index from bytes took {:.2?}", start.elapsed());
-
-            Ok(index)
-        },
-        None => Ok(Index::default()),
-    }
-}
-
-fn retrieve_data_ranked_map<D>(snapshot: &Snapshot<D>) -> Result<RankedMap, Box<Error>>
-where D: Deref<Target=DB>,
-{
-    let start = Instant::now();
-    let vector = snapshot.get(DATA_RANKED_MAP)?;
-    info!("loading ranked map from kv-store took {:.2?}", start.elapsed());
-
-    match vector {
-        Some(vector) => {
-            let start = Instant::now();
-
-            let bytes = vector.as_ref().to_vec();
-            info!("ranked map size is {}B", SizeFormatterBinary::new(bytes.len() as u64));
-
-            let event = ReadRankedMapEvent::from_bytes(bytes)?;
-            let ranked_map = event.updated_documents().expect("BUG: invalid event deserialized");
-
-            info!("loading ranked map from bytes took {:.2?}", start.elapsed());
-
-            Ok(ranked_map)
-        },
-        None => Ok(RankedMap::new()),
-    }
-}
-
-fn retrieve_config<D>(snapshot: &Snapshot<D>) -> Result<Config, Box<Error>>
-where D: Deref<Target=DB>,
-{
-    match snapshot.get(CONFIG)? {
-        Some(vector) => Ok(bincode::deserialize(&*vector)?),
-        None => Ok(Config::default()),
-    }
-}
-
-fn merge_indexes(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
-    use self::update::ReadIndexEvent::{self, *};
-    use self::update::WriteIndexEvent;
-
-    let mut index = Index::default();
-    for bytes in existing.into_iter().chain(operands) {
-        match ReadIndexEvent::from_bytes(bytes.to_vec()).unwrap() {
-            RemovedDocuments(d) => index = index.remove_documents(d.as_ref()),
-            UpdatedDocuments(i) => index = index.union(&i),
-        }
-    }
-
-    WriteIndexEvent::UpdatedDocuments(&index).into_bytes()
-}
-
-fn merge_ranked_maps(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
-    use self::update::ReadRankedMapEvent::{self, *};
-    use self::update::WriteRankedMapEvent;
-
-    let mut ranked_map = RankedMap::default();
-    for bytes in existing.into_iter().chain(operands) {
-        match ReadRankedMapEvent::from_bytes(bytes.to_vec()).unwrap() {
-            RemovedDocuments(d) => ranked_map.retain(|(k, _), _| !d.as_ref().binary_search(k).is_ok()),
-            UpdatedDocuments(i) => ranked_map.extend(i),
-        }
-    }
-
-    WriteRankedMapEvent::UpdatedDocuments(&ranked_map).into_bytes()
-}
-
-fn merge_operator(key: &[u8], existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
-    match key {
-        DATA_INDEX      => merge_indexes(existing, operands),
-        DATA_RANKED_MAP => merge_ranked_maps(existing, operands),
-        key             => panic!("The merge operator does not support merging {:?}", key),
-    }
-}
-
-pub struct IndexUpdate {
-    index: String,
-    update: Update,
-}
-
-impl Deref for IndexUpdate {
-    type Target = Update;
-
-    fn deref(&self) -> &Update {
-        &self.update
-    }
-}
-
-impl DerefMut for IndexUpdate {
-    fn deref_mut(&mut self) -> &mut Update {
-        &mut self.update
-    }
-}
-
-struct DatabaseIndex {
-    db: Arc<DB>,
-
-    // This view is updated each time the DB ingests an update.
-    view: ArcSwap<DatabaseView<Arc<DB>>>,
-
-    // The path of the mdb folder stored on disk.
-    path: PathBuf,
-
-    // must_die false by default, must be set as true when the Index is dropped.
-    // It is used to erase the folder saved on disk when the user request to delete an index.
-    must_die: AtomicBool,
-}
-
-impl DatabaseIndex {
-    fn create<P: AsRef<Path>>(path: P, schema: &Schema) -> Result<DatabaseIndex, Box<Error>> {
-        let path = path.as_ref();
-        if path.exists() {
-            return Err(format!("File already exists at path: {}, cannot create database.",
-                                path.display()).into())
-        }
-
-        let path_lossy = path.to_string_lossy();
-        let mut opts = DBOptions::new();
-        opts.create_if_missing(true);
-        // opts.error_if_exists(true); // FIXME pull request that
-
-        let mut cf_opts = ColumnFamilyOptions::new();
-        cf_opts.add_merge_operator("data merge operator", merge_operator);
-
-        let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?;
-
-        let mut schema_bytes = Vec::new();
-        schema.write_to_bin(&mut schema_bytes)?;
-        db.put(DATA_SCHEMA, &schema_bytes)?;
-
-        let db = Arc::new(db);
-        let snapshot = Snapshot::new(db.clone());
-        let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?));
-
-        Ok(DatabaseIndex {
-            db: db,
-            view: view,
-            path: path.to_path_buf(),
-            must_die: AtomicBool::new(false)
-        })
-    }
-
-    fn open<P: AsRef<Path>>(path: P) -> Result<DatabaseIndex, Box<Error>> {
-        let path_lossy = path.as_ref().to_string_lossy();
-
-        let mut opts = DBOptions::new();
-        opts.create_if_missing(false);
-
-        let mut cf_opts = ColumnFamilyOptions::new();
-        cf_opts.add_merge_operator("data merge operator", merge_operator);
-
-        let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?;
-
-        // FIXME create a generic function to do that !
-        let _schema = match db.get(DATA_SCHEMA)? {
-            Some(value) => Schema::read_from_bin(&*value)?,
-            None => return Err(String::from("Database does not contain a schema").into()),
-        };
-
-        let db = Arc::new(db);
-        let snapshot = Snapshot::new(db.clone());
-        let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?));
-
-        Ok(DatabaseIndex {
-            db: db,
-            view: view,
-            path: path.as_ref().to_path_buf(),
-            must_die: AtomicBool::new(false)
-        })
-    }
-
-    fn must_die(&self) {
-        self.must_die.store(true, Ordering::Relaxed)
-    }
-
-    fn start_update(&self) -> Result<Update, Box<Error>> {
-        let schema = match self.db.get(DATA_SCHEMA)? {
-            Some(value) => Schema::read_from_bin(&*value)?,
-            None => panic!("Database does not contain a schema"),
-        };
-
-        Ok(Update::new(schema))
-    }
-
-    fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
-        let batch = update.build()?;
-        self.db.write(batch)?;
-        self.db.compact_range(None, None);
-        self.db.flush(true)?;
-
-        let snapshot = Snapshot::new(self.db.clone());
-        let view = Arc::new(DatabaseView::new(snapshot)?);
-        self.view.store(view.clone());
-
-        Ok(view)
-    }
-
-    fn view(&self) -> Arc<DatabaseView<Arc<DB>>> {
-        self.view.load()
-    }
-
-    fn get_config(&self) -> Config {
-        self.view().config().clone()
-    }
-
-    fn update_config(&self, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{
-        let data = bincode::serialize(&config)?;
-        self.db.put(CONFIG, &data)?;
-
-        let snapshot = Snapshot::new(self.db.clone());
-        let view = Arc::new(DatabaseView::new(snapshot)?);
-        self.view.store(view.clone());
-
-        Ok(view)
-    }
-
-    fn path(&self) -> &Path {
-        self.path.as_path()
-    }
-}
-
-impl Drop for DatabaseIndex {
-    fn drop(&mut self) {
-        if self.must_die.load(Ordering::Relaxed) {
-            if let Err(err) = fs::remove_dir_all(&self.path) {
-                error!("Impossible to remove mdb when Database is dropped; {}", err);
-            }
-        }
-    }
-}
-
-pub struct Database {
-    indexes: Map<String, Arc<DatabaseIndex>>,
-    path: PathBuf,
-}
-
-impl Database {
-    pub fn create<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
-        Ok(Database {
-            indexes: Map::new(),
-            path: path.as_ref().to_path_buf(),
-        })
-    }
-
-    pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
-        let entries = fs::read_dir(&path)?;
-
-        let indexes = Map::new();
-        for entry in entries {
-            let path = match entry {
-                Ok(p) => p.path(),
-                Err(err) => {
-                    warn!("Impossible to retrieve the path from an entry; {}", err);
-                    continue
-                }
-            };
-
-            let name = match path.file_stem().and_then(OsStr::to_str) {
-                Some(name) => name.to_owned(),
-                None => continue
-            };
-
-            let db = match DatabaseIndex::open(path.clone()) {
-                Ok(db) => db,
-                Err(err) => {
-                    warn!("Impossible to open the database; {}", err);
-                    continue
-                }
-            };
-
-            info!("Load database {}", name);
-            indexes.insert(name, Arc::new(db));
-        }
-
-        Ok(Database {
-            indexes: indexes,
-            path: path.as_ref().to_path_buf(),
-        })
-    }
-
-    pub fn create_index(&self, name: &str, schema: &Schema) -> Result<(), Box<Error>> {
-        let index_path = self.path.join(name);
-
-        if index_path.exists() {
-            return Err("Index already exists".into());
-        }
-
-        let index = DatabaseIndex::create(index_path, schema)?;
-        self.indexes.insert(name.to_owned(), Arc::new(index));
-
-        Ok(())
-    }
-
-    pub fn delete_index(&self, name: &str) -> Result<(), Box<Error>> {
-        let index_guard = self.indexes.remove(name).ok_or("Index not found")?;
-        index_guard.val().must_die();
-
-        Ok(())
-    }
-
-    pub fn list_indexes(&self) -> Vec<String> {
-        self.indexes.iter().map(|g| g.key().clone()).collect()
-    }
-
-    pub fn start_update(&self, index: &str) -> Result<IndexUpdate, Box<Error>> {
-        let index_guard = self.indexes.get(index).ok_or("Index not found")?;
-        let update = index_guard.val().start_update()?;
-
-        Ok(IndexUpdate { index: index.to_owned(), update })
-    }
-
-    pub fn commit_update(&self, update: IndexUpdate)-> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
-        let index_guard = self.indexes.get(&update.index).ok_or("Index not found")?;
-
-        index_guard.val().commit_update(update.update)
-    }
-
-    pub fn view(&self, index: &str) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
-        let index_guard = self.indexes.get(index).ok_or("Index not found")?;
-
-        Ok(index_guard.val().view())
-    }
-
-    pub fn get_config(&self, index: &str) -> Result<Config, Box<Error>> {
-        let index_guard = self.indexes.get(index).ok_or("Index not found")?;
-
-        Ok(index_guard.val().get_config())
-    }
-
-    pub fn update_config(&self, index: &str, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{
-        let index_guard = self.indexes.get(index).ok_or("Index not found")?;
-
-        Ok(index_guard.val().update_config(config)?)
-    }
-
-    pub fn path(&self) -> &Path {
-        self.path.as_path()
-    }
-
-    pub fn index_path(&self, index: &str) -> Result<PathBuf, Box<Error>> {
-        let index_guard = self.indexes.get(index).ok_or("Index not found")?;
-        let path = index_guard.val().path();
-        Ok(path.to_path_buf())
-    }
-
-}
-
-#[cfg(test)]
-mod tests {
-    use std::collections::HashSet;
-    use std::error::Error;
-
-    use serde_derive::{Serialize, Deserialize};
-
-    use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
-    use crate::tokenizer::DefaultBuilder;
-
-    use super::*;
-
-    #[test]
-    fn ingest_one_easy_update() -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let meilidb_path = dir.path().join("meilidb.mdb");
-        let meilidb_index_name = "default";
-
-        #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
-        struct SimpleDoc {
-            id: u64,
-            title: String,
-            description: String,
-            timestamp: u64,
-        }
-
-        let schema = {
-            let mut builder = SchemaBuilder::with_identifier("id");
-            builder.new_attribute("id", STORED);
-            builder.new_attribute("title", STORED | INDEXED);
-            builder.new_attribute("description", STORED | INDEXED);
-            builder.new_attribute("timestamp", STORED);
-            builder.build()
-        };
-
-        let database = Database::create(&meilidb_path)?;
-
-        database.create_index(meilidb_index_name, &schema)?;
-
-        let doc0 = SimpleDoc {
-            id: 0,
-            title: String::from("I am a title"),
-            description: String::from("I am a description"),
-            timestamp: 1234567,
-        };
-        let doc1 = SimpleDoc {
-            id: 1,
-            title: String::from("I am the second title"),
-            description: String::from("I am the second description"),
-            timestamp: 7654321,
-        };
-
-        let tokenizer_builder = DefaultBuilder::new();
-        let mut builder = database.start_update(meilidb_index_name)?;
-
-        let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
-        let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
-
-        let view = database.commit_update(builder)?;
-
-        let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
-        let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
-
-        assert_eq!(doc0, de_doc0);
-        assert_eq!(doc1, de_doc1);
-
-        Ok(dir.close()?)
-    }
-
-    #[test]
-    fn ingest_two_easy_updates() -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let meilidb_path = dir.path().join("meilidb.mdb");
-        let meilidb_index_name = "default";
-
-        #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
-        struct SimpleDoc {
-            id: u64,
-            title: String,
-            description: String,
-            timestamp: u64,
-        }
-
-        let schema = {
-            let mut builder = SchemaBuilder::with_identifier("id");
-            builder.new_attribute("id", STORED);
-            builder.new_attribute("title", STORED | INDEXED);
-            builder.new_attribute("description", STORED | INDEXED);
-            builder.new_attribute("timestamp", STORED);
-            builder.build()
-        };
-
-        let database = Database::create(&meilidb_path)?;
-
-        database.create_index(meilidb_index_name, &schema)?;
-
-        let doc0 = SimpleDoc {
-            id: 0,
-            title: String::from("I am a title"),
-            description: String::from("I am a description"),
-            timestamp: 1234567,
-        };
-        let doc1 = SimpleDoc {
-            id: 1,
-            title: String::from("I am the second title"),
-            description: String::from("I am the second description"),
-            timestamp: 7654321,
-        };
-        let doc2 = SimpleDoc {
-            id: 2,
-            title: String::from("I am the third title"),
-            description: String::from("I am the third description"),
-            timestamp: 7654321,
-        };
-        let doc3 = SimpleDoc {
-            id: 3,
-            title: String::from("I am the fourth title"),
-            description: String::from("I am the fourth description"),
-            timestamp: 7654321,
-        };
-
-        let tokenizer_builder = DefaultBuilder::new();
-
-        let mut builder = database.start_update(meilidb_index_name)?;
-        let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
-        let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
-        database.commit_update(builder)?;
-
-        let mut builder = database.start_update(meilidb_index_name)?;
-        let docid2 = builder.update_document(&doc2, &tokenizer_builder, &stop_words)?;
-        let docid3 = builder.update_document(&doc3, &tokenizer_builder, &stop_words)?;
-        let view = database.commit_update(builder)?;
-
-        let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
-        let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
-
-        assert_eq!(doc0, de_doc0);
-        assert_eq!(doc1, de_doc1);
-
-        let de_doc2: SimpleDoc = view.document_by_id(docid2)?;
-        let de_doc3: SimpleDoc = view.document_by_id(docid3)?;
-
-        assert_eq!(doc2, de_doc2);
-        assert_eq!(doc3, de_doc3);
-
-        Ok(dir.close()?)
-    }
-}
-
-#[cfg(all(feature = "nightly", test))]
-mod bench {
-    extern crate test;
-
-    use std::collections::HashSet;
-    use std::error::Error;
-    use std::iter::repeat_with;
-    use self::test::Bencher;
-
-    use rand::distributions::Alphanumeric;
-    use rand_xorshift::XorShiftRng;
-    use rand::{Rng, SeedableRng};
-    use serde_derive::Serialize;
-    use rand::seq::SliceRandom;
-
-    use crate::tokenizer::DefaultBuilder;
-    use crate::database::schema::*;
-
-    use super::*;
-
-    fn random_sentences<R: Rng>(number: usize, rng: &mut R) -> String {
-        let mut words = String::new();
-
-        for i in 0..number {
-            let word_len = rng.gen_range(1, 12);
-            let iter = repeat_with(|| rng.sample(Alphanumeric)).take(word_len);
-            words.extend(iter);
-
-            if i == number - 1 { // last word
-                let final_ = [".", "?", "!", "..."].choose(rng).cloned();
-                words.extend(final_);
-            } else {
-                let middle = [",", ", "].choose(rng).cloned();
-                words.extend(middle);
-            }
-        }
-
-        words
-    }
-
-    #[bench]
-    fn open_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let mut builder = SchemaBuilder::with_identifier("id");
-        builder.new_attribute("title", STORED | INDEXED);
-        builder.new_attribute("description", STORED | INDEXED);
-        let schema = builder.build();
-
-        let db_path = dir.path().join("bench.mdb");
-        let index_name = "default";
-
-        let database = Database::create(&db_path)?;
-        database.create_index(index_name, &schema)?;
-
-        #[derive(Serialize)]
-        struct Document {
-            id: u64,
-            title: String,
-            description: String,
-        }
-
-        let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.start_update(index_name)?;
-        let mut rng = XorShiftRng::seed_from_u64(42);
-
-        for i in 0..300 {
-            let document = Document {
-                id: i,
-                title: random_sentences(rng.gen_range(1, 8), &mut rng),
-                description: random_sentences(rng.gen_range(20, 200), &mut rng),
-            };
-            builder.update_document(&document, &tokenizer_builder, &stop_words)?;
-        }
-
-        database.commit_update(builder)?;
-
-        drop(database);
-
-        bench.iter(|| {
-            let database = Database::open(db_path.clone()).unwrap();
-            test::black_box(|| database);
-        });
-
-        Ok(())
-    }
-
-    #[bench]
-    fn open_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let mut builder = SchemaBuilder::with_identifier("id");
-        builder.new_attribute("title", STORED | INDEXED);
-        builder.new_attribute("description", STORED | INDEXED);
-        let schema = builder.build();
-
-        let db_path = dir.path().join("bench.mdb");
-        let index_name = "default";
-
-        let database = Database::create(&db_path)?;
-        database.create_index(index_name, &schema)?;
-
-        #[derive(Serialize)]
-        struct Document {
-            id: u64,
-            title: String,
-            description: String,
-        }
-
-        let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.start_update(index_name)?;
-        let mut rng = XorShiftRng::seed_from_u64(42);
-
-        for i in 0..3000 {
-            let document = Document {
-                id: i,
-                title: random_sentences(rng.gen_range(1, 8), &mut rng),
-                description: random_sentences(rng.gen_range(20, 200), &mut rng),
-            };
-            builder.update_document(&document, &tokenizer_builder, &stop_words)?;
-        }
-
-        database.commit_update(builder)?;
-
-        drop(database);
-
-        bench.iter(|| {
-            let database = Database::open(db_path.clone()).unwrap();
-            test::black_box(|| database);
-        });
-
-        Ok(())
-    }
-
-    #[bench]
-    #[ignore]
-    fn open_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let mut builder = SchemaBuilder::with_identifier("id");
-        builder.new_attribute("title", STORED | INDEXED);
-        builder.new_attribute("description", STORED | INDEXED);
-        let schema = builder.build();
-
-        let db_path = dir.path().join("bench.mdb");
-        let index_name = "default";
-
-        let database = Database::create(&db_path)?;
-        database.create_index(index_name, &schema)?;
-
-        #[derive(Serialize)]
-        struct Document {
-            id: u64,
-            title: String,
-            description: String,
-        }
-
-        let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.start_update(index_name)?;
-        let mut rng = XorShiftRng::seed_from_u64(42);
-
-        for i in 0..30_000 {
-            let document = Document {
-                id: i,
-                title: random_sentences(rng.gen_range(1, 8), &mut rng),
-                description: random_sentences(rng.gen_range(20, 200), &mut rng),
-            };
-            builder.update_document(&document, &tokenizer_builder, &stop_words)?;
-        }
-
-        database.commit_update(builder)?;
-
-        drop(database);
-
-        bench.iter(|| {
-            let database = Database::open(db_path.clone()).unwrap();
-            test::black_box(|| database);
-        });
-
-        Ok(())
-    }
-
-    #[bench]
-    fn search_oneletter_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let mut builder = SchemaBuilder::with_identifier("id");
-        builder.new_attribute("title", STORED | INDEXED);
-        builder.new_attribute("description", STORED | INDEXED);
-        let schema = builder.build();
-
-        let db_path = dir.path().join("bench.mdb");
-        let index_name = "default";
-
-        let database = Database::create(&db_path)?;
-        database.create_index(index_name, &schema)?;
-
-        #[derive(Serialize)]
-        struct Document {
-            id: u64,
-            title: String,
-            description: String,
-        }
-
-        let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.start_update(index_name)?;
-        let mut rng = XorShiftRng::seed_from_u64(42);
-
-        for i in 0..300 {
-            let document = Document {
-                id: i,
-                title: random_sentences(rng.gen_range(1, 8), &mut rng),
-                description: random_sentences(rng.gen_range(20, 200), &mut rng),
-            };
-            builder.update_document(&document, &tokenizer_builder, &stop_words)?;
-        }
-
-        let view = database.commit_update(builder)?;
-
-        bench.iter(|| {
-            for q in &["a", "b", "c", "d", "e"] {
-                let documents = view.query_builder().query(q, 0..20);
-                test::black_box(|| documents);
-            }
-        });
-
-        Ok(())
-    }
-
-    #[bench]
-    fn search_oneletter_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let mut builder = SchemaBuilder::with_identifier("id");
-        builder.new_attribute("title", STORED | INDEXED);
-        builder.new_attribute("description", STORED | INDEXED);
-        let schema = builder.build();
-
-        let db_path = dir.path().join("bench.mdb");
-        let index_name = "default";
-
-        let database = Database::create(&db_path)?;
-        database.create_index(index_name, &schema)?;
-
-        #[derive(Serialize)]
-        struct Document {
-            id: u64,
-            title: String,
-            description: String,
-        }
-
-        let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.start_update(index_name)?;
-        let mut rng = XorShiftRng::seed_from_u64(42);
-
-        for i in 0..3000 {
-            let document = Document {
-                id: i,
-                title: random_sentences(rng.gen_range(1, 8), &mut rng),
-                description: random_sentences(rng.gen_range(20, 200), &mut rng),
-            };
-            builder.update_document(&document, &tokenizer_builder, &stop_words)?;
-        }
-
-        let view = database.commit_update(builder)?;
-
-        bench.iter(|| {
-            for q in &["a", "b", "c", "d", "e"] {
-                let documents = view.query_builder().query(q, 0..20);
-                test::black_box(|| documents);
-            }
-        });
-
-        Ok(())
-    }
-
-    #[bench]
-    #[ignore]
-    fn search_oneletter_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
-        let dir = tempfile::tempdir()?;
-        let stop_words = HashSet::new();
-
-        let mut builder = SchemaBuilder::with_identifier("id");
-        builder.new_attribute("title", STORED | INDEXED);
-        builder.new_attribute("description", STORED | INDEXED);
-        let schema = builder.build();
-
-        let db_path = dir.path().join("bench.mdb");
-        let index_name = "default";
-
-        let database = Database::create(&db_path)?;
-        database.create_index(index_name, &schema)?;
-
-        #[derive(Serialize)]
-        struct Document {
-            id: u64,
-            title: String,
-            description: String,
-        }
-
-        let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.start_update(index_name)?;
-        let mut rng = XorShiftRng::seed_from_u64(42);
-
-        for i in 0..30_000 {
-            let document = Document {
-                id: i,
-                title: random_sentences(rng.gen_range(1, 8), &mut rng),
-                description: random_sentences(rng.gen_range(20, 200), &mut rng),
-            };
-            builder.update_document(&document, &tokenizer_builder, &stop_words)?;
-        }
-
-        let view = database.commit_update(builder)?;
-
-        bench.iter(|| {
-            for q in &["a", "b", "c", "d", "e"] {
-                let documents = view.query_builder().query(q, 0..20);
-                test::black_box(|| documents);
-            }
-        });
-
-        Ok(())
-    }
-}
--- a/src/database/number.rs
+++ b/src/database/number.rs
@ -1,98 +0,0 @@
-use std::cmp::Ordering;
-use std::str::FromStr;
-use std::fmt;
-
-use serde_derive::{Serialize, Deserialize};
-
-#[derive(Serialize, Deserialize)]
-#[derive(Debug, Copy, Clone)]
-pub enum Number {
-    Unsigned(u64),
-    Signed(i64),
-    Float(f64),
-}
-
-impl FromStr for Number {
-    type Err = ParseNumberError;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        if let Ok(unsigned) = u64::from_str(s) {
-            return Ok(Number::Unsigned(unsigned))
-        }
-
-        if let Ok(signed) = i64::from_str(s) {
-            return Ok(Number::Signed(signed))
-        }
-
-        if let Ok(float) = f64::from_str(s) {
-            if float == 0.0 || float.is_normal() {
-                return Ok(Number::Float(float))
-            }
-        }
-
-        Err(ParseNumberError)
-    }
-}
-
-impl PartialOrd for Number {
-    fn partial_cmp(&self, other: &Number) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for Number {
-    fn cmp(&self, other: &Number) -> Ordering {
-        use Number::*;
-        match (self, other) {
-            (Unsigned(s), Unsigned(o)) => s.cmp(o),
-            (Unsigned(s), Signed(o)) => {
-                let s = i128::from(*s);
-                let o = i128::from(*o);
-                s.cmp(&o)
-            },
-            (Unsigned(s), Float(o)) => {
-                let s = *s as f64;
-                s.partial_cmp(&o).unwrap_or(Ordering::Equal)
-            },
-
-            (Signed(s), Unsigned(o)) => {
-                let s = i128::from(*s);
-                let o = i128::from(*o);
-                s.cmp(&o)
-            },
-            (Signed(s), Signed(o)) => s.cmp(o),
-            (Signed(s), Float(o)) => {
-                let s = *s as f64;
-                s.partial_cmp(o).unwrap_or(Ordering::Equal)
-            },
-
-            (Float(s), Unsigned(o)) => {
-                let o = *o as f64;
-                s.partial_cmp(&o).unwrap_or(Ordering::Equal)
-            },
-            (Float(s), Signed(o)) => {
-                let o = *o as f64;
-                s.partial_cmp(&o).unwrap_or(Ordering::Equal)
-            },
-            (Float(s), Float(o)) => {
-                s.partial_cmp(o).unwrap_or(Ordering::Equal)
-            },
-        }
-    }
-}
-
-impl PartialEq for Number {
-    fn eq(&self, other: &Number) -> bool {
-        self.cmp(other) == Ordering::Equal
-    }
-}
-
-impl Eq for Number { }
-
-pub struct ParseNumberError;
-
-impl fmt::Display for ParseNumberError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.write_str("can not parse number")
-    }
-}
--- a/src/database/serde/deserializer.rs
+++ b/src/database/serde/deserializer.rs
@ -1,186 +0,0 @@
-use std::error::Error;
-use std::ops::Deref;
-use std::fmt;
-
-use rocksdb::rocksdb::{DB, Snapshot, SeekKey};
-use rocksdb::rocksdb_options::ReadOptions;
-use serde::forward_to_deserialize_any;
-use serde::de::value::MapDeserializer;
-use serde::de::{self, Visitor, IntoDeserializer};
-
-use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
-use crate::database::schema::Schema;
-use crate::DocumentId;
-
-pub struct Deserializer<'a, D>
-where D: Deref<Target=DB>
-{
-    snapshot: &'a Snapshot<D>,
-    schema: &'a Schema,
-    document_id: DocumentId,
-}
-
-impl<'a, D> Deserializer<'a, D>
-where D: Deref<Target=DB>
-{
-    pub fn new(snapshot: &'a Snapshot<D>, schema: &'a Schema, doc: DocumentId) -> Self {
-        Deserializer { snapshot, schema, document_id: doc }
-    }
-}
-
-impl<'de, 'a, 'b, D> de::Deserializer<'de> for &'b mut Deserializer<'a, D>
-where D: Deref<Target=DB>
-{
-    type Error = DeserializerError;
-
-    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-    where V: Visitor<'de>
-    {
-        self.deserialize_map(visitor)
-    }
-
-    forward_to_deserialize_any! {
-        bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
-        bytes byte_buf unit_struct tuple_struct
-        identifier tuple ignored_any option newtype_struct enum struct
-    }
-
-    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-    where V: Visitor<'de>
-    {
-        let mut options = ReadOptions::new();
-        let lower = DocumentKey::new(self.document_id);
-        let upper = lower.with_attribute_max();
-        options.set_iterate_lower_bound(lower.as_ref());
-        options.set_iterate_upper_bound(upper.as_ref());
-
-        let mut iter = self.snapshot.iter_opt(options);
-        iter.seek(SeekKey::Start);
-
-        if iter.kv().is_none() {
-            // FIXME return an error
-        }
-
-        let iter = iter.map(|(key, value)| {
-            // retrieve the schema attribute name
-            // from the schema attribute number
-            let document_key_attr = DocumentKeyAttr::from_bytes(&key);
-            let schema_attr = document_key_attr.attribute();
-            let attribute_name = self.schema.attribute_name(schema_attr);
-            (attribute_name, Value(value))
-        });
-
-        let map_deserializer = MapDeserializer::new(iter);
-        visitor.visit_map(map_deserializer)
-    }
-}
-
-struct Value(Vec<u8>);
-
-impl<'de> IntoDeserializer<'de, DeserializerError> for Value {
-    type Deserializer = Self;
-
-    fn into_deserializer(self) -> Self::Deserializer {
-        self
-    }
-}
-
-macro_rules! forward_to_bincode_values {
-    ($($ty:ident => $de_method:ident,)*) => {
-        $(
-            fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-                where V: de::Visitor<'de>
-            {
-                match bincode::deserialize::<$ty>(&self.0) {
-                    Ok(val) => val.into_deserializer().$de_method(visitor),
-                    Err(e) => Err(de::Error::custom(e)),
-                }
-            }
-        )*
-    }
-}
-
-impl<'de, 'a> de::Deserializer<'de> for Value {
-    type Error = DeserializerError;
-
-    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-    where V: Visitor<'de>
-    {
-        self.0.into_deserializer().deserialize_any(visitor)
-    }
-
-    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-    where V: Visitor<'de>
-    {
-        self.deserialize_string(visitor)
-    }
-
-    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-    where V: Visitor<'de>
-    {
-        match bincode::deserialize::<String>(&self.0) {
-            Ok(val) => val.into_deserializer().deserialize_string(visitor),
-            Err(e) => Err(de::Error::custom(e)),
-        }
-    }
-
-    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-    where V: Visitor<'de>
-    {
-        self.deserialize_byte_buf(visitor)
-    }
-
-    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
-    where V: Visitor<'de>
-    {
-        match bincode::deserialize::<Vec<u8>>(&self.0) {
-            Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor),
-            Err(e) => Err(de::Error::custom(e)),
-        }
-    }
-
-    forward_to_bincode_values! {
-        char => deserialize_char,
-        bool => deserialize_bool,
-
-        u8  => deserialize_u8,
-        u16 => deserialize_u16,
-        u32 => deserialize_u32,
-        u64 => deserialize_u64,
-
-        i8  => deserialize_i8,
-        i16 => deserialize_i16,
-        i32 => deserialize_i32,
-        i64 => deserialize_i64,
-
-        f32 => deserialize_f32,
-        f64 => deserialize_f64,
-    }
-
-    forward_to_deserialize_any! {
-        unit seq map
-        unit_struct tuple_struct
-        identifier tuple ignored_any option newtype_struct enum struct
-    }
-}
-
-#[derive(Debug)]
-pub enum DeserializerError {
-    Custom(String),
-}
-
-impl de::Error for DeserializerError {
-    fn custom<T: fmt::Display>(msg: T) -> Self {
-        DeserializerError::Custom(msg.to_string())
-    }
-}
-
-impl fmt::Display for DeserializerError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            DeserializerError::Custom(s) => f.write_str(&s),
-        }
-    }
-}
-
-impl Error for DeserializerError {}
--- a/src/database/serde/indexer_serializer.rs
+++ b/src/database/serde/indexer_serializer.rs
@ -1,194 +0,0 @@
-use std::collections::HashSet;
-
-use serde::Serialize;
-use serde::ser;
-
-use crate::database::update::DocumentUpdate;
-use crate::database::serde::SerializerError;
-use crate::database::schema::SchemaAttr;
-use crate::tokenizer::TokenizerBuilder;
-use crate::tokenizer::Token;
-use crate::{is_cjk, DocumentId, DocIndex};
-
-pub struct IndexerSerializer<'a, 'b, B> {
-    pub tokenizer_builder: &'a B,
-    pub update: &'a mut DocumentUpdate<'b>,
-    pub document_id: DocumentId,
-    pub attribute: SchemaAttr,
-    pub stop_words: &'a HashSet<String>,
-}
-
-impl<'a, 'b, B> ser::Serializer for IndexerSerializer<'a, 'b, B>
-where B: TokenizerBuilder
-{
-    type Ok = ();
-    type Error = SerializerError;
-    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
-
-    forward_to_unserializable_type! {
-        bool => serialize_bool,
-        char => serialize_char,
-
-        i8  => serialize_i8,
-        i16 => serialize_i16,
-        i32 => serialize_i32,
-        i64 => serialize_i64,
-
-        u8  => serialize_u8,
-        u16 => serialize_u16,
-        u32 => serialize_u32,
-        u64 => serialize_u64,
-
-        f32 => serialize_f32,
-        f64 => serialize_f64,
-    }
-
-    fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
-        for token in self.tokenizer_builder.build(v) {
-            let Token { word, word_index, char_index } = token;
-            let document_id = self.document_id;
-
-            // FIXME must u32::try_from instead
-            let attribute = self.attribute.0;
-            let word_index = word_index as u16;
-
-            // insert the exact representation
-            let word_lower = word.to_lowercase();
-            let length = word.chars().count() as u16;
-
-            if self.stop_words.contains(&word_lower) { continue }
-
-            // and the unidecoded lowercased version
-            if !word_lower.chars().any(is_cjk) {
-                let word_unidecoded = unidecode::unidecode(word).to_lowercase();
-                let word_unidecoded = word_unidecoded.trim();
-                if word_lower != word_unidecoded {
-                    let char_index = char_index as u16;
-                    let char_length = length;
-
-                    let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
-                    self.update.insert_doc_index(word_unidecoded.as_bytes().to_vec(), doc_index)?;
-                }
-            }
-
-            let char_index = char_index as u16;
-            let char_length = length;
-
-            let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
-            self.update.insert_doc_index(word_lower.into_bytes(), doc_index)?;
-        }
-        Ok(())
-    }
-
-    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "&[u8]" })
-    }
-
-    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "Option" })
-    }
-
-    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
-    where T: Serialize,
-    {
-        Err(SerializerError::UnserializableType { name: "Option" })
-    }
-
-    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "()" })
-    }
-
-    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "unit struct" })
-    }
-
-    fn serialize_unit_variant(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str
-    ) -> Result<Self::Ok, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "unit variant" })
-    }
-
-    fn serialize_newtype_struct<T: ?Sized>(
-        self,
-        _name: &'static str,
-        value: &T
-    ) -> Result<Self::Ok, Self::Error>
-    where T: Serialize,
-    {
-        value.serialize(self)
-    }
-
-    fn serialize_newtype_variant<T: ?Sized>(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str,
-        _value: &T
-    ) -> Result<Self::Ok, Self::Error>
-    where T: Serialize,
-    {
-        Err(SerializerError::UnserializableType { name: "newtype variant" })
-    }
-
-    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "seq" })
-    }
-
-    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "tuple" })
-    }
-
-    fn serialize_tuple_struct(
-        self,
-        _name: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeTupleStruct, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "tuple struct" })
-    }
-
-    fn serialize_tuple_variant(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeTupleVariant, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "tuple variant" })
-    }
-
-    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "map" })
-    }
-
-    fn serialize_struct(
-        self,
-        _name: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeStruct, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "struct" })
-    }
-
-    fn serialize_struct_variant(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeStructVariant, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "struct variant" })
-    }
-}
--- a/src/database/serde/mod.rs
+++ b/src/database/serde/mod.rs
@ -1,65 +0,0 @@
-use std::collections::hash_map::DefaultHasher;
-use std::hash::{Hash, Hasher};
-use std::error::Error;
-use std::fmt;
-
-use serde::ser;
-
-macro_rules! forward_to_unserializable_type {
-    ($($ty:ident => $se_method:ident,)*) => {
-        $(
-            fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
-                Err(SerializerError::UnserializableType { name: "$ty" })
-            }
-        )*
-    }
-}
-
-pub mod find_id;
-pub mod key_to_string;
-pub mod value_to_number;
-pub mod serializer;
-pub mod indexer_serializer;
-pub mod deserializer;
-
-pub fn calculate_hash<T: Hash>(t: &T) -> u64 {
-    let mut s = DefaultHasher::new();
-    t.hash(&mut s);
-    s.finish()
-}
-
-#[derive(Debug)]
-pub enum SerializerError {
-    DocumentIdNotFound,
-    UnserializableType { name: &'static str },
-    Custom(String),
-}
-
-impl ser::Error for SerializerError {
-    fn custom<T: fmt::Display>(msg: T) -> Self {
-        SerializerError::Custom(msg.to_string())
-    }
-}
-
-impl fmt::Display for SerializerError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            SerializerError::DocumentIdNotFound => {
-                write!(f, "serialized document does not have an id according to the schema")
-            }
-            SerializerError::UnserializableType { name } => {
-                write!(f, "Only struct and map types are considered valid documents and
-                           can be serialized, not {} types directly.", name)
-            },
-            SerializerError::Custom(s) => f.write_str(&s),
-        }
-    }
-}
-
-impl Error for SerializerError {}
-
-impl From<String> for SerializerError {
-    fn from(value: String) -> SerializerError {
-        SerializerError::Custom(value)
-    }
-}
--- a/src/database/serde/serializer.rs
+++ b/src/database/serde/serializer.rs
@ -1,296 +0,0 @@
-use std::collections::HashSet;
-
-use serde::Serialize;
-use serde::ser;
-
-use crate::database::serde::indexer_serializer::IndexerSerializer;
-use crate::database::serde::key_to_string::KeyToStringSerializer;
-use crate::database::serde::value_to_number::ValueToNumberSerializer;
-use crate::database::update::DocumentUpdate;
-use crate::database::serde::SerializerError;
-use crate::tokenizer::TokenizerBuilder;
-use crate::database::schema::Schema;
-use crate::DocumentId;
-
-pub struct Serializer<'a, 'b, B> {
-    pub schema: &'a Schema,
-    pub update: &'a mut DocumentUpdate<'b>,
-    pub document_id: DocumentId,
-    pub tokenizer_builder: &'a B,
-    pub stop_words: &'a HashSet<String>,
-}
-
-impl<'a, 'b, B> ser::Serializer for Serializer<'a, 'b, B>
-where B: TokenizerBuilder
-{
-    type Ok = ();
-    type Error = SerializerError;
-    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeMap = MapSerializer<'a, 'b, B>;
-    type SerializeStruct = StructSerializer<'a, 'b, B>;
-    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
-
-    forward_to_unserializable_type! {
-        bool => serialize_bool,
-        char => serialize_char,
-
-        i8  => serialize_i8,
-        i16 => serialize_i16,
-        i32 => serialize_i32,
-        i64 => serialize_i64,
-
-        u8  => serialize_u8,
-        u16 => serialize_u16,
-        u32 => serialize_u32,
-        u64 => serialize_u64,
-
-        f32 => serialize_f32,
-        f64 => serialize_f64,
-    }
-
-    fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "str" })
-    }
-
-    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "&[u8]" })
-    }
-
-    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "Option" })
-    }
-
-    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
-    where T: Serialize,
-    {
-        Err(SerializerError::UnserializableType { name: "Option" })
-    }
-
-    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "()" })
-    }
-
-    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "unit struct" })
-    }
-
-    fn serialize_unit_variant(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str
-    ) -> Result<Self::Ok, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "unit variant" })
-    }
-
-    fn serialize_newtype_struct<T: ?Sized>(
-        self,
-        _name: &'static str,
-        value: &T
-    ) -> Result<Self::Ok, Self::Error>
-    where T: Serialize,
-    {
-        value.serialize(self)
-    }
-
-    fn serialize_newtype_variant<T: ?Sized>(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str,
-        _value: &T
-    ) -> Result<Self::Ok, Self::Error>
-    where T: Serialize,
-    {
-        Err(SerializerError::UnserializableType { name: "newtype variant" })
-    }
-
-    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "sequence" })
-    }
-
-    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
-        Err(SerializerError::UnserializableType { name: "tuple" })
-    }
-
-    fn serialize_tuple_struct(
-        self,
-        _name: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeTupleStruct, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "tuple struct" })
-    }
-
-    fn serialize_tuple_variant(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeTupleVariant, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "tuple variant" })
-    }
-
-    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
-        Ok(MapSerializer {
-            schema: self.schema,
-            document_id: self.document_id,
-            update: self.update,
-            tokenizer_builder: self.tokenizer_builder,
-            stop_words: self.stop_words,
-            current_key_name: None,
-        })
-    }
-
-    fn serialize_struct(
-        self,
-        _name: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeStruct, Self::Error>
-    {
-        Ok(StructSerializer {
-            schema: self.schema,
-            document_id: self.document_id,
-            update: self.update,
-            tokenizer_builder: self.tokenizer_builder,
-            stop_words: self.stop_words,
-        })
-    }
-
-    fn serialize_struct_variant(
-        self,
-        _name: &'static str,
-        _variant_index: u32,
-        _variant: &'static str,
-        _len: usize
-    ) -> Result<Self::SerializeStructVariant, Self::Error>
-    {
-        Err(SerializerError::UnserializableType { name: "struct variant" })
-    }
-}
-
-pub struct MapSerializer<'a, 'b, B> {
-    pub schema: &'a Schema,
-    pub document_id: DocumentId,
-    pub update: &'a mut DocumentUpdate<'b>,
-    pub tokenizer_builder: &'a B,
-    pub stop_words: &'a HashSet<String>,
-    pub current_key_name: Option<String>,
-}
-
-impl<'a, 'b, B> ser::SerializeMap for MapSerializer<'a, 'b, B>
-where B: TokenizerBuilder
-{
-    type Ok = ();
-    type Error = SerializerError;
-
-    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
-    where T: Serialize,
-    {
-        let key = key.serialize(KeyToStringSerializer)?;
-        self.current_key_name = Some(key);
-        Ok(())
-    }
-
-    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
-    where T: Serialize,
-    {
-        let key = self.current_key_name.take().unwrap();
-        self.serialize_entry(&key, value)
-    }
-
-    fn serialize_entry<K: ?Sized, V: ?Sized>(
-        &mut self,
-        key: &K,
-        value: &V,
-    ) -> Result<(), Self::Error>
-    where K: Serialize, V: Serialize,
-    {
-        let key = key.serialize(KeyToStringSerializer)?;
-
-        if let Some(attr) = self.schema.attribute(key) {
-            let props = self.schema.props(attr);
-            if props.is_stored() {
-                let value = bincode::serialize(value).unwrap();
-                self.update.insert_attribute_value(attr, &value)?;
-            }
-            if props.is_indexed() {
-                let serializer = IndexerSerializer {
-                    update: self.update,
-                    tokenizer_builder: self.tokenizer_builder,
-                    document_id: self.document_id,
-                    attribute: attr,
-                    stop_words: self.stop_words,
-                };
-                value.serialize(serializer)?;
-            }
-            if props.is_ranked() {
-                let number = value.serialize(ValueToNumberSerializer)?;
-                self.update.register_ranked_attribute(attr, number)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn end(self) -> Result<Self::Ok, Self::Error> {
-        Ok(())
-    }
-}
-
-pub struct StructSerializer<'a, 'b, B> {
-    pub schema: &'a Schema,
-    pub document_id: DocumentId,
-    pub update: &'a mut DocumentUpdate<'b>,
-    pub tokenizer_builder: &'a B,
-    pub stop_words: &'a HashSet<String>,
-}
-
-impl<'a, 'b, B> ser::SerializeStruct for StructSerializer<'a, 'b, B>
-where B: TokenizerBuilder
-{
-    type Ok = ();
-    type Error = SerializerError;
-
-    fn serialize_field<T: ?Sized>(
-        &mut self,
-        key: &'static str,
-        value: &T
-    ) -> Result<(), Self::Error>
-    where T: Serialize,
-    {
-        if let Some(attr) = self.schema.attribute(key) {
-            let props = self.schema.props(attr);
-            if props.is_stored() {
-                let value = bincode::serialize(value).unwrap();
-                self.update.insert_attribute_value(attr, &value)?;
-            }
-            if props.is_indexed() {
-                let serializer = IndexerSerializer {
-                    update: self.update,
-                    tokenizer_builder: self.tokenizer_builder,
-                    document_id: self.document_id,
-                    attribute: attr,
-                    stop_words: self.stop_words,
-                };
-                value.serialize(serializer)?;
-            }
-            if props.is_ranked() {
-                let integer = value.serialize(ValueToNumberSerializer)?;
-                self.update.register_ranked_attribute(attr, integer)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn end(self) -> Result<Self::Ok, Self::Error> {
-        Ok(())
-    }
-}
--- a/src/database/update/index_event.rs
+++ b/src/database/update/index_event.rs
@ -1,55 +0,0 @@
-use std::error::Error;
-
-use byteorder::{ReadBytesExt, WriteBytesExt};
-
-use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
-use crate::write_to_bytes::WriteToBytes;
-use crate::database::Index;
-use crate::data::DocIds;
-
-pub enum WriteIndexEvent<'a> {
-    RemovedDocuments(&'a DocIds),
-    UpdatedDocuments(&'a Index),
-}
-
-impl<'a> WriteToBytes for WriteIndexEvent<'a> {
-    fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
-        match self {
-            WriteIndexEvent::RemovedDocuments(doc_ids) => {
-                let _ = bytes.write_u8(0);
-                doc_ids.write_to_bytes(bytes);
-            },
-            WriteIndexEvent::UpdatedDocuments(index) => {
-                let _ = bytes.write_u8(1);
-                index.write_to_bytes(bytes);
-            }
-        }
-    }
-}
-
-pub enum ReadIndexEvent {
-    RemovedDocuments(DocIds),
-    UpdatedDocuments(Index),
-}
-
-impl ReadIndexEvent {
-    pub fn updated_documents(self) -> Option<Index> {
-        use ReadIndexEvent::*;
-        match self {
-            RemovedDocuments(_) => None,
-            UpdatedDocuments(index) => Some(index),
-        }
-    }
-}
-
-impl FromSharedDataCursor for ReadIndexEvent {
-    type Error = Box<Error>;
-
-    fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> {
-        match cursor.read_u8()? {
-            0 => DocIds::from_shared_data_cursor(cursor).map(ReadIndexEvent::RemovedDocuments),
-            1 => Index::from_shared_data_cursor(cursor).map(ReadIndexEvent::UpdatedDocuments),
-            _ => unreachable!(),
-        }
-    }
-}
--- a/src/database/update/mod.rs
+++ b/src/database/update/mod.rs
@ -1,239 +0,0 @@
-use std::collections::{HashSet, BTreeMap};
-use std::error::Error;
-
-use rocksdb::rocksdb::{Writable, WriteBatch};
-use hashbrown::hash_map::HashMap;
-use sdset::{Set, SetBuf};
-use serde::Serialize;
-
-use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
-use crate::database::serde::serializer::Serializer;
-use crate::database::serde::SerializerError;
-use crate::database::schema::SchemaAttr;
-use crate::database::schema::Schema;
-use crate::database::index::IndexBuilder;
-use crate::database::{DATA_INDEX, DATA_RANKED_MAP};
-use crate::database::{RankedMap, Number};
-use crate::tokenizer::TokenizerBuilder;
-use crate::write_to_bytes::WriteToBytes;
-use crate::data::DocIds;
-use crate::{DocumentId, DocIndex};
-
-pub use self::index_event::{ReadIndexEvent, WriteIndexEvent};
-pub use self::ranked_map_event::{ReadRankedMapEvent, WriteRankedMapEvent};
-
-mod index_event;
-mod ranked_map_event;
-
-pub type Token = Vec<u8>; // TODO could be replaced by a SmallVec
-
-pub struct Update {
-    schema: Schema,
-    raw_builder: RawUpdateBuilder,
-}
-
-impl Update {
-    pub(crate) fn new(schema: Schema) -> Update {
-        Update { schema, raw_builder: RawUpdateBuilder::new() }
-    }
-
-    pub fn update_document<T, B>(
-        &mut self,
-        document: T,
-        tokenizer_builder: &B,
-        stop_words: &HashSet<String>,
-    ) -> Result<DocumentId, SerializerError>
-    where T: Serialize,
-          B: TokenizerBuilder,
-    {
-        let document_id = self.schema.document_id(&document)?;
-
-        let serializer = Serializer {
-            schema: &self.schema,
-            document_id: document_id,
-            tokenizer_builder: tokenizer_builder,
-            update: &mut self.raw_builder.document_update(document_id)?,
-            stop_words: stop_words,
-        };
-
-        document.serialize(serializer)?;
-
-        Ok(document_id)
-    }
-
-    pub fn remove_document<T>(&mut self, document: T) -> Result<DocumentId, SerializerError>
-    where T: Serialize,
-    {
-        let document_id = self.schema.document_id(&document)?;
-        self.raw_builder.document_update(document_id)?.remove()?;
-        Ok(document_id)
-    }
-
-    pub(crate) fn build(self) -> Result<WriteBatch, Box<Error>> {
-        self.raw_builder.build()
-    }
-}
-
-#[derive(Copy, Clone, PartialEq, Eq)]
-enum UpdateType {
-    Updated,
-    Deleted,
-}
-
-use UpdateType::{Updated, Deleted};
-
-pub struct RawUpdateBuilder {
-    documents_update: HashMap<DocumentId, UpdateType>,
-    documents_ranked_fields: RankedMap,
-    indexed_words: BTreeMap<Token, Vec<DocIndex>>,
-    batch: WriteBatch,
-}
-
-impl RawUpdateBuilder {
-    pub fn new() -> RawUpdateBuilder {
-        RawUpdateBuilder {
-            documents_update: HashMap::new(),
-            documents_ranked_fields: HashMap::new(),
-            indexed_words: BTreeMap::new(),
-            batch: WriteBatch::new(),
-        }
-    }
-
-    pub fn document_update(&mut self, document_id: DocumentId) -> Result<DocumentUpdate, SerializerError> {
-        use serde::ser::Error;
-
-        match self.documents_update.get(&document_id) {
-            Some(Deleted) | None => Ok(DocumentUpdate { document_id, inner: self }),
-            Some(Updated) => Err(SerializerError::custom(
-                "This document has already been removed and cannot be updated in the same update"
-            )),
-        }
-    }
-
-    pub fn build(self) -> Result<WriteBatch, Box<Error>> {
-        // create the list of all the removed documents
-        let removed_documents = {
-            let mut document_ids = Vec::new();
-            for (id, update_type) in self.documents_update {
-                if update_type == Deleted {
-                    document_ids.push(id);
-                }
-            }
-
-            document_ids.sort_unstable();
-            let setbuf = SetBuf::new_unchecked(document_ids);
-            DocIds::new(&setbuf)
-        };
-
-        // create the Index of all the document updates
-        let index = {
-            let mut builder = IndexBuilder::new();
-            for (key, mut indexes) in self.indexed_words {
-                indexes.sort_unstable();
-                let indexes = Set::new_unchecked(&indexes);
-                builder.insert(key, indexes).unwrap();
-            }
-            builder.build()
-        };
-
-        // WARN: removed documents must absolutely
-        //       be merged *before* document updates
-
-        // === index ===
-
-        if !removed_documents.is_empty() {
-            // remove the documents using the appropriate IndexEvent
-            let event_bytes = WriteIndexEvent::RemovedDocuments(&removed_documents).into_bytes();
-            self.batch.merge(DATA_INDEX, &event_bytes)?;
-        }
-
-        // update the documents using the appropriate IndexEvent
-        let event_bytes = WriteIndexEvent::UpdatedDocuments(&index).into_bytes();
-        self.batch.merge(DATA_INDEX, &event_bytes)?;
-
-        // === ranked map ===
-
-        if !removed_documents.is_empty() {
-            // update the ranked map using the appropriate RankedMapEvent
-            let event_bytes = WriteRankedMapEvent::RemovedDocuments(&removed_documents).into_bytes();
-            self.batch.merge(DATA_RANKED_MAP, &event_bytes)?;
-        }
-
-        // update the documents using the appropriate IndexEvent
-        let event_bytes = WriteRankedMapEvent::UpdatedDocuments(&self.documents_ranked_fields).into_bytes();
-        self.batch.merge(DATA_RANKED_MAP, &event_bytes)?;
-
-        Ok(self.batch)
-    }
-}
-
-pub struct DocumentUpdate<'a> {
-    document_id: DocumentId,
-    inner: &'a mut RawUpdateBuilder,
-}
-
-impl<'a> DocumentUpdate<'a> {
-    pub fn remove(&mut self) -> Result<(), SerializerError> {
-        use serde::ser::Error;
-
-        if let Updated = self.inner.documents_update.entry(self.document_id).or_insert(Deleted) {
-            return Err(SerializerError::custom(
-                "This document has already been updated and cannot be removed in the same update"
-            ));
-        }
-
-        let start = DocumentKey::new(self.document_id).with_attribute_min();
-        let end = DocumentKey::new(self.document_id).with_attribute_max(); // FIXME max + 1
-        self.inner.batch.delete_range(start.as_ref(), end.as_ref())?;
-
-        Ok(())
-    }
-
-    pub fn insert_attribute_value(&mut self, attr: SchemaAttr, value: &[u8]) -> Result<(), SerializerError> {
-        use serde::ser::Error;
-
-        if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
-            return Err(SerializerError::custom(
-                "This document has already been deleted and cannot be updated in the same update"
-            ));
-        }
-
-        let key = DocumentKeyAttr::new(self.document_id, attr);
-        self.inner.batch.put(key.as_ref(), &value)?;
-
-        Ok(())
-    }
-
-    pub fn insert_doc_index(&mut self, token: Token, doc_index: DocIndex) -> Result<(), SerializerError> {
-        use serde::ser::Error;
-
-        if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
-            return Err(SerializerError::custom(
-                "This document has already been deleted and cannot be updated in the same update"
-            ));
-        }
-
-        self.inner.indexed_words.entry(token).or_insert_with(Vec::new).push(doc_index);
-
-        Ok(())
-    }
-
-    pub fn register_ranked_attribute(
-        &mut self,
-        attr: SchemaAttr,
-        number: Number,
-    ) -> Result<(), SerializerError>
-    {
-        use serde::ser::Error;
-
-        if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
-            return Err(SerializerError::custom(
-                "This document has already been deleted, ranked attributes cannot be added in the same update"
-            ));
-        }
-
-        self.inner.documents_ranked_fields.insert((self.document_id, attr), number);
-
-        Ok(())
-    }
-}
--- a/src/database/update/ranked_map_event.rs
+++ b/src/database/update/ranked_map_event.rs
@ -1,58 +0,0 @@
-use std::error::Error;
-
-use byteorder::{ReadBytesExt, WriteBytesExt};
-
-use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
-use crate::write_to_bytes::WriteToBytes;
-use crate::database::RankedMap;
-use crate::data::DocIds;
-
-pub enum WriteRankedMapEvent<'a> {
-    RemovedDocuments(&'a DocIds),
-    UpdatedDocuments(&'a RankedMap),
-}
-
-impl<'a> WriteToBytes for WriteRankedMapEvent<'a> {
-    fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
-        match self {
-            WriteRankedMapEvent::RemovedDocuments(doc_ids) => {
-                let _ = bytes.write_u8(0);
-                doc_ids.write_to_bytes(bytes);
-            },
-            WriteRankedMapEvent::UpdatedDocuments(ranked_map) => {
-                let _ = bytes.write_u8(1);
-                bincode::serialize_into(bytes, ranked_map).unwrap()
-            }
-        }
-    }
-}
-
-pub enum ReadRankedMapEvent {
-    RemovedDocuments(DocIds),
-    UpdatedDocuments(RankedMap),
-}
-
-impl ReadRankedMapEvent {
-    pub fn updated_documents(self) -> Option<RankedMap> {
-        use ReadRankedMapEvent::*;
-        match self {
-            RemovedDocuments(_) => None,
-            UpdatedDocuments(ranked_map) => Some(ranked_map),
-        }
-    }
-}
-
-impl FromSharedDataCursor for ReadRankedMapEvent {
-    type Error = Box<Error>;
-
-    fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> {
-        match cursor.read_u8()? {
-            0 => DocIds::from_shared_data_cursor(cursor).map(ReadRankedMapEvent::RemovedDocuments),
-            1 => {
-                let ranked_map = bincode::deserialize_from(cursor)?;
-                Ok(ReadRankedMapEvent::UpdatedDocuments(ranked_map))
-            },
-            _ => unreachable!(),
-        }
-    }
-}
--- a/src/database/view.rs
+++ b/src/database/view.rs
@ -1,201 +0,0 @@
-use std::error::Error;
-use std::path::Path;
-use std::ops::Deref;
-use std::{fmt, marker};
-
-use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions};
-use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter};
-use serde::de::DeserializeOwned;
-
-use crate::database::{retrieve_data_schema, retrieve_data_index, retrieve_data_ranked_map, retrieve_config};
-use crate::database::serde::deserializer::Deserializer;
-use crate::database::{DocumentKey, DocumentKeyAttr};
-use crate::rank::{QueryBuilder, FilterFunc};
-use crate::database::schema::Schema;
-use crate::database::index::Index;
-use crate::database::RankedMap;
-use crate::database::Config;
-use crate::DocumentId;
-
-pub struct DatabaseView<D>
-where D: Deref<Target=DB>
-{
-    snapshot: Snapshot<D>,
-    index: Index,
-    ranked_map: RankedMap,
-    schema: Schema,
-    config: Config,
-}
-
-impl<D> DatabaseView<D>
-where D: Deref<Target=DB>
-{
-    pub fn new(snapshot: Snapshot<D>) -> Result<DatabaseView<D>, Box<Error>> {
-        let schema = retrieve_data_schema(&snapshot)?;
-        let index = retrieve_data_index(&snapshot)?;
-        let ranked_map = retrieve_data_ranked_map(&snapshot)?;
-        let config = retrieve_config(&snapshot)?;
-        Ok(DatabaseView { snapshot, index, ranked_map, schema, config })
-    }
-
-    pub fn schema(&self) -> &Schema {
-        &self.schema
-    }
-
-    pub fn index(&self) -> &Index {
-        &self.index
-    }
-
-    pub fn ranked_map(&self) -> &RankedMap {
-        &self.ranked_map
-    }
-
-    pub fn into_snapshot(self) -> Snapshot<D> {
-        self.snapshot
-    }
-
-    pub fn snapshot(&self) -> &Snapshot<D> {
-        &self.snapshot
-    }
-
-    pub fn config(&self) -> &Config {
-        &self.config
-    }
-
-    pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
-        Ok(self.snapshot.get(key)?)
-    }
-
-    pub fn dump_all<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<Error>> {
-        let path = path.as_ref().to_string_lossy();
-
-        let env_options = EnvOptions::new();
-        let column_family_options = ColumnFamilyOptions::new();
-        let mut file_writer = SstFileWriter::new(env_options, column_family_options);
-        file_writer.open(&path)?;
-
-        let mut iter = self.snapshot.iter();
-        iter.seek(SeekKey::Start);
-
-        for (key, value) in &mut iter {
-            file_writer.put(&key, &value)?;
-        }
-
-        file_writer.finish()?;
-        Ok(())
-    }
-
-    pub fn query_builder(&self) -> QueryBuilder<FilterFunc> {
-        QueryBuilder::new(self.index())
-    }
-
-    pub fn raw_field_by_document_id(
-        &self,
-        name: &str,
-        id: DocumentId
-    ) -> Result<Option<Vec<u8>>, Box<Error>>
-    {
-        let attr = self.schema.attribute(name).ok_or("field not found")?;
-        let key = DocumentKeyAttr::new(id, attr);
-        let vector = self.snapshot.get(key.as_ref())?;
-
-        Ok(vector.map(|v| v.to_vec()))
-    }
-
-    pub fn document_by_id<T>(&self, id: DocumentId) -> Result<T, Box<Error>>
-    where T: DeserializeOwned,
-    {
-        let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id);
-        Ok(T::deserialize(&mut deserializer)?)
-    }
-
-    pub fn documents_by_id<T, I>(&self, ids: I) -> DocumentIter<D, T, I::IntoIter>
-    where T: DeserializeOwned,
-          I: IntoIterator<Item=DocumentId>,
-    {
-        DocumentIter {
-            database_view: self,
-            document_ids: ids.into_iter(),
-            _phantom: marker::PhantomData,
-        }
-    }
-}
-
-impl<D> fmt::Debug for DatabaseView<D>
-where D: Deref<Target=DB>
-{
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let mut options = ReadOptions::new();
-        let lower = DocumentKey::new(DocumentId(0));
-        options.set_iterate_lower_bound(lower.as_ref());
-
-        let mut iter = self.snapshot.iter_opt(options);
-        iter.seek(SeekKey::Start);
-        let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key));
-
-        if f.alternate() {
-            writeln!(f, "DatabaseView(")?;
-        } else {
-            write!(f, "DatabaseView(")?;
-        }
-
-        self.schema.fmt(f)?;
-
-        if f.alternate() {
-            writeln!(f, ",")?;
-        } else {
-            write!(f, ", ")?;
-        }
-
-        f.debug_list().entries(iter).finish()?;
-
-        write!(f, ")")
-    }
-}
-
-// TODO this is just an iter::Map !!!
-pub struct DocumentIter<'a, D, T, I>
-where D: Deref<Target=DB>
-{
-    database_view: &'a DatabaseView<D>,
-    document_ids: I,
-    _phantom: marker::PhantomData<T>,
-}
-
-impl<'a, D, T, I> Iterator for DocumentIter<'a, D, T, I>
-where D: Deref<Target=DB>,
-      T: DeserializeOwned,
-      I: Iterator<Item=DocumentId>,
-{
-    type Item = Result<T, Box<Error>>;
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.document_ids.size_hint()
-    }
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.document_ids.next() {
-            Some(id) => Some(self.database_view.document_by_id(id)),
-            None => None
-        }
-    }
-}
-
-impl<'a, D, T, I> ExactSizeIterator for DocumentIter<'a, D, T, I>
-where D: Deref<Target=DB>,
-      T: DeserializeOwned,
-      I: ExactSizeIterator + Iterator<Item=DocumentId>,
-{ }
-
-impl<'a, D, T, I> DoubleEndedIterator for DocumentIter<'a, D, T, I>
-where D: Deref<Target=DB>,
-      T: DeserializeOwned,
-      I: DoubleEndedIterator + Iterator<Item=DocumentId>,
-{
-    fn next_back(&mut self) -> Option<Self::Item> {
-        match self.document_ids.next_back() {
-            Some(id) => Some(self.database_view.document_by_id(id)),
-            None => None
-        }
-    }
-}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,136 +0,0 @@
-#![cfg_attr(feature = "nightly", feature(test))]
-
-pub mod automaton;
-pub mod database;
-pub mod data;
-pub mod rank;
-pub mod tokenizer;
-mod common_words;
-mod shared_data_cursor;
-mod write_to_bytes;
-
-use serde_derive::{Serialize, Deserialize};
-
-pub use rocksdb;
-
-pub use self::tokenizer::Tokenizer;
-pub use self::common_words::CommonWords;
-
-pub fn is_cjk(c: char) -> bool {
-    (c >= '\u{2e80}' && c <= '\u{2eff}') ||
-    (c >= '\u{2f00}' && c <= '\u{2fdf}') ||
-    (c >= '\u{3040}' && c <= '\u{309f}') ||
-    (c >= '\u{30a0}' && c <= '\u{30ff}') ||
-    (c >= '\u{3100}' && c <= '\u{312f}') ||
-    (c >= '\u{3200}' && c <= '\u{32ff}') ||
-    (c >= '\u{3400}' && c <= '\u{4dbf}') ||
-    (c >= '\u{4e00}' && c <= '\u{9fff}') ||
-    (c >= '\u{f900}' && c <= '\u{faff}')
-}
-
-/// Represent an internally generated document unique identifier.
-///
-/// It is used to inform the database the document you want to deserialize.
-/// Helpful for custom ranking.
-#[derive(Serialize, Deserialize)]
-#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
-pub struct DocumentId(u64);
-
-/// This structure represent the position of a word
-/// in a document and its attributes.
-///
-/// This is stored in the map, generated at index time,
-/// extracted and interpreted at search time.
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(C)]
-pub struct DocIndex {
-    /// The document identifier where the word was found.
-    pub document_id: DocumentId,
-
-    /// The attribute in the document where the word was found
-    /// along with the index in it.
-    pub attribute: u16,
-    pub word_index: u16,
-
-    /// The position in bytes where the word was found
-    /// along with the length of it.
-    ///
-    /// It informs on the original word area in the text indexed
-    /// without needing to run the tokenizer again.
-    pub char_index: u16,
-    pub char_length: u16,
-}
-
-/// This structure represent a matching word with informations
-/// on the location of the word in the document.
-///
-/// The order of the field is important because it defines
-/// the way these structures are ordered between themselves.
-///
-/// The word in itself is not important.
-// TODO do data oriented programming ? very arrays ?
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Match {
-    /// The word index in the query sentence.
-    /// Same as the `attribute_index` but for the query words.
-    ///
-    /// Used to retrieve the automaton that match this word.
-    pub query_index: u32,
-
-    /// The distance the word has with the query word
-    /// (i.e. the Levenshtein distance).
-    pub distance: u8,
-
-    /// The attribute in the document where the word was found
-    /// along with the index in it.
-    pub attribute: u16,
-    pub word_index: u16,
-
-    /// Whether the word that match is an exact match or a prefix.
-    pub is_exact: bool,
-
-    /// The position in bytes where the word was found
-    /// along with the length of it.
-    ///
-    /// It informs on the original word area in the text indexed
-    /// without needing to run the tokenizer again.
-    pub char_index: u16,
-    pub char_length: u16,
-}
-
-impl Match {
-    pub fn zero() -> Self {
-        Match {
-            query_index: 0,
-            distance: 0,
-            attribute: 0,
-            word_index: 0,
-            is_exact: false,
-            char_index: 0,
-            char_length: 0,
-        }
-    }
-
-    pub fn max() -> Self {
-        Match {
-            query_index: u32::max_value(),
-            distance: u8::max_value(),
-            attribute: u16::max_value(),
-            word_index: u16::max_value(),
-            is_exact: true,
-            char_index: u16::max_value(),
-            char_length: u16::max_value(),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::mem;
-
-    #[test]
-    fn docindex_mem_size() {
-        assert_eq!(mem::size_of::<DocIndex>(), 16);
-    }
-}
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@ -1,259 +0,0 @@
-use std::mem;
-use crate::is_cjk;
-use self::Separator::*;
-
-pub trait TokenizerBuilder {
-    fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=Token<'a>> + 'a>;
-}
-
-pub struct DefaultBuilder;
-
-impl DefaultBuilder {
-    pub fn new() -> DefaultBuilder {
-        DefaultBuilder
-    }
-}
-
-#[derive(Debug, PartialEq, Eq)]
-pub struct Token<'a> {
-    pub word: &'a str,
-    pub word_index: usize,
-    pub char_index: usize,
-}
-
-impl TokenizerBuilder for DefaultBuilder {
-    fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=Token<'a>> + 'a> {
-        Box::new(Tokenizer::new(text))
-    }
-}
-
-pub struct Tokenizer<'a> {
-    word_index: usize,
-    char_index: usize,
-    inner: &'a str,
-}
-
-impl<'a> Tokenizer<'a> {
-    pub fn new(string: &str) -> Tokenizer {
-        let mut char_advance = 0;
-        let mut index_advance = 0;
-        for (n, (i, c)) in string.char_indices().enumerate() {
-            char_advance = n;
-            index_advance = i;
-            if detect_separator(c).is_none() { break }
-        }
-
-        Tokenizer {
-            word_index: 0,
-            char_index: char_advance,
-            inner: &string[index_advance..],
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy)]
-enum Separator {
-    Short,
-    Long,
-}
-
-impl Separator {
-    fn add(self, add: Separator) -> Separator {
-        match (self, add) {
-            (_,     Long)  => Long,
-            (Short, Short) => Short,
-            (Long,  Short) => Long,
-        }
-    }
-
-    fn to_usize(self) -> usize {
-        match self {
-            Short => 1,
-            Long => 8,
-        }
-    }
-}
-
-fn detect_separator(c: char) -> Option<Separator> {
-    match c {
-        '.' | ';' | ',' | '!' | '?' | '-' | '(' | ')' => Some(Long),
-        ' ' | '\'' | '"' => Some(Short),
-        _                => None,
-    }
-}
-
-impl<'a> Iterator for Tokenizer<'a> {
-    type Item = Token<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let mut start_word = None;
-        let mut distance = None;
-
-        for (i, c) in self.inner.char_indices() {
-            match detect_separator(c) {
-                Some(sep) => {
-                    if let Some(start_word) = start_word {
-                        let (prefix, tail) = self.inner.split_at(i);
-                        let (spaces, word) = prefix.split_at(start_word);
-
-                        self.inner = tail;
-                        self.char_index += spaces.chars().count();
-                        self.word_index += distance.map(Separator::to_usize).unwrap_or(0);
-
-                        let token = Token {
-                            word: word,
-                            word_index: self.word_index,
-                            char_index: self.char_index,
-                        };
-
-                        self.char_index += word.chars().count();
-                        return Some(token)
-                    }
-
-                    distance = Some(distance.map_or(sep, |s| s.add(sep)));
-                },
-                None => {
-                    // if this is a Chinese, a Japanese or a Korean character
-                    // See <http://unicode-table.com>
-                    if is_cjk(c) {
-                        match start_word {
-                            Some(start_word) => {
-                                let (prefix, tail) = self.inner.split_at(i);
-                                let (spaces, word) = prefix.split_at(start_word);
-
-                                self.inner = tail;
-                                self.char_index += spaces.chars().count();
-                                self.word_index += distance.map(Separator::to_usize).unwrap_or(0);
-
-                                let token = Token {
-                                    word: word,
-                                    word_index: self.word_index,
-                                    char_index: self.char_index,
-                                };
-
-                                self.word_index += 1;
-                                self.char_index += word.chars().count();
-
-                                return Some(token)
-                            },
-                            None => {
-                                let (prefix, tail) = self.inner.split_at(i + c.len_utf8());
-                                let (spaces, word) = prefix.split_at(i);
-
-                                self.inner = tail;
-                                self.char_index += spaces.chars().count();
-                                self.word_index += distance.map(Separator::to_usize).unwrap_or(0);
-
-                                let token = Token {
-                                    word: word,
-                                    word_index: self.word_index,
-                                    char_index: self.char_index,
-                                };
-
-                                if tail.chars().next().and_then(detect_separator).is_none() {
-                                    self.word_index += 1;
-                                }
-                                self.char_index += 1;
-
-                                return Some(token)
-                            }
-                        }
-                    }
-
-                    if start_word.is_none() { start_word = Some(i) }
-                },
-            }
-        }
-
-        if let Some(start_word) = start_word {
-            let prefix = mem::replace(&mut self.inner, "");
-            let (spaces, word) = prefix.split_at(start_word);
-
-            let token = Token {
-                word: word,
-                word_index: self.word_index + distance.map(Separator::to_usize).unwrap_or(0),
-                char_index: self.char_index + spaces.chars().count(),
-            };
-            return Some(token)
-        }
-
-        None
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn easy() {
-        let mut tokenizer = Tokenizer::new("salut");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "salut", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), None);
-
-        let mut tokenizer = Tokenizer::new("yo    ");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), None);
-    }
-
-    #[test]
-    fn hard() {
-        let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe (ouch)");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "ouch", word_index: 17, char_index: 18 }));
-        assert_eq!(tokenizer.next(), None);
-
-        let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "wtf", word_index: 16, char_index: 12 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 18 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 32, char_index: 24 }));
-        assert_eq!(tokenizer.next(), None);
-    }
-
-    #[test]
-    fn hard_long_chars() {
-        let mut tokenizer = Tokenizer::new(" .? yo 😂. aïe");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "😂", word_index: 1, char_index: 7 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 10 }));
-        assert_eq!(tokenizer.next(), None);
-
-        let mut tokenizer = Tokenizer::new("yo ! lolo ? 😱 - lol . 😣 ,");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "😱", word_index: 16, char_index: 12 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 16 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 }));
-        assert_eq!(tokenizer.next(), None);
-    }
-
-    #[test]
-    fn hard_kanjis() {
-        let mut tokenizer = Tokenizer::new("\u{2ec4}lolilol\u{2ec7}");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 1, char_index: 1 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 2, char_index: 8 }));
-        assert_eq!(tokenizer.next(), None);
-
-        let mut tokenizer = Tokenizer::new("\u{2ec4}\u{2ed3}\u{2ef2} lolilol - hello    \u{2ec7}");
-
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ed3}", word_index: 1, char_index: 1 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ef2}", word_index: 2, char_index: 2 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 3, char_index: 4 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "hello", word_index: 11, char_index: 14 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 12, char_index: 23 }));
-        assert_eq!(tokenizer.next(), None);
-    }
-}