From b3249d515dce8d06eb25829c6e397b4ea82a41a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Thu, 15 Nov 2018 17:55:20 +0100
Subject: [PATCH] feat: Introduce an Index system based on RocksDB

---
 Cargo.toml                          |  12 +-
 examples/csv-indexer.rs             |   6 +-
 examples/json-lines-indexer.rs      |   6 +-
 src/blob/negative_blob.rs           |   2 +-
 src/data/doc_ids.rs                 |   6 +-
 src/index.rs                        |  40 ------
 src/index/blob_name.rs              |  16 +++
 src/index/mod.rs                    | 175 ++++++++++++++++++++++
 src/index/schema.rs                 |  82 +++++++++++++
 src/index/search.rs                 |   5 +
 src/index/update/mod.rs             |  55 +++++++++
 src/index/update/negative_update.rs |  59 ++++++++++
 src/index/update/positive_update.rs | 124 ++++++++++++++
 src/tokenizer/mod.rs                |  78 ++++++------
 14 files changed, 569 insertions(+), 97 deletions(-)
 delete mode 100644 src/index.rs
 create mode 100644 src/index/blob_name.rs
 create mode 100644 src/index/mod.rs
 create mode 100644 src/index/schema.rs
 create mode 100644 src/index/search.rs
 create mode 100644 src/index/update/mod.rs
 create mode 100644 src/index/update/negative_update.rs
 create mode 100644 src/index/update/positive_update.rs

diff --git a/Cargo.toml b/Cargo.toml
index 40be43633..775a0950b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,10 +6,11 @@ authors = ["Kerollmops "]
 
 [dependencies]
 byteorder = "1.2"
+fnv = "1.0"
+fs2 = "0.4"
 lazy_static = "1.1"
 sdset = "0.2"
-fs2 = "0.4"
-fnv = "1.0"
+unidecode = "0.3"
 
 [dependencies.fst]
 git = "https://github.com/Kerollmops/fst.git"
@@ -27,12 +28,11 @@ git = "https://github.com/pingcap/rust-rocksdb.git"
 git = "https://github.com/Kerollmops/group-by.git"
 
 [dev-dependencies]
+csv = "1.0"
+elapsed = "0.1"
 moby-name-gen = "0.1"
+serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
 structopt = "0.2"
-unidecode = "0.3"
-elapsed = "0.1"
-serde = "1.0"
 warp = "0.1"
-csv = "1.0"
diff --git a/examples/csv-indexer.rs b/examples/csv-indexer.rs
index c9dc83c8a..b231b7932 100644
--- a/examples/csv-indexer.rs
+++ b/examples/csv-indexer.rs
@@ -79,8 +79,7 @@ impl CsvIndexer {
         }
 
         {
-            let title = Tokenizer::new(&product.title);
-            let title = title.iter().filter(|&(_, w)| !self.common_words.contains(w));
+            let title = Tokenizer::new(&product.title).filter(|&(_, w)| !self.common_words.contains(w));
             insert_document_words(&mut builder, product.id, 1, title);
 
             let key = format!("{}-title", product.id);
@@ -89,8 +88,7 @@ impl CsvIndexer {
         }
 
         {
-            let description = Tokenizer::new(&product.description);
-            let description = description.iter().filter(|&(_, w)| !self.common_words.contains(w));
+            let description = Tokenizer::new(&product.description).filter(|&(_, w)| !self.common_words.contains(w));
             insert_document_words(&mut builder, product.id, 2, description);
 
             let key = format!("{}-description", product.id);
diff --git a/examples/json-lines-indexer.rs b/examples/json-lines-indexer.rs
index 27314d2af..093c8189b 100644
--- a/examples/json-lines-indexer.rs
+++ b/examples/json-lines-indexer.rs
@@ -84,8 +84,7 @@ impl JsonLinesIndexer {
         }
 
         {
-            let title = Tokenizer::new(&product.title);
-            let title = title.iter().filter(|&(_, w)| !self.common_words.contains(w));
+            let title = Tokenizer::new(&product.title).filter(|&(_, w)| !self.common_words.contains(w));
             insert_document_words(&mut builder, product.id, 1, title);
 
             let key = format!("{}-title", product.id);
@@ -94,8 +93,7 @@ impl JsonLinesIndexer {
         }
 
         {
-            let description = Tokenizer::new(&product.description);
-            let description = description.iter().filter(|&(_, w)| !self.common_words.contains(w));
+            let description = Tokenizer::new(&product.description).filter(|&(_, w)| !self.common_words.contains(w));
             insert_document_words(&mut builder, product.id, 2, description);
 
             let key = format!("{}-description", product.id);
diff --git a/src/blob/negative_blob.rs b/src/blob/negative_blob.rs
index ca8679c81..0226b5ae5 100644
--- a/src/blob/negative_blob.rs
+++ b/src/blob/negative_blob.rs
@@ -40,7 +40,7 @@ impl NegativeBlobBuilder {
         Self { doc_ids: DocIdsBuilder::new(wrt) }
     }
 
-    pub fn insert(&mut self, doc: DocumentId) {
+    pub fn insert(&mut self, doc: DocumentId) -> bool {
         self.doc_ids.insert(doc)
     }
 
diff --git a/src/data/doc_ids.rs b/src/data/doc_ids.rs
index c2c3738a4..00ae2f024 100644
--- a/src/data/doc_ids.rs
+++ b/src/data/doc_ids.rs
@@ -47,7 +47,7 @@ impl DocIds {
 }
 
 pub struct DocIdsBuilder<W> {
-    doc_ids: BTreeSet<DocumentId>,
+    doc_ids: BTreeSet<DocumentId>, // TODO: prefer a linked-list
     wrt: W,
 }
 
@@ -59,8 +59,8 @@ impl DocIdsBuilder {
         }
     }
 
-    pub fn insert(&mut self, doc: DocumentId) {
-        self.doc_ids.insert(doc);
+    pub fn insert(&mut self, doc: DocumentId) -> bool {
+        self.doc_ids.insert(doc)
     }
 
     pub fn into_inner(mut self) -> io::Result<W> {
diff --git a/src/index.rs b/src/index.rs
deleted file mode 100644
index 41e0ada03..000000000
--- a/src/index.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-use std::path::{Path, PathBuf};
-use std::error::Error;
-use std::fs::{self, File};
-
-use fs2::FileExt;
-
-use crate::rank::Document;
-use crate::blob::Blob;
-
-pub struct Index {
-    path: PathBuf,
-    lock_file: File,
-    blobs: Vec<Blob>,
-}
-
-impl Index {
-    pub fn open<P: Into<PathBuf>>(path: P) -> Result<Index, Box<Error>> {
-        let path = path.into();
-
-        let lock_file = File::create(path.join(".lock"))?;
-        lock_file.try_lock_exclusive()?;
-
-        let blobs = Vec::new();
-
-        Ok(Self { path, lock_file, blobs })
-    }
-
-    pub fn create<P: Into<PathBuf>>(path: P) -> Result<Index, Box<Error>> {
-        let path = path.into();
-
-        fs::create_dir_all(&path)?;
-        File::create(path.join(".lock"))?;
-
-        Self::open(path)
-    }
-
-    pub fn blobs(&self) -> &[Blob] {
-        &self.blobs
-    }
-}
diff --git a/src/index/blob_name.rs b/src/index/blob_name.rs
new file mode 100644
index 000000000..50bebcaa8
--- /dev/null
+++ b/src/index/blob_name.rs
@@ -0,0 +1,16 @@
+use std::fmt;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct BlobName;
+
+impl BlobName {
+    pub fn new() -> BlobName {
+        unimplemented!()
+    }
+}
+
+impl fmt::Display for BlobName {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        unimplemented!()
+    }
+}
diff --git a/src/index/mod.rs b/src/index/mod.rs
new file mode 100644
index 000000000..d7f642de4
--- /dev/null
+++ b/src/index/mod.rs
@@ -0,0 +1,175 @@
+pub mod blob_name;
+pub mod schema;
+pub mod search;
+pub mod update;
+
+use std::io;
+use std::rc::Rc;
+use std::error::Error;
+use std::fs::{self, File};
+use std::fmt::{self, Write};
+use std::ops::{Deref, BitOr};
+use std::path::{Path, PathBuf};
+use std::collections::{BTreeSet, BTreeMap};
+
+use fs2::FileExt;
+use ::rocksdb::{rocksdb, rocksdb_options};
+use ::rocksdb::merge_operator::MergeOperands;
+
+use crate::rank::Document;
+use crate::data::DocIdsBuilder;
+use crate::{DocIndex, DocumentId};
+use crate::index::{update::Update, search::Search};
+use crate::blob::{PositiveBlobBuilder, Blob, Sign};
+use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer};
+
+fn simple_vec_append(key: &[u8], value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
+    let mut output = Vec::new();
+    for bytes in operands.chain(value) {
+        output.extend_from_slice(bytes);
+    }
+    output
+}
+
+pub struct Index {
+    database: rocksdb::DB,
+}
+
+impl Index {
+    pub fn open<P: AsRef<Path>>(path: P) -> Result<Index, Box<Error>> {
+        let path = path.as_ref().to_string_lossy();
+
+        let mut opts = rocksdb_options::DBOptions::new();
+        opts.create_if_missing(true);
+
+        let mut cf_opts = rocksdb_options::ColumnFamilyOptions::new();
+        cf_opts.add_merge_operator("blobs order operator", simple_vec_append);
+
+        let database = rocksdb::DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
+
+        // check if index is a valid RocksDB and
+        // contains the right key-values (i.e. "blobs-order")
+
+        Ok(Self { database })
+    }
+
+    pub fn ingest_update(&self, update: Update) -> Result<(), Box<Error>> {
+        let path = update.into_path_buf();
+        let path = path.to_string_lossy();
+
+        let mut options = rocksdb_options::IngestExternalFileOptions::new();
+        // options.move_files(true);
+
+        let cf_handle = self.database.cf_handle("default").unwrap();
+        self.database.ingest_external_file_optimized(&cf_handle, &options, &[&path])?;
+
+        Ok(())
+    }
+
+    pub fn snapshot(&self) -> Snapshot<&rocksdb::DB> {
+        Snapshot::new(&self.database)
+    }
+}
+
+impl Search for Index {
+    fn search(&self, text: &str) -> Vec<Document> {
+        unimplemented!()
+    }
+}
+
+pub struct Snapshot<D>
+where D: Deref<Target=rocksdb::DB>,
+{
+    inner: rocksdb::Snapshot<D>,
+}
+
+impl<D> Snapshot<D>
+where D: Deref<Target=rocksdb::DB>,
+{
+    pub fn new(inner: D) -> Snapshot<D> {
+        Self { inner: rocksdb::Snapshot::new(inner) }
+    }
+}
+
+impl<D> Search for Snapshot<D>
+where D: Deref<Target=rocksdb::DB>,
+{
+    fn search(&self, text: &str) -> Vec<Document> {
+        unimplemented!()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::index::schema::Schema;
+    use crate::index::update::{PositiveUpdateBuilder, NegativeUpdateBuilder};
+
+    #[test]
+    fn generate_negative_update() -> Result<(), Box<Error>> {
+
+        let schema = Schema::open("/meili/default.sch")?;
+        let mut builder = NegativeUpdateBuilder::new("update-delete-0001.sst");
+
+        // you can insert documents in any order, it is sorted internally
+        builder.remove(1);
+        builder.remove(5);
+        builder.remove(2);
+
+        let update = builder.build()?;
+
+        assert_eq!(update.info().sign, Sign::Negative);
+
+        Ok(())
+    }
+
+    #[test]
+    fn generate_positive_update() -> Result<(), Box<Error>> {
+
+        let schema = Schema::open("/meili/default.sch")?;
+        let tokenizer_builder = DefaultBuilder::new();
+        let mut builder = PositiveUpdateBuilder::new("update-positive-0001.sst", schema.clone(), tokenizer_builder);
+
+        // you can insert documents in any order, it is sorted internally
+        let title_field = schema.field("title").unwrap();
+        builder.update_field(1, title_field, "hallo!".to_owned());
+        builder.update_field(5, title_field, "hello!".to_owned());
+        builder.update_field(2, title_field, "hi!".to_owned());
+
+        let name_field = schema.field("name").unwrap();
+        builder.remove_field(4, name_field);
+
+        let update = builder.build()?;
+
+        assert_eq!(update.info().sign, Sign::Positive);
+
+        Ok(())
+    }
+
+    #[test]
+    fn execution() -> Result<(), Box<Error>> {
+
+        let index = Index::open("/meili/data")?;
+        let update = Update::open("update-0001.sst")?;
+        index.ingest_update(update)?;
+        // directly apply changes to the database and see new results
+        let results = index.search("helo");
+
+        //////////////
+
+        let index = Index::open("/meili/data")?;
+        let update = Update::open("update-0001.sst")?;
+
+        // if you create a snapshot before an update
+        let snapshot = index.snapshot();
+        index.ingest_update(update)?;
+
+        // the snapshot does not see the updates
+        let results = snapshot.search("helo");
+
+        // the raw index itself sees new results
+        let results = index.search("helo");
+
+        Ok(())
+    }
+}
diff --git a/src/index/schema.rs b/src/index/schema.rs
new file mode 100644
index 000000000..e87459e4d
--- /dev/null
+++ b/src/index/schema.rs
@@ -0,0 +1,82 @@
+use std::error::Error;
+use std::path::Path;
+use std::ops::BitOr;
+use std::fmt;
+
+pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false };
+pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true };
+
+#[derive(Copy, Clone)]
+pub struct SchemaProps {
+    stored: bool,
+    indexed: bool,
+}
+
+impl SchemaProps {
+    pub fn is_stored(&self) -> bool {
+        self.stored
+    }
+
+    pub fn is_indexed(&self) -> bool {
+        self.indexed
+    }
+}
+
+impl BitOr for SchemaProps {
+    type Output = Self;
+
+    fn bitor(self, other: Self) -> Self::Output {
+        SchemaProps {
+            stored: self.stored | other.stored,
+            indexed: self.indexed | other.indexed,
+        }
+    }
+}
+
+pub struct SchemaBuilder;
+
+impl SchemaBuilder {
+    pub fn new() -> SchemaBuilder {
+        unimplemented!()
+    }
+
+    pub fn field(&mut self, name: &str, props: SchemaProps) -> SchemaField {
+        unimplemented!()
+    }
+
+    pub fn build(self) -> Schema {
+        unimplemented!()
+    }
+}
+
+#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
+pub struct SchemaField(u32);
+
+impl SchemaField {
+    pub fn as_u32(&self) -> u32 {
+        self.0
+    }
+}
+
+impl fmt::Display for SchemaField {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+#[derive(Clone)]
+pub struct Schema;
+
+impl Schema {
+    pub fn open<P: AsRef<Path>>(path: P) -> Result<Schema, Box<Error>> {
+        unimplemented!()
+    }
+
+    pub fn props(&self, field: SchemaField) -> SchemaProps {
+        unimplemented!()
+    }
+
+    pub fn field(&self, name: &str) -> Option<SchemaField> {
+        unimplemented!()
+    }
+}
diff --git a/src/index/search.rs b/src/index/search.rs
new file mode 100644
index 000000000..712dd1c74
--- /dev/null
+++ b/src/index/search.rs
@@ -0,0 +1,5 @@
+use crate::rank::Document;
+
+pub trait Search {
+    fn search(&self, text: &str) -> Vec<Document>;
+}
diff --git a/src/index/update/mod.rs b/src/index/update/mod.rs
new file mode 100644
index 000000000..cc933ddbc
--- /dev/null
+++ b/src/index/update/mod.rs
@@ -0,0 +1,55 @@
+use std::path::PathBuf;
+use std::error::Error;
+
+use ::rocksdb::rocksdb_options;
+
+use crate::index::blob_name::BlobName;
+use crate::blob::Sign;
+
+mod negative_update;
+mod positive_update;
+
+pub use self::negative_update::{NegativeUpdateBuilder};
+pub use self::positive_update::{PositiveUpdateBuilder, NewState};
+
+// These prefixes are here to make sure the document fields
+// and the internal data don't collide and the internal data are
+// at the top of the sst file.
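+//
+// Illustrative ordering, based on the key formats used below (blob name shown as a placeholder):
+// "00-blobs-order" < "0b-<blob-name>-doc-ids" / "0b-<blob-name>-fst" < "5d-<document-id>-<field>"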
+const FIELD_BLOBS_ORDER: &str = "00-blobs-order";
+
+pub struct Update {
+    path: PathBuf,
+}
+
+impl Update {
+    pub fn open<P: Into<PathBuf>>(path: P) -> Result<Update, Box<Error>> {
+        let path = path.into();
+
+        let env_options = rocksdb_options::EnvOptions::new();
+        let column_family_options = rocksdb_options::ColumnFamilyOptions::new();
+        let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options);
+        file_writer.open(&path.to_string_lossy())?;
+        let infos = file_writer.finish()?;
+
+        if infos.smallest_key() != FIELD_BLOBS_ORDER.as_bytes() {
+            // FIXME return a nice error
+            panic!("Invalid update file: the blobs-order field is not the smallest key")
+        }
+
+        Ok(Update { path })
+    }
+
+    pub fn into_path_buf(self) -> PathBuf {
+        self.path
+    }
+
+    pub fn info(&self) -> UpdateInfo {
+        unimplemented!()
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct UpdateInfo {
+    pub sign: Sign,
+    pub id: BlobName,
+}
diff --git a/src/index/update/negative_update.rs b/src/index/update/negative_update.rs
new file mode 100644
index 000000000..e90595a5c
--- /dev/null
+++ b/src/index/update/negative_update.rs
@@ -0,0 +1,59 @@
+use std::path::PathBuf;
+use std::error::Error;
+
+use ::rocksdb::rocksdb_options;
+
+use crate::index::update::{FIELD_BLOBS_ORDER, Update};
+use crate::index::blob_name::BlobName;
+use crate::data::DocIdsBuilder;
+use crate::DocumentId;
+
+pub struct NegativeUpdateBuilder {
+    path: PathBuf,
+    doc_ids: DocIdsBuilder<Vec<u8>>,
+}
+
+impl NegativeUpdateBuilder {
+    pub fn new<P: Into<PathBuf>>(path: P) -> NegativeUpdateBuilder {
+        NegativeUpdateBuilder {
+            path: path.into(),
+            doc_ids: DocIdsBuilder::new(Vec::new()),
+        }
+    }
+
+    pub fn remove(&mut self, id: DocumentId) -> bool {
+        self.doc_ids.insert(id)
+    }
+
+    pub fn build(self) -> Result<Update, Box<Error>> {
+        let blob_name = BlobName::new();
+
+        let env_options = rocksdb_options::EnvOptions::new();
+        let column_family_options = rocksdb_options::ColumnFamilyOptions::new();
+        let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options);
+
+        file_writer.open(&self.path.to_string_lossy())?;
+
+        // TODO the blob-name must be written in bytes (16 bytes)
+        //      along with the sign
+        unimplemented!("write the blob sign and name");
+
+        // write the blob name to be merged
+        let blob_name = blob_name.to_string();
+        file_writer.merge(FIELD_BLOBS_ORDER.as_bytes(), blob_name.as_bytes())?;
+
+        // write the doc ids
+        let blob_key = format!("0b-{}-doc-ids", blob_name);
+        let blob_doc_ids = self.doc_ids.into_inner()?;
+        file_writer.put(blob_key.as_bytes(), &blob_doc_ids)?;
+
+        for id in blob_doc_ids {
+            let start = format!("5d-{}", id);
+            let end = format!("5d-{}", id + 1);
+            file_writer.delete_range(start.as_bytes(), end.as_bytes())?;
+        }
+
+        file_writer.finish()?;
+        Update::open(self.path)
+    }
+}
diff --git a/src/index/update/positive_update.rs b/src/index/update/positive_update.rs
new file mode 100644
index 000000000..77d24b8a7
--- /dev/null
+++ b/src/index/update/positive_update.rs
@@ -0,0 +1,124 @@
+use std::collections::BTreeMap;
+use std::path::PathBuf;
+use std::error::Error;
+use std::fmt::Write;
+
+use ::rocksdb::rocksdb_options;
+
+use crate::index::schema::{SchemaProps, Schema, SchemaField};
+use crate::index::update::{FIELD_BLOBS_ORDER, Update};
+use crate::tokenizer::TokenizerBuilder;
+use crate::index::blob_name::BlobName;
+use crate::blob::PositiveBlobBuilder;
+use crate::{DocIndex, DocumentId};
+
+pub enum NewState {
+    Updated {
+        value: String,
+        props: SchemaProps,
+    },
+    Removed,
+}
+
+pub struct PositiveUpdateBuilder<B> {
+    path: PathBuf,
+    schema: Schema,
+    tokenizer_builder: B,
+    new_states: BTreeMap<(DocumentId, SchemaField), NewState>,
+}
+
+impl<B> PositiveUpdateBuilder<B> {
+    pub fn new<P: Into<PathBuf>>(path: P, schema: Schema, tokenizer_builder: B) -> PositiveUpdateBuilder<B> {
+        PositiveUpdateBuilder {
+            path: path.into(),
+            schema: schema,
+            tokenizer_builder: tokenizer_builder,
+            new_states: BTreeMap::new(),
+        }
+    }
+
+    // TODO value must be a field that can be indexed
+    pub fn update_field(&mut self, id: DocumentId, field: SchemaField, value: String) {
+        let state = NewState::Updated { value, props: self.schema.props(field) };
+        self.new_states.insert((id, field), state);
+    }
+
+    pub fn remove_field(&mut self, id: DocumentId, field: SchemaField) {
+        self.new_states.insert((id, field), NewState::Removed);
+    }
+}
+
+impl<B> PositiveUpdateBuilder<B>
+where B: TokenizerBuilder
+{
+    pub fn build(self) -> Result<Update, Box<Error>> {
+        let blob_name = BlobName::new();
+
+        let env_options = rocksdb_options::EnvOptions::new();
+        let column_family_options = rocksdb_options::ColumnFamilyOptions::new();
+        let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options);
+
+        file_writer.open(&self.path.to_string_lossy())?;
+
+        // TODO the blob-name must be written in bytes (16 bytes)
+        //      along with the sign
+        unimplemented!("write the blob sign and name");
+
+        // write the blob name to be merged
+        let blob_name = blob_name.to_string();
+        file_writer.put(FIELD_BLOBS_ORDER.as_bytes(), blob_name.as_bytes())?;
+
+        let mut builder = PositiveBlobBuilder::new(Vec::new(), Vec::new());
+        for ((document_id, field), state) in &self.new_states {
+            let value = match state {
+                NewState::Updated { value, props } if props.is_indexed() => value,
+                _ => continue,
+            };
+
+            for (index, word) in self.tokenizer_builder.build(value) {
+                let doc_index = DocIndex {
+                    document_id: *document_id,
+                    attribute: field.as_u32() as u8,
+                    attribute_index: index as u32,
+                };
+                // insert the exact representation
+                let word_lower = word.to_lowercase();
+
+                // and the unidecoded lowercased version
+                let word_unidecoded = unidecode::unidecode(word).to_lowercase();
+                if word_lower != word_unidecoded {
+                    builder.insert(word_unidecoded, doc_index);
+                }
+
+                builder.insert(word_lower, doc_index);
+            }
+        }
+        let (blob_fst_map, blob_doc_idx) = builder.into_inner()?;
+
+        // write the fst
+        let blob_key = format!("0b-{}-fst", blob_name);
+        file_writer.put(blob_key.as_bytes(), &blob_fst_map)?;
+
+        // write the doc-idx
+        let blob_key = format!("0b-{}-doc-idx", blob_name);
+        file_writer.put(blob_key.as_bytes(), &blob_doc_idx)?;
+
+        // write all the documents fields updates
+        let mut key = String::from("5d-");
+        let prefix_len = key.len();
+
+        for ((id, field), state) in self.new_states {
+            key.truncate(prefix_len);
+            write!(&mut key, "{}-{}", id, field)?;
+            match state {
+                NewState::Updated { value, props } => if props.is_stored() {
+                    file_writer.put(key.as_bytes(), value.as_bytes())?
+                },
+                NewState::Removed => file_writer.delete(key.as_bytes())?,
+            }
+        }
+
+        file_writer.finish()?;
+        Update::open(self.path)
+    }
+}
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 1fa8051e5..9b075786b 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -1,28 +1,32 @@
 use std::mem;
 use self::Separator::*;
 
+pub trait TokenizerBuilder {
+    fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=(usize, &'a str)> + 'a>;
+}
+
+pub struct DefaultBuilder;
+
+impl DefaultBuilder {
+    pub fn new() -> DefaultBuilder {
+        DefaultBuilder
+    }
+}
+
+impl TokenizerBuilder for DefaultBuilder {
+    fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=(usize, &'a str)> + 'a> {
+        Box::new(Tokenizer::new(text))
+    }
+}
+
 pub struct Tokenizer<'a> {
+    index: usize,
     inner: &'a str,
 }
 
 impl<'a> Tokenizer<'a> {
     pub fn new(string: &str) -> Tokenizer {
-        Tokenizer { inner: string }
-    }
-
-    pub fn iter(&self) -> Tokens {
-        Tokens::new(self.inner)
-    }
-}
-
-pub struct Tokens<'a> {
-    index: usize,
-    inner: &'a str,
-}
-
-impl<'a> Tokens<'a> {
-    fn new(string: &str) -> Tokens {
-        Tokens {
+        Tokenizer {
             index: 0,
             inner: string.trim_matches(&[' ', '.', ';', ',', '!', '?', '-', '\'', '"'][..]),
         }
@@ -52,7 +56,7 @@ impl Separator {
     }
 }
 
-impl<'a> Iterator for Tokens<'a> {
+impl<'a> Iterator for Tokenizer<'a> {
     type Item = (usize, &'a str);
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -101,37 +105,33 @@ mod tests {
 
     #[test]
     fn easy() {
-        let tokenizer = Tokenizer::new("salut");
-        let mut tokens = tokenizer.iter();
+        let mut tokenizer = Tokenizer::new("salut");
 
-        assert_eq!(tokens.next(), Some((0, "salut")));
-        assert_eq!(tokens.next(), None);
+        assert_eq!(tokenizer.next(), Some((0, "salut")));
+        assert_eq!(tokenizer.next(), None);
 
-        let tokenizer = Tokenizer::new("yo ");
-        let mut tokens = tokenizer.iter();
+        let mut tokenizer = Tokenizer::new("yo ");
 
-        assert_eq!(tokens.next(), Some((0, "yo")));
-        assert_eq!(tokens.next(), None);
+        assert_eq!(tokenizer.next(), Some((0, "yo")));
+        assert_eq!(tokenizer.next(), None);
     }
 
     #[test]
     fn hard() {
-        let tokenizer = Tokenizer::new(" .? yo lolo. aïe");
-        let mut tokens = tokenizer.iter();
+        let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe");
 
-        assert_eq!(tokens.next(), Some((0, "yo")));
-        assert_eq!(tokens.next(), Some((1, "lolo")));
-        assert_eq!(tokens.next(), Some((9, "aïe")));
-        assert_eq!(tokens.next(), None);
+        assert_eq!(tokenizer.next(), Some((0, "yo")));
+        assert_eq!(tokenizer.next(), Some((1, "lolo")));
+        assert_eq!(tokenizer.next(), Some((9, "aïe")));
+        assert_eq!(tokenizer.next(), None);
 
-        let tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
-        let mut tokens = tokenizer.iter();
+        let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
 
-        assert_eq!(tokens.next(), Some((0, "yo")));
-        assert_eq!(tokens.next(), Some((8, "lolo")));
-        assert_eq!(tokens.next(), Some((16, "wtf")));
-        assert_eq!(tokens.next(), Some((24, "lol")));
-        assert_eq!(tokens.next(), Some((32, "aïe")));
-        assert_eq!(tokens.next(), None);
+        assert_eq!(tokenizer.next(), Some((0, "yo")));
+        assert_eq!(tokenizer.next(), Some((8, "lolo")));
+        assert_eq!(tokenizer.next(), Some((16, "wtf")));
+        assert_eq!(tokenizer.next(), Some((24, "lol")));
+        assert_eq!(tokenizer.next(), Some((32, "aïe")));
+        assert_eq!(tokenizer.next(), None);
     }
 }