fix tests

This commit is contained in:
marin postma 2021-10-24 14:41:36 +02:00
parent 0f86d6b28f
commit 2e62925a6e
No known key found for this signature in database
GPG Key ID: 6088B7721C3E39F9
8 changed files with 49 additions and 55 deletions

View File

@ -1,5 +1,4 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::HashMap;
use std::io; use std::io;
use std::io::Cursor; use std::io::Cursor;
use std::io::Write; use std::io::Write;
@ -18,18 +17,6 @@ use super::{ByteCounter, DocumentsBatchIndex, DocumentsMetadata, Error};
/// ///
/// The writer used by the DocumentBatchBuilder can be read using a `DocumentBatchReader` to /// The writer used by the DocumentBatchBuilder can be read using a `DocumentBatchReader` to
/// iterate over the documents. /// iterate over the documents.
///
/// ## example:
/// ```
/// use milli::documents::DocumentBatchBuilder;
/// use serde_json::json;
/// use std::io::Cursor;
///
/// let mut writer = Cursor::new(Vec::new());
/// let mut builder = DocumentBatchBuilder::new(&mut writer).unwrap();
/// builder.add_documents(json!({"id": 1, "name": "foo"})).unwrap();
/// builder.finish().unwrap();
/// ```
pub struct DocumentBatchBuilder<W> { pub struct DocumentBatchBuilder<W> {
inner: ByteCounter<W>, inner: ByteCounter<W>,
index: DocumentsBatchIndex, index: DocumentsBatchIndex,
@ -100,7 +87,7 @@ impl<W: io::Write + io::Seek> DocumentBatchBuilder<W> {
count: &mut self.count, count: &mut self.count,
}; };
de.deserialize_any(&mut visitor).unwrap(); de.deserialize_any(&mut visitor).map_err(Error::JsonError)?;
Ok(()) Ok(())
} }
@ -112,10 +99,11 @@ impl<W: io::Write + io::Seek> DocumentBatchBuilder<W> {
/// optimizations. /// optimizations.
/// ///
/// From csv takes care to call finish in the end. /// From csv takes care to call finish in the end.
pub fn from_csv<R: io::Read>(mut self, reader: R) -> Result<(), Error> { pub fn from_csv<R: io::Read>(reader: R, writer: W) -> Result<Self, Error> {
let mut this = Self::new(writer)?;
// Ensure that this is the first and only addition made with this builder // Ensure that this is the first and only addition made with this builder
debug_assert!(self.index.is_empty()); debug_assert!(this.index.is_empty());
let mut records = csv::Reader::from_reader(reader); let mut records = csv::Reader::from_reader(reader);
@ -124,40 +112,37 @@ impl<W: io::Write + io::Seek> DocumentBatchBuilder<W> {
.unwrap() .unwrap()
.into_iter() .into_iter()
.map(parse_csv_header) .map(parse_csv_header)
.map(|(k, t)| (self.index.insert(&k), t)) .map(|(k, t)| (this.index.insert(&k), t))
.collect::<HashMap<_, _>>(); .collect::<BTreeMap<_, _>>();
let records = records.into_records(); let records = records.into_records();
dbg!(&headers);
for record in records { for record in records {
match record { match record {
Ok(record) => { Ok(record) => {
let mut writer = obkv::KvWriter::new(Cursor::new(&mut self.obkv_buffer)); let mut writer = obkv::KvWriter::new(Cursor::new(&mut this.obkv_buffer));
for (value, (fid, ty)) in record.into_iter().zip(headers.iter()) { for (value, (fid, ty)) in record.into_iter().zip(headers.iter()) {
let value = match ty { let value = match ty {
AllowedType::Number => value.parse::<f64>().map(Value::from).unwrap(), AllowedType::Number => value.parse::<f64>().map(Value::from).unwrap(),
AllowedType::String => Value::String(value.to_string()), AllowedType::String => Value::String(value.to_string()),
}; };
serde_json::to_writer(Cursor::new(&mut self.value_buffer), dbg!(&value)).unwrap(); serde_json::to_writer(Cursor::new(&mut this.value_buffer), &value).unwrap();
writer.insert(*fid, &self.value_buffer)?; writer.insert(*fid, &this.value_buffer)?;
self.value_buffer.clear(); this.value_buffer.clear();
} }
self.inner.write_u32::<BigEndian>(self.obkv_buffer.len() as u32)?; this.inner.write_u32::<BigEndian>(this.obkv_buffer.len() as u32)?;
self.inner.write_all(&self.obkv_buffer)?; this.inner.write_all(&this.obkv_buffer)?;
self.obkv_buffer.clear(); this.obkv_buffer.clear();
self.count += 1; this.count += 1;
}, },
Err(_) => panic!(), Err(_) => panic!(),
} }
} }
self.finish()?; Ok(this)
Ok(())
} }
} }
@ -265,18 +250,16 @@ mod test {
#[test] #[test]
fn add_documents_csv() { fn add_documents_csv() {
let mut cursor = Cursor::new(Vec::new()); let mut cursor = Cursor::new(Vec::new());
let builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
let csv = "id:number,field:string\n1,hello!\n2,blabla"; let csv = "id:number,field:string\n1,hello!\n2,blabla";
builder.from_csv(Cursor::new(csv.as_bytes())).unwrap(); let builder = DocumentBatchBuilder::from_csv(Cursor::new(csv.as_bytes()), &mut cursor).unwrap();
builder.finish().unwrap();
cursor.set_position(0); cursor.set_position(0);
let mut reader = DocumentBatchReader::from_reader(cursor).unwrap(); let mut reader = DocumentBatchReader::from_reader(cursor).unwrap();
dbg!(reader.len());
let (index, document) = reader.next_document_with_index().unwrap().unwrap(); let (index, document) = reader.next_document_with_index().unwrap().unwrap();
assert_eq!(index.len(), 2); assert_eq!(index.len(), 2);
assert_eq!(document.iter().count(), 2); assert_eq!(document.iter().count(), 2);

View File

@ -135,6 +135,8 @@ macro_rules! documents {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::io::Cursor;
use serde_json::{json, Value}; use serde_json::{json, Value};
use super::*; use super::*;
@ -151,13 +153,14 @@ mod test {
"bool": true "bool": true
}); });
let json = serde_json::to_vec(&json).unwrap();
let mut v = Vec::new(); let mut v = Vec::new();
let mut cursor = io::Cursor::new(&mut v); let mut cursor = io::Cursor::new(&mut v);
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
todo!(); builder.extend_from_json(Cursor::new(json)).unwrap();
//builder.add_documents(json).unwrap();
builder.finish().unwrap(); builder.finish().unwrap();
@ -181,14 +184,16 @@ mod test {
"toto": false, "toto": false,
}); });
let doc1 = serde_json::to_vec(&doc1).unwrap();
let doc2 = serde_json::to_vec(&doc2).unwrap();
let mut v = Vec::new(); let mut v = Vec::new();
let mut cursor = io::Cursor::new(&mut v); let mut cursor = io::Cursor::new(&mut v);
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
todo!(); builder.extend_from_json(Cursor::new(doc1)).unwrap();
//builder.add_documents(doc1).unwrap(); builder.extend_from_json(Cursor::new(doc2)).unwrap();
//builder.add_documents(doc2).unwrap();
builder.finish().unwrap(); builder.finish().unwrap();
@ -211,13 +216,14 @@ mod test {
{ "tata": "hello" }, { "tata": "hello" },
]); ]);
let docs = serde_json::to_vec(&docs).unwrap();
let mut v = Vec::new(); let mut v = Vec::new();
let mut cursor = io::Cursor::new(&mut v); let mut cursor = io::Cursor::new(&mut v);
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
todo!(); builder.extend_from_json(Cursor::new(docs)).unwrap();
//builder.add_documents(docs).unwrap();
builder.finish().unwrap(); builder.finish().unwrap();
@ -245,13 +251,13 @@ mod test {
{ "tata": "hello" }, { "tata": "hello" },
]]); ]]);
todo!(); let docs = serde_json::to_vec(&docs).unwrap();
//assert!(builder.add_documents(docs).is_err()); assert!(builder.extend_from_json(Cursor::new(docs)).is_err());
let docs = json!("hello"); let docs = json!("hello");
let docs = serde_json::to_vec(&docs).unwrap();
todo!(); assert!(builder.extend_from_json(Cursor::new(docs)).is_err());
//assert!(builder.add_documents(docs).is_err());
} }
#[test] #[test]

View File

@ -98,6 +98,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
self.inner.write_all(reader).unwrap(); self.inner.write_all(reader).unwrap();
*self.count += 1; *self.count += 1;
self.values.clear();
Ok(()) Ok(())
} }

View File

@ -953,6 +953,7 @@ pub(crate) mod tests {
{ "id": 1, "name": "kevin", "has_dog": true }, { "id": 1, "name": "kevin", "has_dog": true },
{ "id": 2, "name": "bob" } { "id": 2, "name": "bob" }
]); ]);
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index, 0); let builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.execute(content, |_, _| ()).unwrap(); builder.execute(content, |_, _| ()).unwrap();

View File

@ -68,8 +68,9 @@ mod test {
"txts": sample_txts[..(rng.gen_range(0..3))], "txts": sample_txts[..(rng.gen_range(0..3))],
"cat-ints": sample_ints[..(rng.gen_range(0..3))], "cat-ints": sample_ints[..(rng.gen_range(0..3))],
}); });
todo!()
//builder.add_documents(doc).unwrap(); let doc = Cursor::new(serde_json::to_vec(&doc).unwrap());
builder.extend_from_json(doc).unwrap();
} }
builder.finish().unwrap(); builder.finish().unwrap();

View File

@ -877,8 +877,8 @@ mod tests {
let mut cursor = Cursor::new(Vec::new()); let mut cursor = Cursor::new(Vec::new());
let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
todo!(); let big_object = Cursor::new(serde_json::to_vec(&big_object).unwrap());
//builder.add_documents(big_object).unwrap(); builder.extend_from_json(big_object).unwrap();
builder.finish().unwrap(); builder.finish().unwrap();
cursor.set_position(0); cursor.set_position(0);
let content = DocumentBatchReader::from_reader(cursor).unwrap(); let content = DocumentBatchReader::from_reader(cursor).unwrap();

View File

@ -61,10 +61,12 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut cursor = Cursor::new(Vec::new()); let mut cursor = Cursor::new(Vec::new());
let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
let reader = Cursor::new(CONTENT.as_bytes()); let reader = Cursor::new(CONTENT.as_bytes());
todo!();
//for doc in serde_json::Deserializer::from_reader(reader).into_iter::<serde_json::Value>() { for doc in serde_json::Deserializer::from_reader(reader).into_iter::<serde_json::Value>() {
//documents_builder.add_documents(doc.unwrap()).unwrap(); let doc = Cursor::new(serde_json::to_vec(&doc.unwrap()).unwrap());
//} documents_builder.extend_from_json(doc).unwrap();
}
documents_builder.finish().unwrap(); documents_builder.finish().unwrap();
cursor.set_position(0); cursor.set_position(0);

View File

@ -409,8 +409,8 @@ fn criteria_ascdesc() {
"age": age, "age": age,
}); });
todo!(); let json = Cursor::new(serde_json::to_vec(&json).unwrap());
//batch_builder.add_documents(json).unwrap(); batch_builder.extend_from_json(json).unwrap();
}); });
batch_builder.finish().unwrap(); batch_builder.finish().unwrap();