Fix the benchmarks

This commit is contained in:
Kerollmops 2022-06-14 18:17:48 +02:00
parent f29114f94a
commit a97d4d63b9
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -1,13 +1,13 @@
#![allow(dead_code)] #![allow(dead_code)]
use std::fs::{create_dir_all, remove_dir_all, File}; use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::{self, BufRead, BufReader, Cursor, Read, Seek}; use std::io::{self, BufRead, BufReader, Cursor, Read, Seek};
use std::num::ParseFloatError; use std::num::ParseFloatError;
use std::path::Path; use std::path::Path;
use criterion::BenchmarkId; use criterion::BenchmarkId;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use milli::documents::DocumentBatchReader; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{ use milli::update::{
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
}; };
@ -150,39 +150,38 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<imp
"jsonl" => documents_from_jsonl(reader).unwrap(), "jsonl" => documents_from_jsonl(reader).unwrap(),
otherwise => panic!("invalid update format {:?}", otherwise), otherwise => panic!("invalid update format {:?}", otherwise),
}; };
DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap() DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
} }
fn documents_from_jsonl(mut reader: impl BufRead) -> anyhow::Result<Vec<u8>> { fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new()); let mut documents = DocumentsBatchBuilder::new(Vec::new());
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
let mut buf = String::new(); for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
let object = result?;
while reader.read_line(&mut buf)? > 0 { documents.append_json_object(&object)?;
documents.extend_from_json(&mut buf.as_bytes())?;
buf.clear();
} }
documents.finish()?;
Ok(writer.into_inner()) documents.into_inner().map_err(Into::into)
} }
fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> { fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new()); let mut documents = DocumentsBatchBuilder::new(Vec::new());
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
documents.extend_from_json(reader)?; for object in list {
documents.finish()?; documents.append_json_object(&object)?;
}
Ok(writer.into_inner()) documents.into_inner().map_err(Into::into)
} }
fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> { fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new()); let csv = csv::Reader::from_reader(reader);
milli::documents::DocumentBatchBuilder::from_csv(reader, &mut writer)?.finish()?;
Ok(writer.into_inner()) let mut documents = DocumentsBatchBuilder::new(Vec::new());
documents.append_csv(csv)?;
documents.into_inner().map_err(Into::into)
} }
enum AllowedType { enum AllowedType {