Clean-up search example

This commit is contained in:
Louis Dureuil 2023-03-27 17:49:43 +02:00
parent 9b83b1deb0
commit af65fe201a
No known key found for this signature in database

View File

@ -1,125 +1,113 @@
// use crate::allocator::ALLOC;
use std::error::Error; use std::error::Error;
use std::io::stdin; use std::io::stdin;
use std::time::Instant; use std::time::Instant;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use milli::{ use milli::{
execute_search, DefaultSearchLogger, Index, Search, SearchContext, TermsMatchingStrategy, execute_search, DefaultSearchLogger, Index, SearchContext, SearchLogger, TermsMatchingStrategy,
}; };
#[global_allocator] #[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
// TODO: command line
let mut args = std::env::args(); let mut args = std::env::args();
let _ = args.next().unwrap(); let program_name = args.next().expect("No program name");
let dataset = args.next().unwrap(); let dataset = args.next().unwrap_or_else(|| {
format!(
"Missing path to index. Usage: {} <PATH-TO-INDEX> [<logger-dir>] [print-documents]",
program_name
)
});
let detailed_logger = args.next();
let print_documents: bool =
if let Some(arg) = args.next() { arg == "print-documents" } else { false };
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
// Query:
// disp: 20
//
// dasp: 70 words
// dosp: 80
// dasc: 80
//
//
// daspouyerf
// daspojewkfb
let index = Index::new(options, dataset)?; let index = Index::new(options, dataset)?;
let txn = index.read_txn()?; let txn = index.read_txn()?;
let mut query = String::new(); let mut query = String::new();
while stdin().read_line(&mut query)? > 0 { while stdin().read_line(&mut query)? > 0 {
for _ in 0..2 { for _ in 0..2 {
let start = Instant::now(); let mut default_logger = DefaultSearchLogger;
let mut s = Search::new(&txn, &index); // FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible.
s.query( // Workaround'd here by recreating the logger on each iteration of the loop
// "which a the releases from poison by the government", let mut detailed_logger = detailed_logger
// "sun flower s are the best", .as_ref()
query.trim(), .map(|logger_dir| milli::DetailedSearchLogger::new(logger_dir));
); let logger: &mut dyn SearchLogger<_> =
s.terms_matching_strategy(TermsMatchingStrategy::Last); if let Some(detailed_logger) = detailed_logger.as_mut() {
s.offset(0); detailed_logger
// s.limit(1); } else {
// s.criterion_implementation_strategy( &mut default_logger
// milli::CriterionImplementationStrategy::OnlySetBased, };
// );
let docs = s.execute().unwrap();
let elapsed = start.elapsed();
println!("old: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
let start = Instant::now(); let start = Instant::now();
// let mut logger = milli::DetailedSearchLogger::new("log");
let mut ctx = SearchContext::new(&index, &txn); let mut ctx = SearchContext::new(&index, &txn);
let docs = execute_search( let docs = execute_search(
&mut ctx, &mut ctx,
query.trim(), &(!query.trim().is_empty()).then(|| query.trim().to_owned()),
// what a the from which when there is // what a the from which when there is
TermsMatchingStrategy::Last, TermsMatchingStrategy::Last,
None, &None,
0, 0,
20, 20,
None,
&mut DefaultSearchLogger, &mut DefaultSearchLogger,
&mut DefaultSearchLogger, logger,
// &mut logger,
)?; )?;
// logger.write_d2_description(&mut ctx); if let Some(logger) = &detailed_logger {
logger.write_d2_description(&mut ctx);
}
let elapsed = start.elapsed(); let elapsed = start.elapsed();
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids); println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
if print_documents {
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, obkv)| {
let mut object = serde_json::Map::default();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
object.insert(fid_name.to_owned(), value);
}
(id, serde_json::to_string_pretty(&object).unwrap())
})
.collect::<Vec<_>>();
// let documents = index for (id, document) in documents {
// .documents(&txn, docs.documents_ids.iter().copied()) println!("{id}:");
// .unwrap() println!("{document}");
// .into_iter() }
// .map(|(id, obkv)| {
// let mut object = serde_json::Map::default();
// for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
// let value = obkv.get(fid).unwrap();
// let value: serde_json::Value = serde_json::from_slice(value).unwrap();
// object.insert(fid_name.to_owned(), value);
// }
// (id, serde_json::to_string_pretty(&object).unwrap())
// })
// .collect::<Vec<_>>();
// println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids); let documents = index
// for (id, document) in documents { .documents(&txn, docs.documents_ids.iter().copied())
// println!("{id}:"); .unwrap()
// println!("{document}"); .into_iter()
// } .map(|(id, obkv)| {
let mut object = serde_json::Map::default();
// let documents = index for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
// .documents(&txn, docs.documents_ids.iter().copied()) let value = obkv.get(fid).unwrap();
// .unwrap() let value: serde_json::Value = serde_json::from_slice(value).unwrap();
// .into_iter() object.insert(fid_name.to_owned(), value);
// .map(|(id, obkv)| { }
// let mut object = serde_json::Map::default(); (id, serde_json::to_string_pretty(&object).unwrap())
// for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() { })
// let value = obkv.get(fid).unwrap(); .collect::<Vec<_>>();
// let value: serde_json::Value = serde_json::from_slice(value).unwrap(); println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
// object.insert(fid_name.to_owned(), value); for (id, document) in documents {
// } println!("{id}:");
// (id, serde_json::to_string_pretty(&object).unwrap()) println!("{document}");
// }) }
// .collect::<Vec<_>>(); }
// println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
// for (id, document) in documents {
// println!("{id}:");
// println!("{document}");
// }
} }
query.clear(); query.clear();
} }
// for (id, document) in documents {
// println!("{id}:");
// // println!("{document}");
// }
Ok(()) Ok(())
} }