From af65fe201aaac2e6e89e49a14babd683884cecfc Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 27 Mar 2023 17:49:43 +0200 Subject: [PATCH] Clean-up search example --- milli/examples/search.rs | 152 ++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 82 deletions(-) diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 57aac5a02..ecc7f9cb8 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -1,125 +1,113 @@ -// use crate::allocator::ALLOC; use std::error::Error; use std::io::stdin; use std::time::Instant; use heed::EnvOpenOptions; use milli::{ - execute_search, DefaultSearchLogger, Index, Search, SearchContext, TermsMatchingStrategy, + execute_search, DefaultSearchLogger, Index, SearchContext, SearchLogger, TermsMatchingStrategy, }; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; fn main() -> Result<(), Box> { - // TODO: command line let mut args = std::env::args(); - let _ = args.next().unwrap(); - let dataset = args.next().unwrap(); + let program_name = args.next().expect("No program name"); + let dataset = args.next().unwrap_or_else(|| { + format!( + "Missing path to index. Usage: {} [] [print-documents]", + program_name + ) + }); + let detailed_logger = args.next(); + let print_documents: bool = + if let Some(arg) = args.next() { arg == "print-documents" } else { false }; let mut options = EnvOpenOptions::new(); options.map_size(100 * 1024 * 1024 * 1024); // 100 GB - // Query: - // disp: 20 - // - // dasp: 70 words - // dosp: 80 - // dasc: 80 - // - // - // daspouyerf - // daspojewkfb - let index = Index::new(options, dataset)?; let txn = index.read_txn()?; let mut query = String::new(); while stdin().read_line(&mut query)? > 0 { for _ in 0..2 { - let start = Instant::now(); - let mut s = Search::new(&txn, &index); - s.query( - // "which a the releases from poison by the government", - // "sun flower s are the best", - query.trim(), - ); - s.terms_matching_strategy(TermsMatchingStrategy::Last); - s.offset(0); - // s.limit(1); - // s.criterion_implementation_strategy( - // milli::CriterionImplementationStrategy::OnlySetBased, - // ); - - let docs = s.execute().unwrap(); - let elapsed = start.elapsed(); - println!("old: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids); + let mut default_logger = DefaultSearchLogger; + // FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible. + // Workaround'd here by recreating the logger on each iteration of the loop + let mut detailed_logger = detailed_logger + .as_ref() + .map(|logger_dir| milli::DetailedSearchLogger::new(logger_dir)); + let logger: &mut dyn SearchLogger<_> = + if let Some(detailed_logger) = detailed_logger.as_mut() { + detailed_logger + } else { + &mut default_logger + }; let start = Instant::now(); - // let mut logger = milli::DetailedSearchLogger::new("log"); + let mut ctx = SearchContext::new(&index, &txn); let docs = execute_search( &mut ctx, - query.trim(), + &(!query.trim().is_empty()).then(|| query.trim().to_owned()), // what a the from which when there is TermsMatchingStrategy::Last, - None, + &None, 0, 20, + None, &mut DefaultSearchLogger, - &mut DefaultSearchLogger, - // &mut logger, + logger, )?; - // logger.write_d2_description(&mut ctx); + if let Some(logger) = &detailed_logger { + logger.write_d2_description(&mut ctx); + } let elapsed = start.elapsed(); println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids); + if print_documents { + let documents = index + .documents(&txn, docs.documents_ids.iter().copied()) + .unwrap() + .into_iter() + .map(|(id, obkv)| { + let mut object = serde_json::Map::default(); + for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() { + let value = obkv.get(fid).unwrap(); + let value: serde_json::Value = serde_json::from_slice(value).unwrap(); + object.insert(fid_name.to_owned(), value); + } + (id, serde_json::to_string_pretty(&object).unwrap()) + }) + .collect::>(); - // let documents = index - // .documents(&txn, docs.documents_ids.iter().copied()) - // .unwrap() - // .into_iter() - // .map(|(id, obkv)| { - // let mut object = serde_json::Map::default(); - // for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() { - // let value = obkv.get(fid).unwrap(); - // let value: serde_json::Value = serde_json::from_slice(value).unwrap(); - // object.insert(fid_name.to_owned(), value); - // } - // (id, serde_json::to_string_pretty(&object).unwrap()) - // }) - // .collect::>(); + for (id, document) in documents { + println!("{id}:"); + println!("{document}"); + } - // println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids); - // for (id, document) in documents { - // println!("{id}:"); - // println!("{document}"); - // } - - // let documents = index - // .documents(&txn, docs.documents_ids.iter().copied()) - // .unwrap() - // .into_iter() - // .map(|(id, obkv)| { - // let mut object = serde_json::Map::default(); - // for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() { - // let value = obkv.get(fid).unwrap(); - // let value: serde_json::Value = serde_json::from_slice(value).unwrap(); - // object.insert(fid_name.to_owned(), value); - // } - // (id, serde_json::to_string_pretty(&object).unwrap()) - // }) - // .collect::>(); - // println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids); - // for (id, document) in documents { - // println!("{id}:"); - // println!("{document}"); - // } + let documents = index + .documents(&txn, docs.documents_ids.iter().copied()) + .unwrap() + .into_iter() + .map(|(id, obkv)| { + let mut object = serde_json::Map::default(); + for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() { + let value = obkv.get(fid).unwrap(); + let value: serde_json::Value = serde_json::from_slice(value).unwrap(); + object.insert(fid_name.to_owned(), value); + } + (id, serde_json::to_string_pretty(&object).unwrap()) + }) + .collect::>(); + println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids); + for (id, document) in documents { + println!("{id}:"); + println!("{document}"); + } + } } query.clear(); } - // for (id, document) in documents { - // println!("{id}:"); - // // println!("{document}"); - // } Ok(()) }