mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Add a little bit more debug
This commit is contained in:
parent
dff68a339a
commit
5404776f7a
@ -48,6 +48,8 @@ struct MtblKvStore(Option<File>);
|
|||||||
|
|
||||||
impl MtblKvStore {
|
impl MtblKvStore {
|
||||||
fn from_indexed(mut indexed: Indexed) -> anyhow::Result<MtblKvStore> {
|
fn from_indexed(mut indexed: Indexed) -> anyhow::Result<MtblKvStore> {
|
||||||
|
eprintln!("{:?}: Creating an MTBL store from an Indexed...", rayon::current_thread_index());
|
||||||
|
|
||||||
let outfile = tempfile::tempfile()?;
|
let outfile = tempfile::tempfile()?;
|
||||||
let mut out = Writer::new(outfile, None)?;
|
let mut out = Writer::new(outfile, None)?;
|
||||||
|
|
||||||
@ -73,10 +75,10 @@ impl MtblKvStore {
|
|||||||
// We must write the prefix postings ids
|
// We must write the prefix postings ids
|
||||||
key[0] = 2;
|
key[0] = 2;
|
||||||
let mut stream = indexed.fst.stream();
|
let mut stream = indexed.fst.stream();
|
||||||
while let Some(word) = stream.next() {
|
while let Some(prefix) = stream.next() {
|
||||||
key.truncate(1);
|
key.truncate(1);
|
||||||
key.extend_from_slice(word);
|
key.extend_from_slice(prefix);
|
||||||
if let Some(ids) = indexed.prefix_postings_ids.remove(word) {
|
if let Some(ids) = indexed.prefix_postings_ids.remove(prefix) {
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
ids.serialize_into(&mut buffer)?;
|
ids.serialize_into(&mut buffer)?;
|
||||||
out.add(&key, &buffer).unwrap();
|
out.add(&key, &buffer).unwrap();
|
||||||
@ -93,10 +95,14 @@ impl MtblKvStore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let out = out.into_inner()?;
|
let out = out.into_inner()?;
|
||||||
|
|
||||||
|
eprintln!("{:?}: MTBL store created!", rayon::current_thread_index());
|
||||||
Ok(MtblKvStore(Some(out)))
|
Ok(MtblKvStore(Some(out)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_with(self, other: MtblKvStore) -> anyhow::Result<MtblKvStore> {
|
fn merge_with(self, other: MtblKvStore) -> anyhow::Result<MtblKvStore> {
|
||||||
|
eprintln!("{:?}: Merging two MTBL stores...", rayon::current_thread_index());
|
||||||
|
|
||||||
let (left, right) = match (self.0, other.0) {
|
let (left, right) = match (self.0, other.0) {
|
||||||
(Some(left), Some(right)) => (left, right),
|
(Some(left), Some(right)) => (left, right),
|
||||||
(Some(left), None) => return Ok(MtblKvStore(Some(left))),
|
(Some(left), None) => return Ok(MtblKvStore(Some(left))),
|
||||||
@ -159,11 +165,15 @@ impl MtblKvStore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let out = out.into_inner()?;
|
let out = out.into_inner()?;
|
||||||
|
|
||||||
|
eprintln!("{:?}: MTBL stores merged!", rayon::current_thread_index());
|
||||||
Ok(MtblKvStore(Some(out)))
|
Ok(MtblKvStore(Some(out)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn index_csv(mut rdr: csv::Reader<File>) -> anyhow::Result<MtblKvStore> {
|
fn index_csv(mut rdr: csv::Reader<File>) -> anyhow::Result<MtblKvStore> {
|
||||||
|
eprintln!("{:?}: Indexing into an Indexed...", rayon::current_thread_index());
|
||||||
|
|
||||||
const MAX_POSITION: usize = 1000;
|
const MAX_POSITION: usize = 1000;
|
||||||
const MAX_ATTRIBUTES: usize = u32::max_value() as usize / MAX_POSITION;
|
const MAX_ATTRIBUTES: usize = u32::max_value() as usize / MAX_POSITION;
|
||||||
|
|
||||||
@ -189,8 +199,8 @@ fn index_csv(mut rdr: csv::Reader<File>) -> anyhow::Result<MtblKvStore> {
|
|||||||
postings_ids.entry(SmallVec32::from(word.as_bytes()))
|
postings_ids.entry(SmallVec32::from(word.as_bytes()))
|
||||||
.or_insert_with(RoaringBitmap::new)
|
.or_insert_with(RoaringBitmap::new)
|
||||||
.insert(document_id);
|
.insert(document_id);
|
||||||
if let Some(prefix) = word.as_bytes().get(0..word.len().min(4)) {
|
if let Some(prefix) = word.as_bytes().get(0..word.len().min(5)) {
|
||||||
for i in 0..prefix.len() {
|
for i in 0..=prefix.len() {
|
||||||
prefix_postings_ids.entry(SmallVec32::from(&prefix[..i]))
|
prefix_postings_ids.entry(SmallVec32::from(&prefix[..i]))
|
||||||
.or_insert_with(RoaringBitmap::new)
|
.or_insert_with(RoaringBitmap::new)
|
||||||
.insert(document_id);
|
.insert(document_id);
|
||||||
@ -216,6 +226,7 @@ fn index_csv(mut rdr: csv::Reader<File>) -> anyhow::Result<MtblKvStore> {
|
|||||||
let new_words_fst = fst::Set::from_iter(new_words.iter().map(SmallVec32::as_ref))?;
|
let new_words_fst = fst::Set::from_iter(new_words.iter().map(SmallVec32::as_ref))?;
|
||||||
|
|
||||||
let indexed = Indexed { fst: new_words_fst, headers, postings_ids, prefix_postings_ids, documents };
|
let indexed = Indexed { fst: new_words_fst, headers, postings_ids, prefix_postings_ids, documents };
|
||||||
|
eprintln!("{:?}: Indexed created!", rayon::current_thread_index());
|
||||||
|
|
||||||
MtblKvStore::from_indexed(indexed)
|
MtblKvStore::from_indexed(indexed)
|
||||||
}
|
}
|
||||||
@ -274,19 +285,17 @@ fn main() -> anyhow::Result<()> {
|
|||||||
.open(opt.database)?;
|
.open(opt.database)?;
|
||||||
|
|
||||||
let index = Index::new(&env)?;
|
let index = Index::new(&env)?;
|
||||||
let res = opt.files_to_index
|
let mtbl_store = opt.files_to_index
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.try_fold(MtblKvStore::default, |acc, path| {
|
.try_fold(MtblKvStore::default, |acc, path| {
|
||||||
let rdr = csv::Reader::from_path(path)?;
|
let rdr = csv::Reader::from_path(path)?;
|
||||||
let mtbl_store = index_csv(rdr)?;
|
let store = index_csv(rdr)?;
|
||||||
acc.merge_with(mtbl_store)
|
acc.merge_with(store)
|
||||||
})
|
})
|
||||||
.inspect(|_| {
|
.inspect(|_| {
|
||||||
eprintln!("Total number of documents seen so far is {}", ID_GENERATOR.load(Ordering::Relaxed))
|
eprintln!("Total number of documents seen so far is {}", ID_GENERATOR.load(Ordering::Relaxed))
|
||||||
})
|
})
|
||||||
.try_reduce(MtblKvStore::default, MtblKvStore::merge_with);
|
.try_reduce(MtblKvStore::default, MtblKvStore::merge_with)?;
|
||||||
|
|
||||||
let mtbl_store = res?;
|
|
||||||
|
|
||||||
eprintln!("We are writing into LMDB...");
|
eprintln!("We are writing into LMDB...");
|
||||||
let mut wtxn = env.write_txn()?;
|
let mut wtxn = env.write_txn()?;
|
||||||
|
11
src/lib.rs
11
src/lib.rs
@ -79,18 +79,21 @@ impl Index {
|
|||||||
|
|
||||||
let mut union_result = RoaringBitmap::default();
|
let mut union_result = RoaringBitmap::default();
|
||||||
if word.len() <= 4 {
|
if word.len() <= 4 {
|
||||||
if let Some(ids) = self.prefix_postings_ids.get(rtxn, &word[..word.len().min(4)])? {
|
if let Some(ids) = self.prefix_postings_ids.get(rtxn, &word[..word.len().min(5)])? {
|
||||||
union_result = RoaringBitmap::deserialize_from(ids)?;
|
union_result = RoaringBitmap::deserialize_from(ids)?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
let mut count = 0;
|
||||||
let mut stream = fst.search(dfa).into_stream();
|
let mut stream = fst.search(dfa).into_stream();
|
||||||
while let Some(word) = stream.next() {
|
while let Some(word) = stream.next() {
|
||||||
|
count += 1;
|
||||||
let word = std::str::from_utf8(word)?;
|
let word = std::str::from_utf8(word)?;
|
||||||
if let Some(ids) = self.postings_ids.get(rtxn, word)? {
|
if let Some(ids) = self.postings_ids.get(rtxn, word)? {
|
||||||
let right = RoaringBitmap::deserialize_from(ids)?;
|
let right = RoaringBitmap::deserialize_from(ids)?;
|
||||||
union_result.union_with(&right);
|
union_result.union_with(&right);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
eprint!("with {:?} words ", count);
|
||||||
}
|
}
|
||||||
eprintln!("union for {:?} took {:.02?}", word, before.elapsed());
|
eprintln!("union for {:?} took {:.02?}", word, before.elapsed());
|
||||||
|
|
||||||
@ -99,14 +102,16 @@ impl Index {
|
|||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
let left_len = left.len();
|
let left_len = left.len();
|
||||||
left.intersect_with(&union_result);
|
left.intersect_with(&union_result);
|
||||||
eprintln!("intersect between {:?} and {:?} took {:.02?}",
|
eprintln!("intersect between {:?} and {:?} gives {:?} took {:.02?}",
|
||||||
left_len, union_result.len(), before.elapsed());
|
left_len, union_result.len(), left.len(), before.elapsed());
|
||||||
Some(left)
|
Some(left)
|
||||||
},
|
},
|
||||||
None => Some(union_result),
|
None => Some(union_result),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
eprintln!("{} candidates", intersect_result.as_ref().map_or(0, |r| r.len()));
|
||||||
|
|
||||||
Ok(intersect_result.unwrap_or_default().iter().take(20).collect())
|
Ok(intersect_result.unwrap_or_default().iter().take(20).collect())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user