mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-29 08:35:15 +08:00
test: Add some tests to DocIndexes
This commit is contained in:
parent
5829d08bc0
commit
9a67db0989
@ -6,7 +6,7 @@ use std::error::Error;
|
|||||||
use fst::{map, Map, Streamer, IntoStreamer};
|
use fst::{map, Map, Streamer, IntoStreamer};
|
||||||
|
|
||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
use crate::data::{DocIndexes, RawDocIndexesBuilder};
|
use crate::data::{DocIndexes, DocIndexesBuilder};
|
||||||
use serde::ser::{Serialize, Serializer, SerializeTuple};
|
use serde::ser::{Serialize, Serializer, SerializeTuple};
|
||||||
use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
|
use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
|
||||||
|
|
||||||
@ -135,7 +135,7 @@ impl<'de> Deserialize<'de> for PositiveBlob {
|
|||||||
|
|
||||||
pub struct PositiveBlobBuilder<W, X> {
|
pub struct PositiveBlobBuilder<W, X> {
|
||||||
map: fst::MapBuilder<W>,
|
map: fst::MapBuilder<W>,
|
||||||
indexes: RawDocIndexesBuilder<X>,
|
indexes: DocIndexesBuilder<X>,
|
||||||
value: u64,
|
value: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,7 +143,7 @@ impl PositiveBlobBuilder<Vec<u8>, Vec<u8>> {
|
|||||||
pub fn memory() -> Self {
|
pub fn memory() -> Self {
|
||||||
PositiveBlobBuilder {
|
PositiveBlobBuilder {
|
||||||
map: fst::MapBuilder::memory(),
|
map: fst::MapBuilder::memory(),
|
||||||
indexes: RawDocIndexesBuilder::memory(),
|
indexes: DocIndexesBuilder::memory(),
|
||||||
value: 0,
|
value: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -153,7 +153,7 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
|
|||||||
pub fn new(map: W, indexes: X) -> Result<Self, Box<Error>> {
|
pub fn new(map: W, indexes: X) -> Result<Self, Box<Error>> {
|
||||||
Ok(PositiveBlobBuilder {
|
Ok(PositiveBlobBuilder {
|
||||||
map: fst::MapBuilder::new(map)?,
|
map: fst::MapBuilder::new(map)?,
|
||||||
indexes: RawDocIndexesBuilder::new(indexes),
|
indexes: DocIndexesBuilder::new(indexes),
|
||||||
value: 0,
|
value: 0,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -106,7 +106,8 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
|
|||||||
|
|
||||||
let mut builder = SdOpBuilder::with_capacity(ivalues.len());
|
let mut builder = SdOpBuilder::with_capacity(ivalues.len());
|
||||||
for ivalue in ivalues {
|
for ivalue in ivalues {
|
||||||
let indexes = self.indexes[ivalue.index].get(ivalue.value).expect("");
|
let indexes = self.indexes[ivalue.index].get(ivalue.value);
|
||||||
|
let indexes = indexes.expect("BUG: could not find document indexes");
|
||||||
let set = Set::new_unchecked(indexes);
|
let set = Set::new_unchecked(indexes);
|
||||||
builder.push(set);
|
builder.push(set);
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::collections::btree_map::{BTreeMap, Iter, Entry};
|
|
||||||
use std::slice::from_raw_parts;
|
use std::slice::from_raw_parts;
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
@ -12,6 +11,7 @@ use serde::ser::{Serialize, Serializer, SerializeTuple};
|
|||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
use crate::data::Data;
|
use crate::data::Data;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
struct Range {
|
struct Range {
|
||||||
start: u64,
|
start: u64,
|
||||||
@ -43,7 +43,7 @@ impl DocIndexes {
|
|||||||
fn from_data(data: Data) -> io::Result<Self> {
|
fn from_data(data: Data) -> io::Result<Self> {
|
||||||
let ranges_len_offset = data.len() - mem::size_of::<u64>();
|
let ranges_len_offset = data.len() - mem::size_of::<u64>();
|
||||||
let ranges_len = (&data[ranges_len_offset..]).read_u64::<LittleEndian>()?;
|
let ranges_len = (&data[ranges_len_offset..]).read_u64::<LittleEndian>()?;
|
||||||
let ranges_len = ranges_len as usize * mem::size_of::<Range>();
|
let ranges_len = ranges_len as usize;
|
||||||
|
|
||||||
let ranges_offset = ranges_len_offset - ranges_len;
|
let ranges_offset = ranges_len_offset - ranges_len;
|
||||||
let ranges = data.range(ranges_offset, ranges_len);
|
let ranges = data.range(ranges_offset, ranges_len);
|
||||||
@ -85,20 +85,20 @@ impl Serialize for DocIndexes {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct RawDocIndexesBuilder<W> {
|
pub struct DocIndexesBuilder<W> {
|
||||||
ranges: Vec<Range>,
|
ranges: Vec<Range>,
|
||||||
wtr: W,
|
wtr: W,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RawDocIndexesBuilder<Vec<u8>> {
|
impl DocIndexesBuilder<Vec<u8>> {
|
||||||
pub fn memory() -> Self {
|
pub fn memory() -> Self {
|
||||||
RawDocIndexesBuilder::new(Vec::new())
|
DocIndexesBuilder::new(Vec::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<W: Write> RawDocIndexesBuilder<W> {
|
impl<W: Write> DocIndexesBuilder<W> {
|
||||||
pub fn new(wtr: W) -> Self {
|
pub fn new(wtr: W) -> Self {
|
||||||
RawDocIndexesBuilder {
|
DocIndexesBuilder {
|
||||||
ranges: Vec::new(),
|
ranges: Vec::new(),
|
||||||
wtr: wtr,
|
wtr: wtr,
|
||||||
}
|
}
|
||||||
@ -106,7 +106,7 @@ impl<W: Write> RawDocIndexesBuilder<W> {
|
|||||||
|
|
||||||
pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> {
|
pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> {
|
||||||
let len = indexes.len() as u64;
|
let len = indexes.len() as u64;
|
||||||
let start = self.ranges.last().map(|r| r.start).unwrap_or(0);
|
let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
|
||||||
let range = Range { start, end: start + len };
|
let range = Range { start, end: start + len };
|
||||||
self.ranges.push(range);
|
self.ranges.push(range);
|
||||||
|
|
||||||
@ -132,95 +132,36 @@ impl<W: Write> RawDocIndexesBuilder<W> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocIndexesBuilder<W> {
|
|
||||||
keys: BTreeMap<String, u64>,
|
|
||||||
indexes: Vec<Vec<DocIndex>>,
|
|
||||||
number_docs: usize,
|
|
||||||
wtr: W,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<W: Write> DocIndexesBuilder<W> {
|
|
||||||
pub fn new(wtr: W) -> Self {
|
|
||||||
Self {
|
|
||||||
keys: BTreeMap::new(),
|
|
||||||
indexes: Vec::new(),
|
|
||||||
number_docs: 0,
|
|
||||||
wtr: wtr,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn number_doc_indexes(&self) -> usize {
|
|
||||||
self.number_docs
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert(&mut self, key: String, value: DocIndex) {
|
|
||||||
match self.keys.entry(key) {
|
|
||||||
Entry::Vacant(e) => {
|
|
||||||
let index = self.indexes.len() as u64;
|
|
||||||
self.indexes.push(vec![value]);
|
|
||||||
e.insert(index);
|
|
||||||
},
|
|
||||||
Entry::Occupied(e) => {
|
|
||||||
let index = *e.get();
|
|
||||||
let vec = &mut self.indexes[index as usize];
|
|
||||||
vec.push(value);
|
|
||||||
},
|
|
||||||
}
|
|
||||||
self.number_docs += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn keys(&self) -> Iter<String, u64> {
|
|
||||||
self.keys.iter()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn finish(self) -> io::Result<()> {
|
|
||||||
self.into_inner().map(drop)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn into_inner(mut self) -> io::Result<W> {
|
|
||||||
for vec in &mut self.indexes {
|
|
||||||
vec.sort_unstable();
|
|
||||||
}
|
|
||||||
|
|
||||||
let (ranges, values) = into_sliced_ranges(self.indexes, self.number_docs);
|
|
||||||
|
|
||||||
// write values first
|
|
||||||
let slice = unsafe { into_u8_slice(values.as_slice()) };
|
|
||||||
self.wtr.write_all(slice)?;
|
|
||||||
|
|
||||||
// write ranges after
|
|
||||||
let slice = unsafe { into_u8_slice(ranges.as_slice()) };
|
|
||||||
self.wtr.write_all(slice)?;
|
|
||||||
|
|
||||||
// write the length of the ranges
|
|
||||||
let len = ranges.len() as u64;
|
|
||||||
self.wtr.write_u64::<LittleEndian>(len)?;
|
|
||||||
|
|
||||||
self.wtr.flush()?;
|
|
||||||
Ok(self.wtr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_sliced_ranges<T>(vecs: Vec<Vec<T>>, number_docs: usize) -> (Vec<Range>, Vec<T>) {
|
|
||||||
let cap = vecs.len();
|
|
||||||
let mut ranges = Vec::with_capacity(cap);
|
|
||||||
let mut values = Vec::with_capacity(number_docs);
|
|
||||||
|
|
||||||
for v in &vecs {
|
|
||||||
let len = v.len() as u64;
|
|
||||||
let start = ranges.last().map(|&Range { end, .. }| end).unwrap_or(0);
|
|
||||||
|
|
||||||
let range = Range { start, end: start + len };
|
|
||||||
ranges.push(range);
|
|
||||||
}
|
|
||||||
|
|
||||||
values.extend(vecs.into_iter().flatten());
|
|
||||||
|
|
||||||
(ranges, values)
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] {
|
unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] {
|
||||||
let ptr = slice.as_ptr() as *const u8;
|
let ptr = slice.as_ptr() as *const u8;
|
||||||
let len = slice.len() * mem::size_of::<T>();
|
let len = slice.len() * mem::size_of::<T>();
|
||||||
from_raw_parts(ptr, len)
|
from_raw_parts(ptr, len)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::error::Error;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn serialize_deserialize() -> Result<(), Box<Error>> {
|
||||||
|
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
|
||||||
|
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
|
||||||
|
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
|
||||||
|
|
||||||
|
let mut builder = DocIndexesBuilder::memory();
|
||||||
|
|
||||||
|
builder.insert(&[a])?;
|
||||||
|
builder.insert(&[a, b, c])?;
|
||||||
|
builder.insert(&[a, c])?;
|
||||||
|
|
||||||
|
let bytes = builder.into_inner()?;
|
||||||
|
let docs = DocIndexes::from_bytes(bytes)?;
|
||||||
|
|
||||||
|
assert_eq!(docs.get(0).unwrap(), &[a]);
|
||||||
|
assert_eq!(docs.get(1).unwrap(), &[a, b, c]);
|
||||||
|
assert_eq!(docs.get(2).unwrap(), &[a, c]);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -7,7 +7,7 @@ use std::sync::Arc;
|
|||||||
use fst::raw::MmapReadOnly;
|
use fst::raw::MmapReadOnly;
|
||||||
|
|
||||||
pub use self::doc_ids::DocIds;
|
pub use self::doc_ids::DocIds;
|
||||||
pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder, RawDocIndexesBuilder};
|
pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
enum Data {
|
enum Data {
|
||||||
|
@ -120,87 +120,3 @@ impl Index {
|
|||||||
Ok(documents)
|
Ok(documents)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use tempfile::NamedTempFile;
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
use crate::index::schema::{Schema, SchemaBuilder, STORED, INDEXED};
|
|
||||||
use crate::index::update::{PositiveUpdateBuilder, NegativeUpdateBuilder};
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn generate_negative_update() -> Result<(), Box<Error>> {
|
|
||||||
let path = NamedTempFile::new()?.into_temp_path();
|
|
||||||
let mut builder = NegativeUpdateBuilder::new(&path);
|
|
||||||
|
|
||||||
// you can insert documents in any order,
|
|
||||||
// it is sorted internally
|
|
||||||
builder.remove(1);
|
|
||||||
builder.remove(5);
|
|
||||||
builder.remove(2);
|
|
||||||
|
|
||||||
let update = builder.build()?;
|
|
||||||
|
|
||||||
assert_eq!(update.info().sign, Sign::Negative);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn generate_positive_update() -> Result<(), Box<Error>> {
|
|
||||||
let title;
|
|
||||||
let description;
|
|
||||||
let schema = {
|
|
||||||
let mut builder = SchemaBuilder::new();
|
|
||||||
title = builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
description = builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
builder.build()
|
|
||||||
};
|
|
||||||
|
|
||||||
let sst_path = NamedTempFile::new()?.into_temp_path();
|
|
||||||
let tokenizer_builder = DefaultBuilder::new();
|
|
||||||
let mut builder = PositiveUpdateBuilder::new(&sst_path, schema.clone(), tokenizer_builder);
|
|
||||||
|
|
||||||
// you can insert documents in any order,
|
|
||||||
// it is sorted internally
|
|
||||||
builder.update_field(1, title, "hallo!".to_owned());
|
|
||||||
builder.update_field(5, title, "hello!".to_owned());
|
|
||||||
builder.update_field(2, title, "hi!".to_owned());
|
|
||||||
|
|
||||||
builder.remove_field(4, description);
|
|
||||||
|
|
||||||
let update = builder.build()?;
|
|
||||||
|
|
||||||
assert_eq!(update.info().sign, Sign::Positive);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn execution() -> Result<(), Box<Error>> {
|
|
||||||
|
|
||||||
let index = Index::open("/meili/data")?;
|
|
||||||
let update = Update::open("update-0001.sst")?;
|
|
||||||
index.ingest_update(update)?;
|
|
||||||
// directly apply changes to the database and see new results
|
|
||||||
let results = index.search("helo");
|
|
||||||
|
|
||||||
//////////////
|
|
||||||
|
|
||||||
// let index = Index::open("/meili/data")?;
|
|
||||||
// let update = Update::open("update-0001.sst")?;
|
|
||||||
|
|
||||||
// // if you create a snapshot before an update
|
|
||||||
// let snapshot = index.snapshot();
|
|
||||||
// index.ingest_update(update)?;
|
|
||||||
|
|
||||||
// // the snapshot does not see the updates
|
|
||||||
// let results = snapshot.search("helo");
|
|
||||||
|
|
||||||
// // the raw index itself see new results
|
|
||||||
// let results = index.search("helo");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user