feat: Use sdset Set primitives everywhere possible

This commit is contained in:
Clément Renault 2018-12-09 14:18:23 +01:00
parent 06ba82cfa8
commit 6cb1bfd815
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
8 changed files with 41 additions and 35 deletions

View File

@ -4,6 +4,7 @@ use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use std::{io, mem}; use std::{io, mem};
use sdset::Set;
use fst::raw::MmapReadOnly; use fst::raw::MmapReadOnly;
use serde::ser::{Serialize, Serializer}; use serde::ser::{Serialize, Serializer};
@ -42,11 +43,12 @@ impl DocIds {
self.doc_ids().binary_search(&doc).is_ok() self.doc_ids().binary_search(&doc).is_ok()
} }
pub fn doc_ids(&self) -> &[DocumentId] { pub fn doc_ids(&self) -> &Set<DocumentId> {
let slice = &self.data; let slice = &self.data;
let ptr = slice.as_ptr() as *const DocumentId; let ptr = slice.as_ptr() as *const DocumentId;
let len = slice.len() / mem::size_of::<DocumentId>(); let len = slice.len() / mem::size_of::<DocumentId>();
unsafe { from_raw_parts(ptr, len) } let slice = unsafe { from_raw_parts(ptr, len) };
Set::new_unchecked(slice)
} }
} }

View File

@ -5,8 +5,9 @@ use std::ops::Index;
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use fst::raw::MmapReadOnly;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use fst::raw::MmapReadOnly;
use sdset::Set;
use crate::DocIndex; use crate::DocIndex;
use crate::data::Data; use crate::data::Data;
@ -64,11 +65,12 @@ impl DocIndexes {
bytes bytes
} }
pub fn get(&self, index: usize) -> Option<&[DocIndex]> { pub fn get(&self, index: usize) -> Option<&Set<DocIndex>> {
self.ranges().get(index as usize).map(|Range { start, end }| { self.ranges().get(index as usize).map(|Range { start, end }| {
let start = *start as usize; let start = *start as usize;
let end = *end as usize; let end = *end as usize;
&self.indexes()[start..end] let slice = &self.indexes()[start..end];
Set::new_unchecked(slice)
}) })
} }
@ -117,7 +119,7 @@ impl<W: Write> DocIndexesBuilder<W> {
} }
} }
pub fn insert(&mut self, indexes: &[DocIndex]) -> io::Result<()> { pub fn insert(&mut self, indexes: &Set<DocIndex>) -> io::Result<()> {
let len = indexes.len() as u64; let len = indexes.len() as u64;
let start = self.ranges.last().map(|r| r.end).unwrap_or(0); let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
let range = Range { start, end: start + len }; let range = Range { start, end: start + len };
@ -164,16 +166,16 @@ mod tests {
let mut builder = DocIndexesBuilder::memory(); let mut builder = DocIndexesBuilder::memory();
builder.insert(&[a])?; builder.insert(Set::new(&[a])?)?;
builder.insert(&[a, b, c])?; builder.insert(Set::new(&[a, b, c])?)?;
builder.insert(&[a, c])?; builder.insert(Set::new(&[a, c])?)?;
let bytes = builder.into_inner()?; let bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(bytes)?; let docs = DocIndexes::from_bytes(bytes)?;
assert_eq!(docs.get(0), Some(&[a][..])); assert_eq!(docs.get(0), Some(Set::new(&[a])?));
assert_eq!(docs.get(1), Some(&[a, b, c][..])); assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?));
assert_eq!(docs.get(2), Some(&[a, c][..])); assert_eq!(docs.get(2), Some(Set::new(&[a, c])?));
assert_eq!(docs.get(3), None); assert_eq!(docs.get(3), None);
Ok(()) Ok(())
@ -187,9 +189,9 @@ mod tests {
let mut builder = DocIndexesBuilder::memory(); let mut builder = DocIndexesBuilder::memory();
builder.insert(&[a])?; builder.insert(Set::new(&[a])?)?;
builder.insert(&[a, b, c])?; builder.insert(Set::new(&[a, b, c])?)?;
builder.insert(&[a, c])?; builder.insert(Set::new(&[a, c])?)?;
let builder_bytes = builder.into_inner()?; let builder_bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(builder_bytes.clone())?; let docs = DocIndexes::from_bytes(builder_bytes.clone())?;

View File

@ -2,6 +2,7 @@ use std::error::Error;
use std::path::Path; use std::path::Path;
use std::fmt; use std::fmt;
use sdset::Set;
use serde::de::{self, Deserialize, Deserializer}; use serde::de::{self, Deserialize, Deserializer};
use serde::ser::{Serialize, Serializer}; use serde::ser::{Serialize, Serializer};
use crate::data::DocIds; use crate::data::DocIds;
@ -38,8 +39,8 @@ impl NegativeBlob {
} }
} }
impl AsRef<[DocumentId]> for NegativeBlob { impl AsRef<Set<DocumentId>> for NegativeBlob {
fn as_ref(&self) -> &[DocumentId] { fn as_ref(&self) -> &Set<DocumentId> {
self.as_ids().doc_ids() self.as_ids().doc_ids()
} }
} }
@ -47,7 +48,7 @@ impl AsRef<[DocumentId]> for NegativeBlob {
impl fmt::Debug for NegativeBlob { impl fmt::Debug for NegativeBlob {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "NegativeBlob(")?; write!(f, "NegativeBlob(")?;
f.debug_list().entries(self.as_ref()).finish()?; f.debug_list().entries(self.as_ref().as_slice()).finish()?;
write!(f, ")") write!(f, ")")
} }
} }

View File

@ -1,9 +1,9 @@
use std::error::Error; use std::error::Error;
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
use group_by::GroupBy;
use sdset::duo::DifferenceByKey; use sdset::duo::DifferenceByKey;
use sdset::{Set, SetOperation}; use sdset::{Set, SetOperation};
use group_by::GroupBy;
use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob}; use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
use crate::database::blob::{positive, negative}; use crate::database::blob::{positive, negative};
@ -89,18 +89,16 @@ impl OpBuilder {
}; };
let mut builder = PositiveBlobBuilder::memory(); let mut builder = PositiveBlobBuilder::memory();
let doc_ids = Set::new_unchecked(negative.as_ref());
let op_builder = positive::OpBuilder::new().add(&base).add(&positive); let op_builder = positive::OpBuilder::new().add(&base).add(&positive);
let mut stream = op_builder.union().into_stream(); let mut stream = op_builder.union().into_stream();
while let Some((input, doc_indexes)) = stream.next() { while let Some((input, doc_indexes)) = stream.next() {
let doc_indexes = Set::new_unchecked(doc_indexes); let op = DifferenceByKey::new(doc_indexes, negative.as_ref(), |x| x.document_id, |x| *x);
let op = DifferenceByKey::new(doc_indexes, doc_ids, |x| x.document_id, |x| *x);
buffer.clear(); buffer.clear();
op.extend_vec(&mut buffer); op.extend_vec(&mut buffer);
if !buffer.is_empty() { if !buffer.is_empty() {
builder.insert(input, &buffer)?; builder.insert(input, Set::new_unchecked(&buffer))?;
} }
} }

View File

@ -4,6 +4,7 @@ use std::path::Path;
use std::error::Error; use std::error::Error;
use fst::{map, Map, Streamer, IntoStreamer}; use fst::{map, Map, Streamer, IntoStreamer};
use sdset::Set;
use crate::DocIndex; use crate::DocIndex;
use crate::data::{DocIndexes, DocIndexesBuilder}; use crate::data::{DocIndexes, DocIndexesBuilder};
@ -177,7 +178,7 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
/// then an error is returned. Similarly, if there was a problem writing /// then an error is returned. Similarly, if there was a problem writing
/// to the underlying writer, an error is returned. /// to the underlying writer, an error is returned.
// FIXME what if one write fails but the other succeeds? // FIXME what if one write fails but the other succeeds?
pub fn insert<K>(&mut self, key: K, doc_indexes: &[DocIndex]) -> Result<(), Box<Error>> pub fn insert<K>(&mut self, key: K, doc_indexes: &Set<DocIndex>) -> Result<(), Box<Error>>
where K: AsRef<[u8]>, where K: AsRef<[u8]>,
{ {
self.map.insert(key, self.value)?; self.map.insert(key, self.value)?;
@ -210,9 +211,9 @@ mod tests {
let mut builder = PositiveBlobBuilder::memory(); let mut builder = PositiveBlobBuilder::memory();
builder.insert("aaa", &[a])?; builder.insert("aaa", Set::new(&[a])?)?;
builder.insert("aab", &[a, b, c])?; builder.insert("aab", Set::new(&[a, b, c])?)?;
builder.insert("aac", &[a, c])?; builder.insert("aac", Set::new(&[a, c])?)?;
let (map_bytes, indexes_bytes) = builder.into_inner()?; let (map_bytes, indexes_bytes) = builder.into_inner()?;
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?; let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
@ -233,9 +234,9 @@ mod tests {
let mut builder = PositiveBlobBuilder::memory(); let mut builder = PositiveBlobBuilder::memory();
builder.insert("aaa", &[a])?; builder.insert("aaa", Set::new(&[a])?)?;
builder.insert("aab", &[a, b, c])?; builder.insert("aab", Set::new(&[a, b, c])?)?;
builder.insert("aac", &[a, c])?; builder.insert("aac", Set::new(&[a, c])?)?;
let (map_bytes, indexes_bytes) = builder.into_inner()?; let (map_bytes, indexes_bytes) = builder.into_inner()?;
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?; let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;

View File

@ -74,7 +74,7 @@ impl<'m> $name<'m> {
} }
impl<'m, 'a> fst::Streamer<'a> for $name<'m> { impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
type Item = (&'a [u8], &'a [DocIndex]); type Item = (&'a [u8], &'a Set<DocIndex>);
fn next(&'a mut self) -> Option<Self::Item> { fn next(&'a mut self) -> Option<Self::Item> {
// loop { // loop {
@ -114,7 +114,7 @@ impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
builder.$operation().extend_vec(&mut self.outs); builder.$operation().extend_vec(&mut self.outs);
if self.outs.is_empty() { return None } if self.outs.is_empty() { return None }
return Some((input, &self.outs)) return Some((input, Set::new_unchecked(&self.outs)))
}, },
None => None None => None
} }

View File

@ -48,9 +48,9 @@ impl NegativeUpdateBuilder {
Blob::Positive(_) => unreachable!(), Blob::Positive(_) => unreachable!(),
}; };
for &document_id in negative_blob.as_ref() { for &document_id in negative_blob.as_ref().as_slice() {
let start = DocumentKey::new(document_id); let start = DocumentKey::new(document_id);
let end = DocumentKey::new(document_id + 1); let end = start.with_attribute_max();
file_writer.delete_range(start.as_ref(), end.as_ref())?; file_writer.delete_range(start.as_ref(), end.as_ref())?;
} }

View File

@ -4,6 +4,8 @@ use std::collections::BTreeMap;
use std::error::Error; use std::error::Error;
use std::io::Write; use std::io::Write;
use sdset::Set;
use crate::database::blob::positive::PositiveBlobBuilder; use crate::database::blob::positive::PositiveBlobBuilder;
use crate::DocIndex; use crate::DocIndex;
@ -40,7 +42,7 @@ impl<W: Write, X: Write> UnorderedPositiveBlobBuilder<W, X> {
pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> { pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> {
for (key, mut doc_indexes) in self.map { for (key, mut doc_indexes) in self.map {
doc_indexes.sort_unstable(); doc_indexes.sort_unstable();
self.builder.insert(&key, &doc_indexes)?; self.builder.insert(&key, Set::new_unchecked(&doc_indexes))?;
} }
self.builder.into_inner() self.builder.into_inner()
} }