2018-05-05 16:59:03 +08:00
|
|
|
#[macro_use] extern crate serde_derive;
|
2018-04-22 23:34:41 +08:00
|
|
|
extern crate bincode;
|
2018-04-22 21:54:34 +08:00
|
|
|
extern crate fst;
|
2018-05-05 16:59:03 +08:00
|
|
|
extern crate serde;
|
2018-04-22 21:54:34 +08:00
|
|
|
|
2018-05-05 16:59:03 +08:00
|
|
|
mod fst_map;
|
|
|
|
|
2018-05-05 19:50:51 +08:00
|
|
|
use std::ops::Range;
|
2018-04-23 04:05:01 +08:00
|
|
|
use std::io::{Write, BufReader};
|
2018-04-22 23:34:41 +08:00
|
|
|
use std::fs::File;
|
|
|
|
use std::path::Path;
|
|
|
|
use std::str::from_utf8_unchecked;
|
2018-05-05 16:59:03 +08:00
|
|
|
use fst::Automaton;
|
2018-04-22 21:54:34 +08:00
|
|
|
|
2018-05-05 16:59:03 +08:00
|
|
|
pub use self::fst_map::{FstMap, FstMapBuilder};
|
|
|
|
use self::fst_map::Values;
|
2018-04-22 23:34:41 +08:00
|
|
|
|
2018-05-05 16:59:03 +08:00
|
|
|
pub struct StreamBuilder<'a, T: 'a, A: Automaton> {
|
2018-04-23 02:06:56 +08:00
|
|
|
inner: fst::map::StreamBuilder<'a, A>,
|
2018-05-05 16:59:03 +08:00
|
|
|
values: &'a Values<T>,
|
2018-04-23 02:06:56 +08:00
|
|
|
}
|
|
|
|
|
2018-05-05 16:59:03 +08:00
|
|
|
impl<'a, T: 'a, A: Automaton> fst::IntoStreamer<'a> for StreamBuilder<'a, T, A> {
|
|
|
|
type Item = (&'a str, &'a [T]);
|
2018-04-23 02:06:56 +08:00
|
|
|
|
2018-05-05 16:59:03 +08:00
|
|
|
type Into = Stream<'a, T, A>;
|
2018-04-23 02:06:56 +08:00
|
|
|
|
|
|
|
fn into_stream(self) -> Self::Into {
|
|
|
|
Stream {
|
|
|
|
inner: self.inner.into_stream(),
|
|
|
|
values: self.values,
|
|
|
|
}
|
|
|
|
}
|
2018-04-22 21:54:34 +08:00
|
|
|
}
|
|
|
|
|
2018-05-05 16:59:03 +08:00
|
|
|
pub struct Stream<'a, T: 'a, A: Automaton = fst::automaton::AlwaysMatch> {
|
2018-04-22 23:34:41 +08:00
|
|
|
inner: fst::map::Stream<'a, A>,
|
2018-05-05 16:59:03 +08:00
|
|
|
values: &'a Values<T>,
|
2018-04-22 23:34:41 +08:00
|
|
|
}
|
|
|
|
|
2018-05-05 16:59:03 +08:00
|
|
|
impl<'a, 'm, T: 'a, A: Automaton> fst::Streamer<'a> for Stream<'m, T, A> {
|
|
|
|
type Item = (&'a str, &'a [T]);
|
2018-04-22 23:34:41 +08:00
|
|
|
|
|
|
|
fn next(&'a mut self) -> Option<Self::Item> {
|
|
|
|
// Here we can't just `map` because of some borrow rules
|
|
|
|
match self.inner.next() {
|
|
|
|
Some((key, i)) => {
|
|
|
|
let key = unsafe { from_utf8_unchecked(key) };
|
2018-05-05 16:59:03 +08:00
|
|
|
let values = unsafe { self.values.get_unchecked(i as usize) };
|
|
|
|
Some((key, values))
|
2018-04-22 23:34:41 +08:00
|
|
|
},
|
|
|
|
None => None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|