feat: HTTP server example can use stopwords

This commit is contained in:
Quentin de Quelen 2019-01-14 15:34:56 +01:00
parent b13e61f40a
commit e0e5e87ed3

View File

@ -3,12 +3,14 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use log::{error, info}; use log::{error, info};
use std::error::Error; use std::error::Error;
use std::ffi::OsStr;
use std::fmt; use std::fmt;
use std::fs::{self, File};
use std::io::{self, BufRead, BufReader};
use std::net::SocketAddr;
use std::path::{PathBuf, Path}; use std::path::{PathBuf, Path};
use std::sync::Arc; use std::sync::Arc;
use std::time::SystemTime; use std::time::SystemTime;
use std::fs;
use std::ffi::OsStr;
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
use chashmap::CHashMap; use chashmap::CHashMap;
@ -30,9 +32,13 @@ pub struct Opt {
#[structopt(parse(from_os_str))] #[structopt(parse(from_os_str))]
pub database_path: PathBuf, pub database_path: PathBuf,
/// The address and port to bind the server to.
#[structopt(short = "l", default_value = "127.0.0.1:8080")]
pub listen_addr: SocketAddr,
/// The path to the list of stop words (one by line). /// The path to the list of stop words (one by line).
#[structopt(long = "stop-words", parse(from_os_str))] #[structopt(long = "stop-words", parse(from_os_str))]
pub stop_words_path: Option<PathBuf>, pub stop_words: PathBuf,
} }
// //
@ -73,14 +79,16 @@ impl From<Box<Error>> for DatabaseError {
pub struct MultiDatabase { pub struct MultiDatabase {
databases: CHashMap<String, Database>, databases: CHashMap<String, Database>,
db_path: PathBuf, db_path: PathBuf,
stop_words: HashSet<String>,
} }
impl MultiDatabase { impl MultiDatabase {
pub fn new(path: PathBuf) -> MultiDatabase { pub fn new(path: PathBuf, stop_words: HashSet<String>) -> MultiDatabase {
MultiDatabase { MultiDatabase {
databases: CHashMap::new(), databases: CHashMap::new(),
db_path: path db_path: path,
stop_words: stop_words
} }
} }
@ -178,6 +186,20 @@ fn get_file_name_from_path(path: &str) -> Option<&str> {
Path::new(path).file_stem().and_then(OsStr::to_str) Path::new(path).file_stem().and_then(OsStr::to_str)
} }
/// Loads the stop-word list stored in the file at `path`.
///
/// The file is read line by line. Each line is trimmed of surrounding
/// whitespace and the remaining text is added to the returned set as one
/// stop word. Lines that are empty after trimming are skipped, so a blank
/// or trailing empty line never registers the empty string as a stop word.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened or if a
/// line cannot be read from it.
fn retrieve_stop_words(path: &Path) -> io::Result<HashSet<String>> {
    let reader = BufReader::new(File::open(path)?);
    let mut words = HashSet::new();

    for line in reader.lines() {
        let line = line?;
        let word = line.trim();
        // A blank line carries no word; inserting "" would pollute the set.
        if !word.is_empty() {
            words.insert(word.to_string());
        }
    }

    Ok(words)
}
// //
// PARAMS & BODY FOR HTTPS HANDLERS // PARAMS & BODY FOR HTTPS HANDLERS
// //
@ -271,7 +293,7 @@ fn ingest(index_name: String, body: IngestBody, db: Arc<MultiDatabase>) -> Resul
} }
} }
let stop_words = HashSet::new(); let stop_words = &db.stop_words;
if let Some(documents) = body.insert { if let Some(documents) = body.insert {
for doc in documents { for doc in documents {
if let Err(e) = update.update_document(doc, &tokenizer_builder, &stop_words) { if let Err(e) = update.update_document(doc, &tokenizer_builder, &stop_words) {
@ -355,7 +377,7 @@ fn search(index_name: String, query: SearchQuery, db: Arc<MultiDatabase>) -> Res
Ok(response) Ok(response)
} }
fn start_server(db: Arc<MultiDatabase>) { fn start_server(listen_addr: SocketAddr, db: Arc<MultiDatabase>) {
let index_path = warp::path("index").and(warp::path::param::<String>()); let index_path = warp::path("index").and(warp::path::param::<String>());
let db = warp::any().map(move || db.clone()); let db = warp::any().map(move || db.clone());
@ -390,19 +412,24 @@ fn start_server(db: Arc<MultiDatabase>) {
let routes = api.with(logs).with(headers); let routes = api.with(logs).with(headers);
warp::serve(routes) info!("Server is started on {}", listen_addr);
.run(([127, 0, 0, 1], 8080)); warp::serve(routes).run(listen_addr);
} }
fn main() { fn main() {
env_logger::init(); env_logger::init();
let opt = Opt::from_args(); let opt = Opt::from_args();
let db = Arc::new(MultiDatabase::new(opt.database_path.clone())); let stop_words = match retrieve_stop_words(&opt.stop_words) {
Ok(s) => s,
Err(_) => HashSet::new(),
};
let db = Arc::new(MultiDatabase::new(opt.database_path.clone(), stop_words));
db.load_existing(); db.load_existing();
start_server(db); start_server(opt.listen_addr, db);
} }