rebase from master

This commit is contained in:
Quentin de Quelen 2020-02-02 22:59:19 +01:00 committed by qdequele
parent 2143226f04
commit dc6907e748
No known key found for this signature in database
GPG Key ID: B3F0A000EBF11745
29 changed files with 92 additions and 105 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

1
Cargo.lock generated
View File

@ -1022,7 +1022,6 @@ version = "0.8.4"
dependencies = [
"assert-json-diff 1.0.1 (git+https://github.com/qdequele/assert-json-diff)",
"async-std 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-channel 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -14,6 +14,7 @@ use meilisearch_types::DocIndex;
use sdset::{Set, SetBuf, exponential_search};
use slice_group_by::{GroupBy, GroupByMut};
use crate::error::Error;
use crate::criterion::{Criteria, Context, ContextMut};
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
use crate::raw_document::RawDocument;
@ -163,7 +164,7 @@ where
let schema = main_store.schema(reader)?.ok_or(Error::SchemaMissing)?;
let iter = raw_documents.into_iter().skip(range.start).take(range.len());
let iter = iter.map(|rd| Document::from_raw(rd, &automatons, &arena, searchable_attrs.as_ref(), &schema));
let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
let documents = iter.collect();
debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
@ -349,7 +350,7 @@ where
};
if distinct_accepted && seen.len() > range.start {
documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref()));
documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
if documents.len() == range.len() {
break;
}

View File

@ -743,12 +743,12 @@ mod tests {
assert!(document.is_none());
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(7900334843754999545))
.document(&reader, None, DocumentId(7_900_334_843_754_999_545))
.unwrap();
assert!(document.is_some());
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(8367468610878465872))
.document(&reader, None, DocumentId(8_367_468_610_878_465_872))
.unwrap();
assert!(document.is_some());
}
@ -820,12 +820,12 @@ mod tests {
assert!(document.is_none());
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(7900334843754999545))
.document(&reader, None, DocumentId(7_900_334_843_754_999_545))
.unwrap();
assert!(document.is_some());
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(8367468610878465872))
.document(&reader, None, DocumentId(8_367_468_610_878_465_872))
.unwrap();
assert!(document.is_some());
@ -862,7 +862,7 @@ mod tests {
let reader = db.main_read_txn().unwrap();
let document: Option<serde_json::Value> = index
.document(&reader, None, DocumentId(7900334843754999545))
.document(&reader, None, DocumentId(7_900_334_843_754_999_545))
.unwrap();
let new_doc1 = serde_json::json!({
@ -873,7 +873,7 @@ mod tests {
assert_eq!(document, Some(new_doc1));
let document: Option<serde_json::Value> = index
.document(&reader, None, DocumentId(8367468610878465872))
.document(&reader, None, DocumentId(8_367_468_610_878_465_872))
.unwrap();
let new_doc2 = serde_json::json!({
@ -1039,14 +1039,14 @@ mod tests {
assert_matches!(
iter.next(),
Some(Document {
id: DocumentId(7900334843754999545),
id: DocumentId(7_900_334_843_754_999_545),
..
})
);
assert_matches!(
iter.next(),
Some(Document {
id: DocumentId(8367468610878465872),
id: DocumentId(8_367_468_610_878_465_872),
..
})
);

View File

@ -2,6 +2,10 @@ use crate::serde::{DeserializerError, SerializerError};
use serde_json::Error as SerdeJsonError;
use std::{error, fmt, io};
pub use heed::Error as HeedError;
pub use fst::Error as FstError;
pub use bincode::Error as BincodeError;
pub type MResult<T> = Result<T, Error>;
#[derive(Debug)]
@ -35,14 +39,14 @@ impl From<meilisearch_schema::Error> for Error {
}
}
impl From<heed::Error> for Error {
fn from(error: heed::Error) -> Error {
impl From<HeedError> for Error {
fn from(error: HeedError) -> Error {
Error::Zlmdb(error)
}
}
impl From<fst::Error> for Error {
fn from(error: fst::Error) -> Error {
impl From<FstError> for Error {
fn from(error: FstError) -> Error {
Error::Fst(error)
}
}
@ -53,8 +57,8 @@ impl From<SerdeJsonError> for Error {
}
}
impl From<bincode::Error> for Error {
fn from(error: bincode::Error) -> Error {
impl From<BincodeError> for Error {
fn from(error: BincodeError) -> Error {
Error::Bincode(error)
}
}

View File

@ -23,18 +23,20 @@ pub mod serde;
pub mod store;
pub use self::database::{BoxUpdateFn, Database, MainT, UpdateT};
pub use self::error::{Error, MResult};
pub use self::error::{Error, HeedError, FstError, MResult};
pub use self::number::{Number, ParseNumberError};
pub use self::ranked_map::RankedMap;
pub use self::raw_document::RawDocument;
pub use self::store::Index;
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
pub use meilisearch_schema::Schema;
pub use query_words_mapper::QueryWordsMapper;
use std::convert::TryFrom;
use std::collections::HashMap;
use compact_arena::SmallArena;
use log::{error, trace};
use crate::bucket_sort::PostingsListView;
use crate::levenshtein::prefix_damerau_levenshtein;
@ -92,7 +94,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
};
let highlight = Highlight {
attribute: attribute,
attribute,
char_index: di.char_index,
char_length: covered_area,
};

View File

@ -312,7 +312,7 @@ mod tests {
for ((docid, attr, _), count) in fields_counts {
let prev = index
.documents_fields_counts
.document_field_count(&mut writer, docid, IndexedPos(attr))
.document_field_count(&writer, docid, IndexedPos(attr))
.unwrap();
let prev = prev.unwrap_or(0);

View File

@ -180,7 +180,7 @@ pub fn create_query_tree(
) -> MResult<(Operation, HashMap<QueryId, Range<usize>>)>
{
let words = split_query_string(query).map(str::to_lowercase);
let words: Vec<_> = words.into_iter().enumerate().collect();
let words: Vec<_> = words.enumerate().collect();
let mut mapper = QueryWordsMapper::new(words.iter().map(|(_, w)| w));

View File

@ -22,10 +22,10 @@ fn validate_number(value: &Number) -> Option<String> {
if value.is_f64() {
return None
}
return Some(value.to_string())
Some(value.to_string())
}
fn validate_string(value: &String) -> Option<String> {
fn validate_string(value: &str) -> Option<String> {
if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
Some(value.to_string())
} else {

View File

@ -306,7 +306,6 @@ where
T: ser::Serialize,
{
let field_id = schema.get_or_create(&attribute)?;
serialize_value_with_id(
txn,
field_id,

View File

@ -49,7 +49,7 @@ impl Settings {
};
Ok(SettingsUpdate {
ranking_rules: ranking_rules,
ranking_rules,
ranking_distinct: settings.ranking_distinct.into(),
identifier: settings.identifier.into(),
searchable_attributes: settings.searchable_attributes.into(),

View File

@ -29,7 +29,7 @@ use std::{mem, ptr};
use heed::Result as ZResult;
use heed::{BytesEncode, BytesDecode};
use meilisearch_schema::{Schema, SchemaAttr};
use meilisearch_schema::{IndexedPos, FieldId};
use sdset::{Set, SetBuf};
use serde::de::{self, Deserialize};
use zerocopy::{AsBytes, FromBytes};
@ -38,6 +38,7 @@ use crate::criterion::Criteria;
use crate::database::{MainT, UpdateT};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::serde::Deserializer;
use crate::settings::SettingsUpdate;
use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
type BEU64 = zerocopy::U64<byteorder::BigEndian>;

View File

@ -19,7 +19,7 @@ pub struct PrefixKey {
impl PrefixKey {
pub fn new(prefix: [u8; 4], index: u64, docid: u64) -> PrefixKey {
PrefixKey {
prefix: prefix,
prefix,
index: BEU64::new(index),
docid: BEU64::new(docid),
}

View File

@ -109,7 +109,7 @@ pub fn apply_documents_addition<'a, 'b>(
) -> MResult<()> {
let mut documents_additions = HashMap::new();
let schema = match index.main.schema(writer)? {
let mut schema = match index.main.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
@ -147,7 +147,7 @@ pub fn apply_documents_addition<'a, 'b>(
for (document_id, document) in documents_additions {
let serializer = Serializer {
txn: writer,
schema: &schema,
schema: &mut schema,
document_store: index.documents_fields,
document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer,
@ -166,7 +166,7 @@ pub fn apply_documents_addition<'a, 'b>(
indexer,
)?;
compute_short_prefixes(writer, index)?;
index.main.put_schema(writer, &schema)?;
Ok(())
}
@ -178,7 +178,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
) -> MResult<()> {
let mut documents_additions = HashMap::new();
let mut schema = match index.main.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
@ -233,7 +233,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
for (document_id, document) in documents_additions {
let serializer = Serializer {
txn: writer,
schema: &schema,
schema: &mut schema,
document_store: index.documents_fields,
document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer,
@ -252,7 +252,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
indexer,
)?;
compute_short_prefixes(writer, index)?;
index.main.put_schema(writer, &schema)?;
Ok(())
}
@ -292,7 +292,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
for document_id in documents_ids {
for result in index.documents_fields.document_fields(writer, *document_id)? {
let (attr, bytes) = result?;
let (field_id, bytes) = result?;
let value: serde_json::Value = serde_json::from_slice(bytes)?;
ram_store.insert((document_id, field_id), value);
}
@ -322,7 +322,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
)?;
}
compute_short_prefixes(writer, index)?;
index.main.put_schema(writer, &schema)?;
Ok(())
}

View File

@ -130,22 +130,10 @@ pub fn apply_settings_update(
_ => (),
}
let main_store = index.main;
let documents_fields_store = index.documents_fields;
let documents_fields_counts_store = index.documents_fields_counts;
let postings_lists_store = index.postings_lists;
let docs_words_store = index.docs_words;
if must_reindex {
reindex_all_documents(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
)?;
reindex_all_documents(writer, index)?;
}
if let UpdateState::Clear = settings.identifier {
index.main.delete_schema(writer)?;
}
@ -158,10 +146,7 @@ pub fn apply_stop_words_update(
stop_words: BTreeSet<String>,
) -> MResult<bool> {
let main_store = index.main;
let mut must_reindex = false;
let old_stop_words: BTreeSet<String> = main_store
let old_stop_words: BTreeSet<String> = index.main
.stop_words_fst(writer)?
.unwrap_or_default()
.stream()
@ -184,10 +169,9 @@ pub fn apply_stop_words_update(
index,
deletion
)?;
must_reindex = true;
return Ok(true)
}
Ok(must_reindex)
Ok(false)
}
fn apply_stop_words_addition(
@ -256,8 +240,6 @@ fn apply_stop_words_deletion(
deletion: BTreeSet<String>,
) -> MResult<()> {
let main_store = index.main;
let mut stop_words_builder = SetBuilder::memory();
for word in deletion {
@ -271,7 +253,7 @@ fn apply_stop_words_deletion(
.unwrap();
// now we delete all of these stop words from the main store
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default();
let op = OpBuilder::new()
.add(&stop_words_fst)
@ -285,7 +267,7 @@ fn apply_stop_words_deletion(
.and_then(fst::Set::from_bytes)
.unwrap();
Ok(main_store.put_stop_words_fst(writer, &stop_words_fst)?)
Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?)
}
pub fn apply_synonyms_update(

BIN
meilisearch-http/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -15,17 +15,19 @@ path = "src/main.rs"
[dependencies]
async-std = { version = "1.0.1", features = ["attributes"] }
bincode = "1.2.0"
chrono = { version = "0.4.9", features = ["serde"] }
crossbeam-channel = "0.4.0"
env_logger = "0.7.1"
futures = "0.3.1"
heed = "0.6.1"
http = "0.1.19"
http-service = "0.4.0"
indexmap = { version = "1.3.0", features = ["serde-1"] }
log = "0.4.8"
main_error = "0.1.0"
meilisearch-core = { path = "../meilisearch-core", version = "0.8.4" }
meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.4" }
mime = "0.3.16"
pretty-bytes = "0.2.2"
rand = "0.7.2"
rayon = "1.2.0"
@ -39,9 +41,6 @@ tide = "0.6.0"
ureq = { version = "0.11.2", features = ["tls"], default-features = false }
walkdir = "2.2.9"
whoami = "0.6"
http-service = "0.4.0"
futures = "0.3.1"
mime = "0.3.16"
[dev-dependencies]
http-service-mock = "0.4.0"

View File

@ -5,6 +5,7 @@ use log::{error, warn};
use serde::{Deserialize, Serialize};
use tide::IntoResponse;
use tide::Response;
use meilisearch_core::{HeedError, FstError};
use crate::helpers::meilisearch::Error as SearchError;
@ -139,14 +140,14 @@ impl From<meilisearch_core::Error> for ResponseError {
}
}
impl From<heed::Error> for ResponseError {
fn from(err: heed::Error) -> ResponseError {
impl From<HeedError> for ResponseError {
fn from(err: HeedError) -> ResponseError {
ResponseError::internal(err)
}
}
impl From<meilisearch_core::FstError> for ResponseError {
fn from(err: meilisearch_core::FstError) -> ResponseError {
impl From<FstError> for ResponseError {
fn from(err: FstError) -> ResponseError {
ResponseError::internal(err)
}
}

View File

@ -38,7 +38,7 @@ pub fn load_routes(app: &mut tide::Server<Data>) {
}
});
app.at("/indexes/")
app.at("/indexes")
.get(|ctx| into_response(index::list_indexes(ctx)))
.post(|ctx| into_response(index::create_index(ctx)));
@ -95,7 +95,7 @@ pub fn load_routes(app: &mut tide::Server<Data>) {
.post(|ctx| into_response(setting::update_searchable(ctx)))
.delete(|ctx| into_response(setting::delete_searchable(ctx)));
app.at("/indexes/:index/settings/displayed-attribute")
app.at("/indexes/:index/settings/displayed-attributes")
.get(|ctx| into_response(setting::displayed(ctx)))
.post(|ctx| into_response(setting::update_displayed(ctx)))
.delete(|ctx| into_response(setting::delete_displayed(ctx)));

BIN
meilisearch-http/tests/.DS_Store vendored Normal file

Binary file not shown.

BIN
meilisearch-http/tests/assets/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -36,6 +36,7 @@ pub fn enrich_server_with_movies_index(
) -> Result<(), Box<dyn Error>> {
let body = json!({
"uid": "movies",
"identifier": "id",
})
.to_string()
.into_bytes();
@ -114,7 +115,7 @@ pub fn enrich_server_with_movies_documents(
.unwrap();
let _res = server.simulate(req).unwrap();
block_on(sleep(Duration::from_secs(5)));
block_on(sleep(Duration::from_secs(10)));
Ok(())
}

View File

@ -902,8 +902,8 @@ fn search_with_settings_synonyms() {
"Action",
"Science Fiction"
],
"poster_path": "https://image.tmdb.org/t/p/w500/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg",
"vote_count": 16056
"vote_count": 16056,
"poster_path": "https://image.tmdb.org/t/p/w500/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg"
},
{
"id": 299534,
@ -919,25 +919,25 @@ fn search_with_settings_synonyms() {
"Science Fiction",
"Action"
],
"poster_path": "https://image.tmdb.org/t/p/w500/or06FN3Dka5tukK1e9sl16pB3iy.jpg",
"vote_count": 10497
"vote_count": 10497,
"poster_path": "https://image.tmdb.org/t/p/w500/or06FN3Dka5tukK1e9sl16pB3iy.jpg"
},
{
"id": 271110,
"popularity": 37.431,
"vote_average": 7.4,
"title": "Captain America: Civil War",
"tagline": "Divided We Fall",
"overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.",
"director": "Anthony Russo",
"id": 99861,
"popularity": 33.938,
"vote_average": 7.3,
"title": "Avengers: Age of Ultron",
"tagline": "A New Age Has Come.",
"overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earths Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.",
"director": "Joss Whedon",
"producer": "Kevin Feige",
"genres": [
"Adventure",
"Action",
"Adventure",
"Science Fiction"
],
"poster_path": "https://image.tmdb.org/t/p/w500/kSBXou5Ac7vEqKd97wotJumyJvU.jpg",
"vote_count": 15079
"vote_count": 14661,
"poster_path": "https://image.tmdb.org/t/p/w500/t90Y3G8UGQp0f0DrP60wRu9gfrH.jpg"
}
]);

View File

@ -50,9 +50,9 @@ fn write_all_and_delete() {
"dsc(rank)",
],
"rankingDistinct": "movie_id",
"identifier": "uid",
"identifier": "id",
"searchableAttributes": [
"uid",
"id",
"movie_id",
"title",
"description",

View File

@ -73,7 +73,7 @@ fn write_all_and_delete() {
let mut buf = Vec::new();
block_on(res.into_body().read_to_end(&mut buf)).unwrap();
let res_value: Value = serde_json::from_slice(&buf).unwrap();
println!("1: {:?} vs {:?}", json, res_value);
assert_json_eq!(json, res_value, ordered: false);
// 4 - Delete all settings
@ -102,7 +102,7 @@ fn write_all_and_delete() {
"rankingRules": null,
"rankingDistinct": null,
});
println!("2: {:?} vs {:?}", json, res_value);
assert_json_eq!(json, res_value, ordered: false);
}

View File

@ -24,7 +24,7 @@ impl FieldsMap {
if let Some(id) = self.name_map.get(name) {
return Ok(*id)
}
let id = self.next_id.into();
let id = self.next_id;
self.next_id = self.next_id.next()?;
self.name_map.insert(name.to_string(), id);
self.id_map.insert(id, name.to_string());
@ -39,7 +39,7 @@ impl FieldsMap {
}
pub fn id(&self, name: &str) -> Option<FieldId> {
self.name_map.get(name).map(|s| *s)
self.name_map.get(name).copied()
}
pub fn name<I: Into<FieldId>>(&self, id: I) -> Option<&str> {

View File

@ -21,7 +21,7 @@ pub struct Schema {
impl Schema {
pub fn with_identifier(name: &str) -> Schema {
let mut fields_map = FieldsMap::default();
let field_id = fields_map.insert(name.into()).unwrap();
let field_id = fields_map.insert(name).unwrap();
Schema {
fields_map,
@ -57,7 +57,7 @@ impl Schema {
}
pub fn contains(&self, name: &str) -> bool {
self.fields_map.id(name.into()).is_some()
self.fields_map.id(name).is_some()
}
pub fn get_or_create_empty(&mut self, name: &str) -> SResult<FieldId> {
@ -65,16 +65,16 @@ impl Schema {
}
pub fn get_or_create(&mut self, name: &str) -> SResult<FieldId> {
match self.fields_map.id(name.clone()) {
match self.fields_map.id(name) {
Some(id) => {
Ok(id)
}
None => {
if self.index_new_fields {
self.set_indexed(name.clone())?;
self.set_indexed(name)?;
self.set_displayed(name)
} else {
self.fields_map.insert(name.clone())
self.fields_map.insert(name)
}
}
}
@ -105,19 +105,19 @@ impl Schema {
}
pub fn set_ranked(&mut self, name: &str) -> SResult<FieldId> {
let id = self.fields_map.insert(name.into())?;
let id = self.fields_map.insert(name)?;
self.ranked.insert(id);
Ok(id)
}
pub fn set_displayed(&mut self, name: &str) -> SResult<FieldId> {
let id = self.fields_map.insert(name.into())?;
let id = self.fields_map.insert(name)?;
self.displayed.insert(id);
Ok(id)
}
pub fn set_indexed(&mut self, name: &str) -> SResult<(FieldId, IndexedPos)> {
let id = self.fields_map.insert(name.into())?;
let id = self.fields_map.insert(name)?;
if let Some(indexed_pos) = self.indexed_map.get(&id) {
return Ok((id, *indexed_pos))
};
@ -128,19 +128,19 @@ impl Schema {
}
pub fn remove_ranked(&mut self, name: &str) {
if let Some(id) = self.fields_map.id(name.into()) {
if let Some(id) = self.fields_map.id(name) {
self.ranked.remove(&id);
}
}
pub fn remove_displayed(&mut self, name: &str) {
if let Some(id) = self.fields_map.id(name.into()) {
if let Some(id) = self.fields_map.id(name) {
self.displayed.remove(&id);
}
}
pub fn remove_indexed(&mut self, name: &str) {
if let Some(id) = self.fields_map.id(name.into()) {
if let Some(id) = self.fields_map.id(name) {
self.indexed_map.remove(&id);
self.indexed.retain(|x| *x != id);
}