3577: Avoid fetching an LMDB value with an empty string r=ManyTheFish a=Kerollmops

# Pull Request

## Related issue
Fixes #3574 

## What does this PR do?
This PR fixes a bug where an empty key was used to fetch an entry from the database. LMDB returns an error when an empty or too-long key is used to fetch an entry. The empty string seems to have been generated by the Charabia tokenizer.

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2023-03-09 14:35:25 +00:00 committed by GitHub
commit 48a51e5cd6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 0 deletions

View File

@ -192,6 +192,31 @@ async fn test_kanji_language_detection() {
.await; .await;
} }
/// Checks that a search with a Thai query on an index ranked by `exactness`
/// answers with HTTP 200.
///
/// A handful of Thai documents are indexed first, although the behaviour under
/// test is exercised by the search query itself.
#[cfg(feature = "default")]
#[actix_rt::test]
async fn test_thai_language() {
    let server = Server::new().await;
    let index = server.index("test");

    // Seed the index with Thai product titles and wait for the task to finish.
    // NOTE(review): task id 0 is presumably the first task enqueued on this fresh server.
    index
        .add_documents(
            json!([
                { "id": 0, "title": "สบู่สมุนไพรดอกดาวเรือง 100 กรัม จำนวน 6 ก้อน" },
                { "id": 1, "title": "สบู่สมุนไพรชาเขียว 100 กรัม จำนวน 6 ก้อน" },
                { "id": 2, "title": "สบู่สมุนไพรฝางแดงผสมว่านหางจรเข้ 100 กรัม จำนวน 6 ก้อน" }
            ]),
            None,
        )
        .await;
    index.wait_task(0).await;

    // Restrict the ranking rules to `exactness` only, then wait for that task too.
    let settings = json!({"rankingRules": ["exactness"]});
    index.update_settings(settings).await;
    index.wait_task(1).await;

    // The search must succeed; the response body is printed if it does not.
    let query = json!({"q": "สบู"});
    index
        .search(query, |body, status| {
            assert_eq!(status, 200, "{}", body);
        })
        .await;
}
#[actix_rt::test] #[actix_rt::test]
async fn search_multiple_params() { async fn search_multiple_params() {
let server = Server::new().await; let server = Server::new().await;

View File

@ -792,6 +792,10 @@ where
let mut peekable = query.peekable(); let mut peekable = query.peekable();
while let Some(token) = peekable.next() { while let Some(token) = peekable.next() {
if token.lemma().is_empty() {
continue;
}
// early return if word limit is exceeded // early return if word limit is exceeded
if primitive_query.len() >= parts_limit { if primitive_query.len() >= parts_limit {
return primitive_query; return primitive_query;