mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-24 19:15:05 +08:00
Compare commits
11 Commits
c3b402a699
...
a2268b05ed
Author | SHA1 | Date | |
---|---|---|---|
|
a2268b05ed | ||
|
9a08757a70 | ||
|
1e694ae432 | ||
|
71807cac6d | ||
|
21a2264782 | ||
|
510ca99996 | ||
|
8924d486db | ||
|
e0c3f3d560 | ||
|
cd796b0f4b | ||
|
72ba353498 | ||
|
0dd321afc7 |
@ -69,9 +69,13 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str(&format!(
|
||||
"### Processing batch {:?}:\n",
|
||||
processing.batch.map(|batch| batch.uid)
|
||||
processing.batch.as_ref().map(|batch| batch.uid)
|
||||
));
|
||||
snap.push_str(&snapshot_bitmap(&processing.processing));
|
||||
if let Some(ref batch) = processing.batch {
|
||||
snap.push('\n');
|
||||
snap.push_str(&snapshot_batch(&batch.to_batch()));
|
||||
}
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Tasks:\n");
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"dumpCreation":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
|
@ -5,6 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[3,]
|
||||
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"taskDeletion":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
|
@ -62,10 +62,22 @@ impl ProcessingBatch {
|
||||
/// Update itself with the content of the task and update the batch id in the task.
|
||||
pub fn processing<'a>(&mut self, tasks: impl IntoIterator<Item = &'a mut Task>) {
|
||||
for task in tasks.into_iter() {
|
||||
self.stats.total_nb_tasks += 1;
|
||||
|
||||
task.batch_uid = Some(self.uid);
|
||||
// We don't store the statuses since they're all enqueued.
|
||||
// We don't store the statuses in the map since they're all enqueued but we must
|
||||
// still store them in the stats since that can be displayed.
|
||||
*self.stats.status.entry(task.status).or_default() += 1;
|
||||
|
||||
self.kinds.insert(task.kind.as_kind());
|
||||
*self.stats.types.entry(task.kind.as_kind()).or_default() += 1;
|
||||
self.indexes.extend(task.indexes().iter().map(|s| s.to_string()));
|
||||
if let Some(index_uid) = task.index_uid() {
|
||||
*self.stats.index_uids.entry(index_uid.to_string()).or_default() += 1;
|
||||
}
|
||||
if let Some(ref details) = task.details {
|
||||
self.details.accumulate(&DetailsView::from(details.clone()));
|
||||
}
|
||||
if let Some(canceled_by) = task.canceled_by {
|
||||
self.canceled_by.insert(canceled_by);
|
||||
}
|
||||
@ -82,6 +94,8 @@ impl ProcessingBatch {
|
||||
|
||||
/// Must be called once the batch has finished processing.
|
||||
pub fn finished(&mut self) {
|
||||
self.details = DetailsView::default();
|
||||
self.stats = BatchStats::default();
|
||||
self.finished_at = Some(OffsetDateTime::now_utc());
|
||||
|
||||
// Initially we inserted ourselves as a processing batch, that's not the case anymore.
|
||||
|
@ -46,7 +46,12 @@ async fn list_batches() {
|
||||
.await;
|
||||
let (response, code) = index.list_batches().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["results"].as_array().unwrap().len(), 2);
|
||||
assert_eq!(
|
||||
response["results"].as_array().unwrap().len(),
|
||||
2,
|
||||
"{}",
|
||||
serde_json::to_string_pretty(&response).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
@ -4,6 +4,58 @@ use super::*;
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_formatted_from_sdk() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
index
|
||||
.update_settings(
|
||||
json!({ "filterableAttributes": ["genre"], "searchableAttributes": ["title"] }),
|
||||
)
|
||||
.await;
|
||||
|
||||
let documents = json!([
|
||||
{ "id": 123, "title": "Pride and Prejudice", "genre": "romance" },
|
||||
{ "id": 456, "title": "Le Petit Prince", "genre": "adventure" },
|
||||
{ "id": 1, "title": "Alice In Wonderland", "genre": "adventure" },
|
||||
{ "id": 2, "title": "Le Rouge et le Noir", "genre": "romance" },
|
||||
{ "id": 1344, "title": "The Hobbit", "genre": "adventure" },
|
||||
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "genre": "fantasy" },
|
||||
{ "id": 7, "title": "Harry Potter and the Chamber of Secrets", "genre": "fantasy" },
|
||||
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy" }
|
||||
]);
|
||||
let (response, _) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({ "q":"prince",
|
||||
"attributesToCrop": ["title"],
|
||||
"cropLength": 2,
|
||||
"filter": "genre = adventure",
|
||||
"attributesToHighlight": ["title"],
|
||||
"attributesToRetrieve": ["title"]
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
{
|
||||
"title": "Le Petit Prince",
|
||||
"_formatted": {
|
||||
"title": "…Petit <em>Prince</em>"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn formatted_contain_wildcard() {
|
||||
let server = Server::new_shared();
|
||||
|
@ -15,6 +15,7 @@ mod pagination;
|
||||
mod restrict_searchable;
|
||||
mod search_queue;
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch::Opt;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@ -62,6 +63,71 @@ async fn simple_search() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_stop_word() {
|
||||
// related to https://github.com/meilisearch/meilisearch/issues/4984
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (_, code) = index
|
||||
.update_settings(json!({"stopWords": ["the", "The", "a", "an", "to", "in", "of"]}))
|
||||
.await;
|
||||
meili_snap::snapshot!(code, @"202 Accepted");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
// prefix search
|
||||
index
|
||||
.search(json!({"q": "to the", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response["hits"]), @"[]");
|
||||
})
|
||||
.await;
|
||||
|
||||
// non-prefix search
|
||||
index
|
||||
.search(json!({"q": "to the ", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"_formatted": {
|
||||
"title": "Shazam!"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"_formatted": {
|
||||
"title": "Captain Marvel"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"_formatted": {
|
||||
"title": "Escape Room"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"_formatted": {
|
||||
"title": "How to Train Your Dragon: The Hidden World"
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Gläss",
|
||||
"_formatted": {
|
||||
"title": "Gläss"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn phrase_search_with_stop_word() {
|
||||
// related to https://github.com/meilisearch/meilisearch/issues/3521
|
||||
|
@ -367,3 +367,50 @@ async fn search_on_exact_field() {
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn phrase_search_on_title() {
|
||||
let server = Server::new().await;
|
||||
let documents = json!([
|
||||
{ "id": 8, "desc": "Document Review", "title": "Document Review Specialist II" },
|
||||
{ "id": 5, "desc": "Document Review", "title": "Document Review Attorney" },
|
||||
{ "id": 4, "desc": "Document Review", "title": "Document Review Manager - Cyber Incident Response (Remote)" },
|
||||
{ "id": 3, "desc": "Document Review", "title": "Document Review Paralegal" },
|
||||
{ "id": 2, "desc": "Document Review", "title": "Document Controller (Saudi National)" },
|
||||
{ "id": 1, "desc": "Document Review", "title": "Document Reviewer" },
|
||||
{ "id": 7, "desc": "Document Review", "title": "Document Review Specialist II" },
|
||||
{ "id": 6, "desc": "Document Review", "title": "Document Review (Entry Level)" }
|
||||
]);
|
||||
let index = index_with_documents(&server, &documents).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "\"Document Review\"", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["title"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Document Review Specialist II"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Attorney"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Manager - Cyber Incident Response (Remote)"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Paralegal"
|
||||
},
|
||||
{
|
||||
"title": "Document Review Specialist II"
|
||||
},
|
||||
{
|
||||
"title": "Document Review (Entry Level)"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
@ -275,7 +275,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||
last_match_last_token_position_plus_one
|
||||
} else {
|
||||
// we have matched the end of possible tokens, there's nothing to advance
|
||||
tokens.len() - 1
|
||||
tokens.len()
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
|
||||
if words.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut candidates = None;
|
||||
for word in words.iter().flatten().copied() {
|
||||
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
|
||||
candidates |= word_docids;
|
||||
if let Some(candidates) = candidates.as_mut() {
|
||||
*candidates &= word_docids;
|
||||
} else {
|
||||
candidates = Some(word_docids);
|
||||
}
|
||||
} else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
}
|
||||
|
||||
let Some(mut candidates) = candidates else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
|
||||
let winsize = words.len().min(3);
|
||||
|
||||
for win in words.windows(winsize) {
|
||||
|
@ -57,9 +57,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
.map(|s| s.iter().map(String::as_str).collect());
|
||||
let old_dictionary: Option<Vec<_>> =
|
||||
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let del_builder =
|
||||
let mut del_builder =
|
||||
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
|
||||
let del_tokenizer = del_builder.into_tokenizer();
|
||||
let del_tokenizer = del_builder.build();
|
||||
|
||||
let new_stop_words = settings_diff.new.stop_words.as_ref();
|
||||
let new_separators: Option<Vec<_>> = settings_diff
|
||||
@ -69,9 +69,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
.map(|s| s.iter().map(String::as_str).collect());
|
||||
let new_dictionary: Option<Vec<_>> =
|
||||
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let add_builder =
|
||||
let mut add_builder =
|
||||
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
|
||||
let add_tokenizer = add_builder.into_tokenizer();
|
||||
let add_tokenizer = add_builder.build();
|
||||
|
||||
// iterate over documents.
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
|
Loading…
Reference in New Issue
Block a user