mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-25 19:45:05 +08:00
Compare commits
12 Commits
7dd55587f5
...
88b3d2547c
Author | SHA1 | Date | |
---|---|---|---|
|
88b3d2547c | ||
|
1fcd5f091e | ||
|
6094bb299a | ||
|
a5d7ae23bd | ||
|
03886d0012 | ||
|
b427b9e88f | ||
|
8b95f5ccc6 | ||
|
da59a043ba | ||
|
da4d47b5d0 | ||
|
d0b1ba20cb | ||
|
c79ca9679b | ||
|
a934b0ac6a |
@ -148,7 +148,6 @@ pub fn snapshot_task(task: &Task) -> String {
|
||||
enqueued_at: _,
|
||||
started_at: _,
|
||||
finished_at: _,
|
||||
progress: _,
|
||||
error,
|
||||
canceled_by,
|
||||
details,
|
||||
|
@ -978,12 +978,7 @@ impl IndexScheduler {
|
||||
Ok((
|
||||
ret.map(|task| {
|
||||
if processing.contains(task.uid) {
|
||||
Task {
|
||||
status: Status::Processing,
|
||||
progress: progress.clone(),
|
||||
started_at: Some(started_at),
|
||||
..task
|
||||
}
|
||||
Task { status: Status::Processing, started_at: Some(started_at), ..task }
|
||||
} else {
|
||||
task
|
||||
}
|
||||
@ -1025,7 +1020,6 @@ impl IndexScheduler {
|
||||
enqueued_at: OffsetDateTime::now_utc(),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
progress: None,
|
||||
error: None,
|
||||
canceled_by: None,
|
||||
details: kind.default_details(),
|
||||
@ -1606,8 +1600,6 @@ impl<'a> Dump<'a> {
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
/// FIXME: should we update dump to contain progress information? 🤔
|
||||
progress: None,
|
||||
error: task.error,
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
|
@ -345,8 +345,6 @@ impl IndexScheduler {
|
||||
enqueued_at,
|
||||
started_at,
|
||||
finished_at,
|
||||
/// FIXME: assert something here? ask tamo 🤔
|
||||
progress: _,
|
||||
error: _,
|
||||
canceled_by,
|
||||
details,
|
||||
|
@ -4,9 +4,7 @@ use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::{
|
||||
serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId, TaskProgress,
|
||||
};
|
||||
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@ -29,8 +27,6 @@ pub struct TaskView {
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub progress: Option<TaskProgress>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
@ -47,7 +43,6 @@ impl TaskView {
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
progress: task.progress.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,8 +31,6 @@ pub struct Task {
|
||||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
|
||||
pub progress: Option<TaskProgress>,
|
||||
|
||||
pub error: Option<ResponseError>,
|
||||
pub canceled_by: Option<TaskId>,
|
||||
pub details: Option<Details>,
|
||||
|
@ -49,4 +49,18 @@ lazy_static! {
|
||||
pub static ref MEILISEARCH_IS_INDEXING: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_is_indexing", "Meilisearch Is Indexing"))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_SEARCH_QUEUE_SIZE: IntGauge = register_int_gauge!(opts!(
|
||||
"meilisearch_search_queue_size",
|
||||
"Meilisearch Search Queue Size"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_SEARCHES_RUNNING: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_searches_running", "Meilisearch Searches Running"))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_SEARCHES_WAITING_TO_BE_PROCESSED: IntGauge =
|
||||
register_int_gauge!(opts!(
|
||||
"meilisearch_searches_waiting_to_be_processed",
|
||||
"Meilisearch Searches Being Processed"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
}
|
||||
|
@ -10,6 +10,7 @@ use prometheus::{Encoder, TextEncoder};
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::routes::create_all_stats;
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
pub fn configure(config: &mut web::ServiceConfig) {
|
||||
config.service(web::resource("").route(web::get().to(get_metrics)));
|
||||
@ -18,6 +19,7 @@ pub fn configure(config: &mut web::ServiceConfig) {
|
||||
pub async fn get_metrics(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
auth_controller: Data<AuthController>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_metrics()?;
|
||||
let auth_filters = index_scheduler.filters();
|
||||
@ -35,6 +37,11 @@ pub async fn get_metrics(
|
||||
crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
|
||||
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
|
||||
|
||||
crate::metrics::MEILISEARCH_SEARCH_QUEUE_SIZE.set(search_queue.capacity() as i64);
|
||||
crate::metrics::MEILISEARCH_SEARCHES_RUNNING.set(search_queue.searches_running() as i64);
|
||||
crate::metrics::MEILISEARCH_SEARCHES_WAITING_TO_BE_PROCESSED
|
||||
.set(search_queue.searches_waiting() as i64);
|
||||
|
||||
for (index, value) in response.indexes.iter() {
|
||||
crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
|
||||
.with_label_values(&[index])
|
||||
|
@ -18,6 +18,8 @@
|
||||
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
|
||||
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
@ -33,6 +35,8 @@ pub struct SearchQueue {
|
||||
/// If we have waited longer than this to get a permit, we should abort the search request entirely.
|
||||
/// The client probably already closed the connection, but we have no way to find out.
|
||||
time_to_abort: Duration,
|
||||
searches_running: Arc<AtomicUsize>,
|
||||
searches_waiting_to_be_processed: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
/// You should only run search requests while holding this permit.
|
||||
@ -68,14 +72,41 @@ impl SearchQueue {
|
||||
// so let's not allocate any RAM and keep a capacity of 1.
|
||||
let (sender, receiver) = mpsc::channel(1);
|
||||
|
||||
tokio::task::spawn(Self::run(capacity, paralellism, receiver));
|
||||
Self { sender, capacity, time_to_abort: Duration::from_secs(60) }
|
||||
let instance = Self {
|
||||
sender,
|
||||
capacity,
|
||||
time_to_abort: Duration::from_secs(60),
|
||||
searches_running: Default::default(),
|
||||
searches_waiting_to_be_processed: Default::default(),
|
||||
};
|
||||
|
||||
tokio::task::spawn(Self::run(
|
||||
capacity,
|
||||
paralellism,
|
||||
receiver,
|
||||
Arc::clone(&instance.searches_running),
|
||||
Arc::clone(&instance.searches_waiting_to_be_processed),
|
||||
));
|
||||
|
||||
instance
|
||||
}
|
||||
|
||||
pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self {
|
||||
Self { time_to_abort, ..self }
|
||||
}
|
||||
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.capacity
|
||||
}
|
||||
|
||||
pub fn searches_running(&self) -> usize {
|
||||
self.searches_running.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn searches_waiting(&self) -> usize {
|
||||
self.searches_waiting_to_be_processed.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// This function is the main loop; it's in charge of scheduling which search request should execute first and
|
||||
/// how many should execute at the same time.
|
||||
///
|
||||
@ -84,6 +115,8 @@ impl SearchQueue {
|
||||
capacity: usize,
|
||||
parallelism: NonZeroUsize,
|
||||
mut receive_new_searches: mpsc::Receiver<oneshot::Sender<Permit>>,
|
||||
metric_searches_running: Arc<AtomicUsize>,
|
||||
metric_searches_waiting: Arc<AtomicUsize>,
|
||||
) {
|
||||
let mut queue: Vec<oneshot::Sender<Permit>> = Default::default();
|
||||
let mut rng: StdRng = StdRng::from_entropy();
|
||||
@ -133,6 +166,9 @@ impl SearchQueue {
|
||||
queue.push(search_request);
|
||||
},
|
||||
}
|
||||
|
||||
metric_searches_running.store(searches_running, Ordering::Relaxed);
|
||||
metric_searches_waiting.store(queue.len(), Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -389,3 +389,25 @@ pub static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
},
|
||||
])
|
||||
});
|
||||
|
||||
pub async fn shared_index_with_test_set() -> &'static Index<'static, Shared> {
|
||||
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
|
||||
INDEX
|
||||
.get_or_init(|| async {
|
||||
let server = Server::new_shared();
|
||||
let index = server._index("SHARED_TEST_SET").to_shared();
|
||||
let url = format!("/indexes/{}/documents", urlencoding::encode(index.uid.as_ref()));
|
||||
let (response, code) = index
|
||||
.service
|
||||
.post_str(
|
||||
url,
|
||||
include_str!("../assets/test_set.json"),
|
||||
vec![("content-type", "application/json")],
|
||||
)
|
||||
.await;
|
||||
assert_eq!(code, 202);
|
||||
index.wait_task(response.uid()).await;
|
||||
index
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
@ -4,24 +4,27 @@ use meili_snap::*;
|
||||
use urlencoding::encode as urlencode;
|
||||
|
||||
use crate::common::encoder::Encoder;
|
||||
use crate::common::{GetAllDocumentsOptions, Server, Value};
|
||||
use crate::common::{
|
||||
shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set,
|
||||
GetAllDocumentsOptions, Server, Value,
|
||||
};
|
||||
use crate::json;
|
||||
|
||||
// TODO: partial test since we are testing error, and error is not yet fully implemented in
|
||||
// transplant
|
||||
#[actix_rt::test]
|
||||
async fn get_unexisting_index_single_document() {
|
||||
let server = Server::new().await;
|
||||
let (_response, code) = server.index("test").get_document(1, None).await;
|
||||
let (_response, code) = shared_does_not_exists_index().await.get_document(1, None).await;
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_get_unexisting_document() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index.wait_task(0).await;
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (task, _code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.get_document(1, None).await;
|
||||
|
||||
let expected_response = json!({
|
||||
@ -37,18 +40,19 @@ async fn error_get_unexisting_document() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (task, _code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 0,
|
||||
"nested": { "content": "foobar" },
|
||||
}
|
||||
]);
|
||||
let (_, code) = index.add_documents(documents, None).await;
|
||||
let (task, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
index.wait_task(1).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
let (response, code) = index.get_document(0, None).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
@ -81,12 +85,11 @@ async fn get_document() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_get_unexisting_index_all_documents() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) =
|
||||
server.index("test").get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
let index = shared_does_not_exists_index().await;
|
||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"message": "Index `DOES_NOT_EXISTS` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
@ -98,12 +101,7 @@ async fn error_get_unexisting_index_all_documents() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_no_document() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.create(None).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(0).await;
|
||||
let index = shared_empty_index().await;
|
||||
|
||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
assert_eq!(code, 200);
|
||||
@ -112,14 +110,12 @@ async fn get_no_document() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_all_documents_no_options() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.load_test_set().await;
|
||||
let index = shared_index_with_test_set().await;
|
||||
|
||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
assert_eq!(code, 200);
|
||||
let arr = response["results"].as_array().unwrap();
|
||||
assert_eq!(arr.len(), 20);
|
||||
let results = response["results"].as_array().unwrap();
|
||||
assert_eq!(results.len(), 20);
|
||||
let first = json!({
|
||||
"id":0,
|
||||
"isActive":false,
|
||||
@ -138,19 +134,16 @@ async fn get_all_documents_no_options() {
|
||||
"longitude":-145.725388,
|
||||
"tags":["bug"
|
||||
,"bug"]});
|
||||
assert_eq!(first, arr[0]);
|
||||
assert_eq!(first, results[0]);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_all_documents_no_options_with_response_compression() {
|
||||
let server = Server::new().await;
|
||||
let index_uid = "test";
|
||||
let index = server.index(index_uid);
|
||||
index.load_test_set().await;
|
||||
let index = shared_index_with_test_set().await;
|
||||
|
||||
let app = server.init_web_app().await;
|
||||
let app = Server::new_shared().init_web_app().await;
|
||||
let req = test::TestRequest::get()
|
||||
.uri(&format!("/indexes/{}/documents?", urlencode(index_uid)))
|
||||
.uri(&format!("/indexes/{}/documents?", urlencode(&index.uid)))
|
||||
.insert_header((ACCEPT_ENCODING, "gzip"))
|
||||
.to_request();
|
||||
|
||||
@ -169,9 +162,7 @@ async fn get_all_documents_no_options_with_response_compression() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_get_all_documents_limit() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.load_test_set().await;
|
||||
let index = shared_index_with_test_set().await;
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { limit: Some(5), ..Default::default() })
|
||||
@ -186,9 +177,7 @@ async fn test_get_all_documents_limit() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_get_all_documents_offset() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.load_test_set().await;
|
||||
let index = shared_index_with_test_set().await;
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { offset: Some(5), ..Default::default() })
|
||||
@ -203,9 +192,7 @@ async fn test_get_all_documents_offset() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_get_all_documents_attributes_to_retrieve() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.load_test_set().await;
|
||||
let index = shared_index_with_test_set().await;
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
@ -286,9 +273,11 @@ async fn test_get_all_documents_attributes_to_retrieve() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_s_nested_attributes_to_retrieve() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (task, _code) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 0,
|
||||
@ -302,9 +291,9 @@ async fn get_document_s_nested_attributes_to_retrieve() {
|
||||
},
|
||||
},
|
||||
]);
|
||||
let (_, code) = index.add_documents(documents, None).await;
|
||||
let (task, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
index.wait_task(1).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await;
|
||||
assert_eq!(code, 200);
|
||||
@ -343,10 +332,10 @@ async fn get_document_s_nested_attributes_to_retrieve() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_documents_displayed_attributes_is_ignored() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
index.load_test_set().await;
|
||||
index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
|
||||
|
||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
assert_eq!(code, 200);
|
||||
@ -366,10 +355,10 @@ async fn get_documents_displayed_attributes_is_ignored() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_by_filter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
index.update_settings_filterable_attributes(json!(["color"])).await;
|
||||
index
|
||||
let (task, _code) = index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
@ -380,7 +369,7 @@ async fn get_document_by_filter() {
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({})).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("").await;
|
||||
@ -552,7 +541,7 @@ async fn get_document_with_vectors() {
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response.uid()).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||
@ -560,7 +549,7 @@ async fn get_document_with_vectors() {
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(value.uid()).await;
|
||||
index.wait_task(value.uid()).await.succeeded();
|
||||
|
||||
// by default you shouldn't see the `_vectors` object
|
||||
let (documents, _code) = index.get_all_documents(Default::default()).await;
|
||||
|
@ -6,14 +6,14 @@ use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn formatted_contain_wildcard() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
index.update_settings(json!({ "displayedAttributes": ["id", "cattos"] })).await;
|
||||
|
||||
let documents = NESTED_DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(1).await;
|
||||
let (response, _) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }),
|
||||
|response, code|
|
||||
@ -135,12 +135,7 @@ async fn formatted_contain_wildcard() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn format_nested() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = NESTED_DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
let index = shared_index_with_nested_documents().await;
|
||||
|
||||
index
|
||||
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
||||
@ -340,15 +335,15 @@ async fn format_nested() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn displayedattr_2_smol() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
// not enough displayed for the other settings
|
||||
index.update_settings(json!({ "displayedAttributes": ["id"] })).await;
|
||||
|
||||
let documents = NESTED_DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(1).await;
|
||||
let (response, _) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
index
|
||||
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|
||||
@ -538,15 +533,15 @@ async fn displayedattr_2_smol() {
|
||||
#[cfg(feature = "default")]
|
||||
#[actix_rt::test]
|
||||
async fn test_cjk_highlight() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = json!([
|
||||
{ "id": 0, "title": "この度、クーポンで無料で頂きました。" },
|
||||
{ "id": 1, "title": "大卫到了扫罗那里" },
|
||||
]);
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, _) = index.add_documents(documents, None).await;
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
|
||||
|
@ -2,12 +2,11 @@ use std::marker::PhantomData;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
|
||||
use hashbrown::HashMap;
|
||||
use heed::types::Bytes;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::extract::FacetKind;
|
||||
use super::StdResult;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::update::new::KvReaderFieldId;
|
||||
use crate::vector::Embedding;
|
||||
use crate::{DocumentId, Index};
|
||||
@ -87,7 +86,7 @@ pub enum ArroyOperation {
|
||||
embedding: Embedding,
|
||||
},
|
||||
Finish {
|
||||
user_provided: HashMap<String, RoaringBitmap>,
|
||||
configs: Vec<IndexEmbeddingConfig>,
|
||||
},
|
||||
}
|
||||
|
||||
@ -418,12 +417,9 @@ impl EmbeddingSender<'_> {
|
||||
}
|
||||
|
||||
/// Marks all embedders as "to be built"
|
||||
pub fn finish(
|
||||
self,
|
||||
user_provided: HashMap<String, RoaringBitmap>,
|
||||
) -> StdResult<(), SendError<()>> {
|
||||
pub fn finish(self, configs: Vec<IndexEmbeddingConfig>) -> StdResult<(), SendError<()>> {
|
||||
self.0
|
||||
.send(WriterOperation::ArroyOperation(ArroyOperation::Finish { user_provided }))
|
||||
.send(WriterOperation::ArroyOperation(ArroyOperation::Finish { configs }))
|
||||
.map_err(|_| SendError(()))
|
||||
}
|
||||
}
|
||||
|
@ -85,8 +85,13 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
||||
for change in changes {
|
||||
let change = change?;
|
||||
match change {
|
||||
DocumentChange::Deletion(_deletion) => {
|
||||
// handled by document sender
|
||||
DocumentChange::Deletion(deletion) => {
|
||||
// vector deletion is handled by document sender,
|
||||
// we still need to accommodate deletion from user_provided
|
||||
for chunks in &mut all_chunks {
|
||||
// regenerate: true means we delete from user_provided
|
||||
chunks.set_regenerate(deletion.docid(), true);
|
||||
}
|
||||
}
|
||||
DocumentChange::Update(update) => {
|
||||
let old_vectors = update.current_vectors(
|
||||
@ -423,9 +428,9 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
|
||||
let user_provided = user_provided.0.entry_ref(self.embedder_name).or_default();
|
||||
if regenerate {
|
||||
// regenerate == !user_provided
|
||||
user_provided.del.get_or_insert(Default::default()).insert(docid);
|
||||
user_provided.insert_del_u32(docid);
|
||||
} else {
|
||||
user_provided.add.get_or_insert(Default::default()).insert(docid);
|
||||
user_provided.insert_add_u32(docid);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -342,35 +342,28 @@ where
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
||||
let _entered = span.enter();
|
||||
|
||||
let index_embeddings = index.embedding_configs(&rtxn)?;
|
||||
let mut index_embeddings = index.embedding_configs(&rtxn)?;
|
||||
if index_embeddings.is_empty() {
|
||||
break 'vectors;
|
||||
}
|
||||
|
||||
let embedding_sender = extractor_sender.embeddings();
|
||||
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
||||
let datastore = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||
let mut datastore = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||
let (finished_steps, step_name) = steps::extract_embeddings();
|
||||
|
||||
|
||||
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, finished_steps, total_steps, step_name)?;
|
||||
|
||||
|
||||
let mut user_provided = HashMap::new();
|
||||
for data in datastore {
|
||||
let data = data.into_inner().0;
|
||||
for (embedder, deladd) in data.into_iter() {
|
||||
let user_provided = user_provided.entry(embedder).or_insert(Default::default());
|
||||
if let Some(del) = deladd.del {
|
||||
*user_provided -= del;
|
||||
}
|
||||
if let Some(add) = deladd.add {
|
||||
*user_provided |= add;
|
||||
}
|
||||
for config in &mut index_embeddings {
|
||||
'data: for data in datastore.iter_mut() {
|
||||
let data = &mut data.get_mut().0;
|
||||
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
|
||||
deladd.apply_to(&mut config.user_provided);
|
||||
}
|
||||
}
|
||||
|
||||
embedding_sender.finish(user_provided).unwrap();
|
||||
embedding_sender.finish(index_embeddings).unwrap();
|
||||
}
|
||||
|
||||
// TODO THIS IS TOO MUCH
|
||||
@ -472,7 +465,7 @@ where
|
||||
writer.del_items(wtxn, *dimensions, docid)?;
|
||||
writer.add_item(wtxn, docid, &embedding)?;
|
||||
}
|
||||
ArroyOperation::Finish { mut user_provided } => {
|
||||
ArroyOperation::Finish { configs } => {
|
||||
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
|
||||
let _entered = span.enter();
|
||||
|
||||
@ -497,14 +490,6 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
let mut configs = index.embedding_configs(wtxn)?;
|
||||
|
||||
for config in &mut configs {
|
||||
if let Some(user_provided) = user_provided.remove(&config.name) {
|
||||
config.user_provided = user_provided;
|
||||
}
|
||||
}
|
||||
|
||||
index.put_embedding_configs(wtxn, configs)?;
|
||||
}
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user