mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 03:55:07 +08:00
Compare commits
12 Commits
7dd55587f5
...
88b3d2547c
Author | SHA1 | Date | |
---|---|---|---|
|
88b3d2547c | ||
|
1fcd5f091e | ||
|
6094bb299a | ||
|
a5d7ae23bd | ||
|
03886d0012 | ||
|
b427b9e88f | ||
|
8b95f5ccc6 | ||
|
da59a043ba | ||
|
da4d47b5d0 | ||
|
d0b1ba20cb | ||
|
c79ca9679b | ||
|
a934b0ac6a |
@ -148,7 +148,6 @@ pub fn snapshot_task(task: &Task) -> String {
|
|||||||
enqueued_at: _,
|
enqueued_at: _,
|
||||||
started_at: _,
|
started_at: _,
|
||||||
finished_at: _,
|
finished_at: _,
|
||||||
progress: _,
|
|
||||||
error,
|
error,
|
||||||
canceled_by,
|
canceled_by,
|
||||||
details,
|
details,
|
||||||
|
@ -978,12 +978,7 @@ impl IndexScheduler {
|
|||||||
Ok((
|
Ok((
|
||||||
ret.map(|task| {
|
ret.map(|task| {
|
||||||
if processing.contains(task.uid) {
|
if processing.contains(task.uid) {
|
||||||
Task {
|
Task { status: Status::Processing, started_at: Some(started_at), ..task }
|
||||||
status: Status::Processing,
|
|
||||||
progress: progress.clone(),
|
|
||||||
started_at: Some(started_at),
|
|
||||||
..task
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
task
|
task
|
||||||
}
|
}
|
||||||
@ -1025,7 +1020,6 @@ impl IndexScheduler {
|
|||||||
enqueued_at: OffsetDateTime::now_utc(),
|
enqueued_at: OffsetDateTime::now_utc(),
|
||||||
started_at: None,
|
started_at: None,
|
||||||
finished_at: None,
|
finished_at: None,
|
||||||
progress: None,
|
|
||||||
error: None,
|
error: None,
|
||||||
canceled_by: None,
|
canceled_by: None,
|
||||||
details: kind.default_details(),
|
details: kind.default_details(),
|
||||||
@ -1606,8 +1600,6 @@ impl<'a> Dump<'a> {
|
|||||||
enqueued_at: task.enqueued_at,
|
enqueued_at: task.enqueued_at,
|
||||||
started_at: task.started_at,
|
started_at: task.started_at,
|
||||||
finished_at: task.finished_at,
|
finished_at: task.finished_at,
|
||||||
/// FIXME: should we update dump to contain progress information? 🤔
|
|
||||||
progress: None,
|
|
||||||
error: task.error,
|
error: task.error,
|
||||||
canceled_by: task.canceled_by,
|
canceled_by: task.canceled_by,
|
||||||
details: task.details,
|
details: task.details,
|
||||||
|
@ -345,8 +345,6 @@ impl IndexScheduler {
|
|||||||
enqueued_at,
|
enqueued_at,
|
||||||
started_at,
|
started_at,
|
||||||
finished_at,
|
finished_at,
|
||||||
/// FIXME: assert something here? ask tamo 🤔
|
|
||||||
progress: _,
|
|
||||||
error: _,
|
error: _,
|
||||||
canceled_by,
|
canceled_by,
|
||||||
details,
|
details,
|
||||||
|
@ -4,9 +4,7 @@ use time::{Duration, OffsetDateTime};
|
|||||||
|
|
||||||
use crate::error::ResponseError;
|
use crate::error::ResponseError;
|
||||||
use crate::settings::{Settings, Unchecked};
|
use crate::settings::{Settings, Unchecked};
|
||||||
use crate::tasks::{
|
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
||||||
serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId, TaskProgress,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
@ -29,8 +27,6 @@ pub struct TaskView {
|
|||||||
pub started_at: Option<OffsetDateTime>,
|
pub started_at: Option<OffsetDateTime>,
|
||||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||||
pub finished_at: Option<OffsetDateTime>,
|
pub finished_at: Option<OffsetDateTime>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub progress: Option<TaskProgress>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TaskView {
|
impl TaskView {
|
||||||
@ -47,7 +43,6 @@ impl TaskView {
|
|||||||
enqueued_at: task.enqueued_at,
|
enqueued_at: task.enqueued_at,
|
||||||
started_at: task.started_at,
|
started_at: task.started_at,
|
||||||
finished_at: task.finished_at,
|
finished_at: task.finished_at,
|
||||||
progress: task.progress.clone(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -31,8 +31,6 @@ pub struct Task {
|
|||||||
#[serde(with = "time::serde::rfc3339::option")]
|
#[serde(with = "time::serde::rfc3339::option")]
|
||||||
pub finished_at: Option<OffsetDateTime>,
|
pub finished_at: Option<OffsetDateTime>,
|
||||||
|
|
||||||
pub progress: Option<TaskProgress>,
|
|
||||||
|
|
||||||
pub error: Option<ResponseError>,
|
pub error: Option<ResponseError>,
|
||||||
pub canceled_by: Option<TaskId>,
|
pub canceled_by: Option<TaskId>,
|
||||||
pub details: Option<Details>,
|
pub details: Option<Details>,
|
||||||
|
@ -49,4 +49,18 @@ lazy_static! {
|
|||||||
pub static ref MEILISEARCH_IS_INDEXING: IntGauge =
|
pub static ref MEILISEARCH_IS_INDEXING: IntGauge =
|
||||||
register_int_gauge!(opts!("meilisearch_is_indexing", "Meilisearch Is Indexing"))
|
register_int_gauge!(opts!("meilisearch_is_indexing", "Meilisearch Is Indexing"))
|
||||||
.expect("Can't create a metric");
|
.expect("Can't create a metric");
|
||||||
|
pub static ref MEILISEARCH_SEARCH_QUEUE_SIZE: IntGauge = register_int_gauge!(opts!(
|
||||||
|
"meilisearch_search_queue_size",
|
||||||
|
"Meilisearch Search Queue Size"
|
||||||
|
))
|
||||||
|
.expect("Can't create a metric");
|
||||||
|
pub static ref MEILISEARCH_SEARCHES_RUNNING: IntGauge =
|
||||||
|
register_int_gauge!(opts!("meilisearch_searches_running", "Meilisearch Searches Running"))
|
||||||
|
.expect("Can't create a metric");
|
||||||
|
pub static ref MEILISEARCH_SEARCHES_WAITING_TO_BE_PROCESSED: IntGauge =
|
||||||
|
register_int_gauge!(opts!(
|
||||||
|
"meilisearch_searches_waiting_to_be_processed",
|
||||||
|
"Meilisearch Searches Being Processed"
|
||||||
|
))
|
||||||
|
.expect("Can't create a metric");
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,7 @@ use prometheus::{Encoder, TextEncoder};
|
|||||||
use crate::extractors::authentication::policies::ActionPolicy;
|
use crate::extractors::authentication::policies::ActionPolicy;
|
||||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||||
use crate::routes::create_all_stats;
|
use crate::routes::create_all_stats;
|
||||||
|
use crate::search_queue::SearchQueue;
|
||||||
|
|
||||||
pub fn configure(config: &mut web::ServiceConfig) {
|
pub fn configure(config: &mut web::ServiceConfig) {
|
||||||
config.service(web::resource("").route(web::get().to(get_metrics)));
|
config.service(web::resource("").route(web::get().to(get_metrics)));
|
||||||
@ -18,6 +19,7 @@ pub fn configure(config: &mut web::ServiceConfig) {
|
|||||||
pub async fn get_metrics(
|
pub async fn get_metrics(
|
||||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||||
auth_controller: Data<AuthController>,
|
auth_controller: Data<AuthController>,
|
||||||
|
search_queue: web::Data<SearchQueue>,
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
index_scheduler.features().check_metrics()?;
|
index_scheduler.features().check_metrics()?;
|
||||||
let auth_filters = index_scheduler.filters();
|
let auth_filters = index_scheduler.filters();
|
||||||
@ -35,6 +37,11 @@ pub async fn get_metrics(
|
|||||||
crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
|
crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
|
||||||
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
|
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
|
||||||
|
|
||||||
|
crate::metrics::MEILISEARCH_SEARCH_QUEUE_SIZE.set(search_queue.capacity() as i64);
|
||||||
|
crate::metrics::MEILISEARCH_SEARCHES_RUNNING.set(search_queue.searches_running() as i64);
|
||||||
|
crate::metrics::MEILISEARCH_SEARCHES_WAITING_TO_BE_PROCESSED
|
||||||
|
.set(search_queue.searches_waiting() as i64);
|
||||||
|
|
||||||
for (index, value) in response.indexes.iter() {
|
for (index, value) in response.indexes.iter() {
|
||||||
crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
|
crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
|
||||||
.with_label_values(&[index])
|
.with_label_values(&[index])
|
||||||
|
@ -18,6 +18,8 @@
|
|||||||
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
|
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
|
||||||
|
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use rand::rngs::StdRng;
|
use rand::rngs::StdRng;
|
||||||
@ -33,6 +35,8 @@ pub struct SearchQueue {
|
|||||||
/// If we have waited longer than this to get a permit, we should abort the search request entirely.
|
/// If we have waited longer than this to get a permit, we should abort the search request entirely.
|
||||||
/// The client probably already closed the connection, but we have no way to find out.
|
/// The client probably already closed the connection, but we have no way to find out.
|
||||||
time_to_abort: Duration,
|
time_to_abort: Duration,
|
||||||
|
searches_running: Arc<AtomicUsize>,
|
||||||
|
searches_waiting_to_be_processed: Arc<AtomicUsize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// You should only run search requests while holding this permit.
|
/// You should only run search requests while holding this permit.
|
||||||
@ -68,14 +72,41 @@ impl SearchQueue {
|
|||||||
// so let's not allocate any RAM and keep a capacity of 1.
|
// so let's not allocate any RAM and keep a capacity of 1.
|
||||||
let (sender, receiver) = mpsc::channel(1);
|
let (sender, receiver) = mpsc::channel(1);
|
||||||
|
|
||||||
tokio::task::spawn(Self::run(capacity, paralellism, receiver));
|
let instance = Self {
|
||||||
Self { sender, capacity, time_to_abort: Duration::from_secs(60) }
|
sender,
|
||||||
|
capacity,
|
||||||
|
time_to_abort: Duration::from_secs(60),
|
||||||
|
searches_running: Default::default(),
|
||||||
|
searches_waiting_to_be_processed: Default::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
tokio::task::spawn(Self::run(
|
||||||
|
capacity,
|
||||||
|
paralellism,
|
||||||
|
receiver,
|
||||||
|
Arc::clone(&instance.searches_running),
|
||||||
|
Arc::clone(&instance.searches_waiting_to_be_processed),
|
||||||
|
));
|
||||||
|
|
||||||
|
instance
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self {
|
pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self {
|
||||||
Self { time_to_abort, ..self }
|
Self { time_to_abort, ..self }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn capacity(&self) -> usize {
|
||||||
|
self.capacity
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn searches_running(&self) -> usize {
|
||||||
|
self.searches_running.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn searches_waiting(&self) -> usize {
|
||||||
|
self.searches_waiting_to_be_processed.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
/// This function is the main loop, it's in charge on scheduling which search request should execute first and
|
/// This function is the main loop, it's in charge on scheduling which search request should execute first and
|
||||||
/// how many should executes at the same time.
|
/// how many should executes at the same time.
|
||||||
///
|
///
|
||||||
@ -84,6 +115,8 @@ impl SearchQueue {
|
|||||||
capacity: usize,
|
capacity: usize,
|
||||||
parallelism: NonZeroUsize,
|
parallelism: NonZeroUsize,
|
||||||
mut receive_new_searches: mpsc::Receiver<oneshot::Sender<Permit>>,
|
mut receive_new_searches: mpsc::Receiver<oneshot::Sender<Permit>>,
|
||||||
|
metric_searches_running: Arc<AtomicUsize>,
|
||||||
|
metric_searches_waiting: Arc<AtomicUsize>,
|
||||||
) {
|
) {
|
||||||
let mut queue: Vec<oneshot::Sender<Permit>> = Default::default();
|
let mut queue: Vec<oneshot::Sender<Permit>> = Default::default();
|
||||||
let mut rng: StdRng = StdRng::from_entropy();
|
let mut rng: StdRng = StdRng::from_entropy();
|
||||||
@ -133,6 +166,9 @@ impl SearchQueue {
|
|||||||
queue.push(search_request);
|
queue.push(search_request);
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metric_searches_running.store(searches_running, Ordering::Relaxed);
|
||||||
|
metric_searches_waiting.store(queue.len(), Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -389,3 +389,25 @@ pub static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
|||||||
},
|
},
|
||||||
])
|
])
|
||||||
});
|
});
|
||||||
|
|
||||||
|
pub async fn shared_index_with_test_set() -> &'static Index<'static, Shared> {
|
||||||
|
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
|
||||||
|
INDEX
|
||||||
|
.get_or_init(|| async {
|
||||||
|
let server = Server::new_shared();
|
||||||
|
let index = server._index("SHARED_TEST_SET").to_shared();
|
||||||
|
let url = format!("/indexes/{}/documents", urlencoding::encode(index.uid.as_ref()));
|
||||||
|
let (response, code) = index
|
||||||
|
.service
|
||||||
|
.post_str(
|
||||||
|
url,
|
||||||
|
include_str!("../assets/test_set.json"),
|
||||||
|
vec![("content-type", "application/json")],
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(code, 202);
|
||||||
|
index.wait_task(response.uid()).await;
|
||||||
|
index
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
@ -4,24 +4,27 @@ use meili_snap::*;
|
|||||||
use urlencoding::encode as urlencode;
|
use urlencoding::encode as urlencode;
|
||||||
|
|
||||||
use crate::common::encoder::Encoder;
|
use crate::common::encoder::Encoder;
|
||||||
use crate::common::{GetAllDocumentsOptions, Server, Value};
|
use crate::common::{
|
||||||
|
shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set,
|
||||||
|
GetAllDocumentsOptions, Server, Value,
|
||||||
|
};
|
||||||
use crate::json;
|
use crate::json;
|
||||||
|
|
||||||
// TODO: partial test since we are testing error, amd error is not yet fully implemented in
|
// TODO: partial test since we are testing error, amd error is not yet fully implemented in
|
||||||
// transplant
|
// transplant
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_unexisting_index_single_document() {
|
async fn get_unexisting_index_single_document() {
|
||||||
let server = Server::new().await;
|
let (_response, code) = shared_does_not_exists_index().await.get_document(1, None).await;
|
||||||
let (_response, code) = server.index("test").get_document(1, None).await;
|
|
||||||
assert_eq!(code, 404);
|
assert_eq!(code, 404);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn error_get_unexisting_document() {
|
async fn error_get_unexisting_document() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("test");
|
let index = server.unique_index();
|
||||||
index.create(None).await;
|
let (task, _code) = index.create(None).await;
|
||||||
index.wait_task(0).await;
|
index.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
let (response, code) = index.get_document(1, None).await;
|
let (response, code) = index.get_document(1, None).await;
|
||||||
|
|
||||||
let expected_response = json!({
|
let expected_response = json!({
|
||||||
@ -37,18 +40,19 @@ async fn error_get_unexisting_document() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_document() {
|
async fn get_document() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("test");
|
let index = server.unique_index();
|
||||||
index.create(None).await;
|
let (task, _code) = index.create(None).await;
|
||||||
|
index.wait_task(task.uid()).await.succeeded();
|
||||||
let documents = json!([
|
let documents = json!([
|
||||||
{
|
{
|
||||||
"id": 0,
|
"id": 0,
|
||||||
"nested": { "content": "foobar" },
|
"nested": { "content": "foobar" },
|
||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
let (_, code) = index.add_documents(documents, None).await;
|
let (task, code) = index.add_documents(documents, None).await;
|
||||||
assert_eq!(code, 202);
|
assert_eq!(code, 202);
|
||||||
index.wait_task(1).await;
|
index.wait_task(task.uid()).await.succeeded();
|
||||||
let (response, code) = index.get_document(0, None).await;
|
let (response, code) = index.get_document(0, None).await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -81,12 +85,11 @@ async fn get_document() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn error_get_unexisting_index_all_documents() {
|
async fn error_get_unexisting_index_all_documents() {
|
||||||
let server = Server::new().await;
|
let index = shared_does_not_exists_index().await;
|
||||||
let (response, code) =
|
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||||
server.index("test").get_all_documents(GetAllDocumentsOptions::default()).await;
|
|
||||||
|
|
||||||
let expected_response = json!({
|
let expected_response = json!({
|
||||||
"message": "Index `test` not found.",
|
"message": "Index `DOES_NOT_EXISTS` not found.",
|
||||||
"code": "index_not_found",
|
"code": "index_not_found",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||||
@ -98,12 +101,7 @@ async fn error_get_unexisting_index_all_documents() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_no_document() {
|
async fn get_no_document() {
|
||||||
let server = Server::new().await;
|
let index = shared_empty_index().await;
|
||||||
let index = server.index("test");
|
|
||||||
let (_, code) = index.create(None).await;
|
|
||||||
assert_eq!(code, 202);
|
|
||||||
|
|
||||||
index.wait_task(0).await;
|
|
||||||
|
|
||||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
@ -112,14 +110,12 @@ async fn get_no_document() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_all_documents_no_options() {
|
async fn get_all_documents_no_options() {
|
||||||
let server = Server::new().await;
|
let index = shared_index_with_test_set().await;
|
||||||
let index = server.index("test");
|
|
||||||
index.load_test_set().await;
|
|
||||||
|
|
||||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
let arr = response["results"].as_array().unwrap();
|
let results = response["results"].as_array().unwrap();
|
||||||
assert_eq!(arr.len(), 20);
|
assert_eq!(results.len(), 20);
|
||||||
let first = json!({
|
let first = json!({
|
||||||
"id":0,
|
"id":0,
|
||||||
"isActive":false,
|
"isActive":false,
|
||||||
@ -138,19 +134,16 @@ async fn get_all_documents_no_options() {
|
|||||||
"longitude":-145.725388,
|
"longitude":-145.725388,
|
||||||
"tags":["bug"
|
"tags":["bug"
|
||||||
,"bug"]});
|
,"bug"]});
|
||||||
assert_eq!(first, arr[0]);
|
assert_eq!(first, results[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_all_documents_no_options_with_response_compression() {
|
async fn get_all_documents_no_options_with_response_compression() {
|
||||||
let server = Server::new().await;
|
let index = shared_index_with_test_set().await;
|
||||||
let index_uid = "test";
|
|
||||||
let index = server.index(index_uid);
|
|
||||||
index.load_test_set().await;
|
|
||||||
|
|
||||||
let app = server.init_web_app().await;
|
let app = Server::new_shared().init_web_app().await;
|
||||||
let req = test::TestRequest::get()
|
let req = test::TestRequest::get()
|
||||||
.uri(&format!("/indexes/{}/documents?", urlencode(index_uid)))
|
.uri(&format!("/indexes/{}/documents?", urlencode(&index.uid)))
|
||||||
.insert_header((ACCEPT_ENCODING, "gzip"))
|
.insert_header((ACCEPT_ENCODING, "gzip"))
|
||||||
.to_request();
|
.to_request();
|
||||||
|
|
||||||
@ -169,9 +162,7 @@ async fn get_all_documents_no_options_with_response_compression() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn test_get_all_documents_limit() {
|
async fn test_get_all_documents_limit() {
|
||||||
let server = Server::new().await;
|
let index = shared_index_with_test_set().await;
|
||||||
let index = server.index("test");
|
|
||||||
index.load_test_set().await;
|
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.get_all_documents(GetAllDocumentsOptions { limit: Some(5), ..Default::default() })
|
.get_all_documents(GetAllDocumentsOptions { limit: Some(5), ..Default::default() })
|
||||||
@ -186,9 +177,7 @@ async fn test_get_all_documents_limit() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn test_get_all_documents_offset() {
|
async fn test_get_all_documents_offset() {
|
||||||
let server = Server::new().await;
|
let index = shared_index_with_test_set().await;
|
||||||
let index = server.index("test");
|
|
||||||
index.load_test_set().await;
|
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.get_all_documents(GetAllDocumentsOptions { offset: Some(5), ..Default::default() })
|
.get_all_documents(GetAllDocumentsOptions { offset: Some(5), ..Default::default() })
|
||||||
@ -203,9 +192,7 @@ async fn test_get_all_documents_offset() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn test_get_all_documents_attributes_to_retrieve() {
|
async fn test_get_all_documents_attributes_to_retrieve() {
|
||||||
let server = Server::new().await;
|
let index = shared_index_with_test_set().await;
|
||||||
let index = server.index("test");
|
|
||||||
index.load_test_set().await;
|
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.get_all_documents(GetAllDocumentsOptions {
|
.get_all_documents(GetAllDocumentsOptions {
|
||||||
@ -286,9 +273,11 @@ async fn test_get_all_documents_attributes_to_retrieve() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_document_s_nested_attributes_to_retrieve() {
|
async fn get_document_s_nested_attributes_to_retrieve() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("test");
|
let index = server.unique_index();
|
||||||
index.create(None).await;
|
let (task, _code) = index.create(None).await;
|
||||||
|
index.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
let documents = json!([
|
let documents = json!([
|
||||||
{
|
{
|
||||||
"id": 0,
|
"id": 0,
|
||||||
@ -302,9 +291,9 @@ async fn get_document_s_nested_attributes_to_retrieve() {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
let (_, code) = index.add_documents(documents, None).await;
|
let (task, code) = index.add_documents(documents, None).await;
|
||||||
assert_eq!(code, 202);
|
assert_eq!(code, 202);
|
||||||
index.wait_task(1).await;
|
index.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await;
|
let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
@ -343,10 +332,10 @@ async fn get_document_s_nested_attributes_to_retrieve() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_documents_displayed_attributes_is_ignored() {
|
async fn get_documents_displayed_attributes_is_ignored() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("test");
|
let index = server.unique_index();
|
||||||
index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
|
|
||||||
index.load_test_set().await;
|
index.load_test_set().await;
|
||||||
|
index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
|
||||||
|
|
||||||
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
@ -366,10 +355,10 @@ async fn get_documents_displayed_attributes_is_ignored() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn get_document_by_filter() {
|
async fn get_document_by_filter() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("doggo");
|
let index = server.unique_index();
|
||||||
index.update_settings_filterable_attributes(json!(["color"])).await;
|
index.update_settings_filterable_attributes(json!(["color"])).await;
|
||||||
index
|
let (task, _code) = index
|
||||||
.add_documents(
|
.add_documents(
|
||||||
json!([
|
json!([
|
||||||
{ "id": 0, "color": "red" },
|
{ "id": 0, "color": "red" },
|
||||||
@ -380,7 +369,7 @@ async fn get_document_by_filter() {
|
|||||||
Some("id"),
|
Some("id"),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
index.wait_task(1).await;
|
index.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
let (response, code) = index.get_document_by_filter(json!({})).await;
|
let (response, code) = index.get_document_by_filter(json!({})).await;
|
||||||
let (response2, code2) = index.get_all_documents_raw("").await;
|
let (response2, code2) = index.get_all_documents_raw("").await;
|
||||||
@ -552,7 +541,7 @@ async fn get_document_with_vectors() {
|
|||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
server.wait_task(response.uid()).await;
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let documents = json!([
|
let documents = json!([
|
||||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||||
@ -560,7 +549,7 @@ async fn get_document_with_vectors() {
|
|||||||
]);
|
]);
|
||||||
let (value, code) = index.add_documents(documents, None).await;
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
index.wait_task(value.uid()).await;
|
index.wait_task(value.uid()).await.succeeded();
|
||||||
|
|
||||||
// by default you shouldn't see the `_vectors` object
|
// by default you shouldn't see the `_vectors` object
|
||||||
let (documents, _code) = index.get_all_documents(Default::default()).await;
|
let (documents, _code) = index.get_all_documents(Default::default()).await;
|
||||||
|
@ -6,14 +6,14 @@ use crate::json;
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn formatted_contain_wildcard() {
|
async fn formatted_contain_wildcard() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("test");
|
let index = server.unique_index();
|
||||||
|
|
||||||
index.update_settings(json!({ "displayedAttributes": ["id", "cattos"] })).await;
|
index.update_settings(json!({ "displayedAttributes": ["id", "cattos"] })).await;
|
||||||
|
|
||||||
let documents = NESTED_DOCUMENTS.clone();
|
let documents = NESTED_DOCUMENTS.clone();
|
||||||
index.add_documents(documents, None).await;
|
let (response, _) = index.add_documents(documents, None).await;
|
||||||
index.wait_task(1).await;
|
index.wait_task(response.uid()).await;
|
||||||
|
|
||||||
index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }),
|
index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }),
|
||||||
|response, code|
|
|response, code|
|
||||||
@ -135,12 +135,7 @@ async fn formatted_contain_wildcard() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn format_nested() {
|
async fn format_nested() {
|
||||||
let server = Server::new().await;
|
let index = shared_index_with_nested_documents().await;
|
||||||
let index = server.index("test");
|
|
||||||
|
|
||||||
let documents = NESTED_DOCUMENTS.clone();
|
|
||||||
index.add_documents(documents, None).await;
|
|
||||||
index.wait_task(0).await;
|
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
||||||
@ -340,15 +335,15 @@ async fn format_nested() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn displayedattr_2_smol() {
|
async fn displayedattr_2_smol() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("test");
|
let index = server.unique_index();
|
||||||
|
|
||||||
// not enough displayed for the other settings
|
// not enough displayed for the other settings
|
||||||
index.update_settings(json!({ "displayedAttributes": ["id"] })).await;
|
index.update_settings(json!({ "displayedAttributes": ["id"] })).await;
|
||||||
|
|
||||||
let documents = NESTED_DOCUMENTS.clone();
|
let documents = NESTED_DOCUMENTS.clone();
|
||||||
index.add_documents(documents, None).await;
|
let (response, _) = index.add_documents(documents, None).await;
|
||||||
index.wait_task(1).await;
|
index.wait_task(response.uid()).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|
||||||
@ -538,15 +533,15 @@ async fn displayedattr_2_smol() {
|
|||||||
#[cfg(feature = "default")]
|
#[cfg(feature = "default")]
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn test_cjk_highlight() {
|
async fn test_cjk_highlight() {
|
||||||
let server = Server::new().await;
|
let server = Server::new_shared();
|
||||||
let index = server.index("test");
|
let index = server.unique_index();
|
||||||
|
|
||||||
let documents = json!([
|
let documents = json!([
|
||||||
{ "id": 0, "title": "この度、クーポンで無料で頂きました。" },
|
{ "id": 0, "title": "この度、クーポンで無料で頂きました。" },
|
||||||
{ "id": 1, "title": "大卫到了扫罗那里" },
|
{ "id": 1, "title": "大卫到了扫罗那里" },
|
||||||
]);
|
]);
|
||||||
index.add_documents(documents, None).await;
|
let (response, _) = index.add_documents(documents, None).await;
|
||||||
index.wait_task(0).await;
|
index.wait_task(response.uid()).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
|
.search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
|
||||||
|
@ -2,12 +2,11 @@ use std::marker::PhantomData;
|
|||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
|
|
||||||
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
|
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
|
||||||
use hashbrown::HashMap;
|
|
||||||
use heed::types::Bytes;
|
use heed::types::Bytes;
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use super::extract::FacetKind;
|
use super::extract::FacetKind;
|
||||||
use super::StdResult;
|
use super::StdResult;
|
||||||
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::update::new::KvReaderFieldId;
|
use crate::update::new::KvReaderFieldId;
|
||||||
use crate::vector::Embedding;
|
use crate::vector::Embedding;
|
||||||
use crate::{DocumentId, Index};
|
use crate::{DocumentId, Index};
|
||||||
@ -87,7 +86,7 @@ pub enum ArroyOperation {
|
|||||||
embedding: Embedding,
|
embedding: Embedding,
|
||||||
},
|
},
|
||||||
Finish {
|
Finish {
|
||||||
user_provided: HashMap<String, RoaringBitmap>,
|
configs: Vec<IndexEmbeddingConfig>,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -418,12 +417,9 @@ impl EmbeddingSender<'_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Marks all embedders as "to be built"
|
/// Marks all embedders as "to be built"
|
||||||
pub fn finish(
|
pub fn finish(self, configs: Vec<IndexEmbeddingConfig>) -> StdResult<(), SendError<()>> {
|
||||||
self,
|
|
||||||
user_provided: HashMap<String, RoaringBitmap>,
|
|
||||||
) -> StdResult<(), SendError<()>> {
|
|
||||||
self.0
|
self.0
|
||||||
.send(WriterOperation::ArroyOperation(ArroyOperation::Finish { user_provided }))
|
.send(WriterOperation::ArroyOperation(ArroyOperation::Finish { configs }))
|
||||||
.map_err(|_| SendError(()))
|
.map_err(|_| SendError(()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,8 +85,13 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
|||||||
for change in changes {
|
for change in changes {
|
||||||
let change = change?;
|
let change = change?;
|
||||||
match change {
|
match change {
|
||||||
DocumentChange::Deletion(_deletion) => {
|
DocumentChange::Deletion(deletion) => {
|
||||||
// handled by document sender
|
// vector deletion is handled by document sender,
|
||||||
|
// we still need to accommodate deletion from user_provided
|
||||||
|
for chunks in &mut all_chunks {
|
||||||
|
// regenerate: true means we delete from user_provided
|
||||||
|
chunks.set_regenerate(deletion.docid(), true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
DocumentChange::Update(update) => {
|
DocumentChange::Update(update) => {
|
||||||
let old_vectors = update.current_vectors(
|
let old_vectors = update.current_vectors(
|
||||||
@ -423,9 +428,9 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
|
|||||||
let user_provided = user_provided.0.entry_ref(self.embedder_name).or_default();
|
let user_provided = user_provided.0.entry_ref(self.embedder_name).or_default();
|
||||||
if regenerate {
|
if regenerate {
|
||||||
// regenerate == !user_provided
|
// regenerate == !user_provided
|
||||||
user_provided.del.get_or_insert(Default::default()).insert(docid);
|
user_provided.insert_del_u32(docid);
|
||||||
} else {
|
} else {
|
||||||
user_provided.add.get_or_insert(Default::default()).insert(docid);
|
user_provided.insert_add_u32(docid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -342,35 +342,28 @@ where
|
|||||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
|
|
||||||
let index_embeddings = index.embedding_configs(&rtxn)?;
|
let mut index_embeddings = index.embedding_configs(&rtxn)?;
|
||||||
if index_embeddings.is_empty() {
|
if index_embeddings.is_empty() {
|
||||||
break 'vectors;
|
break 'vectors;
|
||||||
}
|
}
|
||||||
|
|
||||||
let embedding_sender = extractor_sender.embeddings();
|
let embedding_sender = extractor_sender.embeddings();
|
||||||
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
||||||
let datastore = ThreadLocal::with_capacity(pool.current_num_threads());
|
let mut datastore = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||||
let (finished_steps, step_name) = steps::extract_embeddings();
|
let (finished_steps, step_name) = steps::extract_embeddings();
|
||||||
|
|
||||||
|
|
||||||
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, finished_steps, total_steps, step_name)?;
|
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, finished_steps, total_steps, step_name)?;
|
||||||
|
|
||||||
|
for config in &mut index_embeddings {
|
||||||
let mut user_provided = HashMap::new();
|
'data: for data in datastore.iter_mut() {
|
||||||
for data in datastore {
|
let data = &mut data.get_mut().0;
|
||||||
let data = data.into_inner().0;
|
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
|
||||||
for (embedder, deladd) in data.into_iter() {
|
deladd.apply_to(&mut config.user_provided);
|
||||||
let user_provided = user_provided.entry(embedder).or_insert(Default::default());
|
|
||||||
if let Some(del) = deladd.del {
|
|
||||||
*user_provided -= del;
|
|
||||||
}
|
|
||||||
if let Some(add) = deladd.add {
|
|
||||||
*user_provided |= add;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
embedding_sender.finish(user_provided).unwrap();
|
embedding_sender.finish(index_embeddings).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO THIS IS TOO MUCH
|
// TODO THIS IS TOO MUCH
|
||||||
@ -472,7 +465,7 @@ where
|
|||||||
writer.del_items(wtxn, *dimensions, docid)?;
|
writer.del_items(wtxn, *dimensions, docid)?;
|
||||||
writer.add_item(wtxn, docid, &embedding)?;
|
writer.add_item(wtxn, docid, &embedding)?;
|
||||||
}
|
}
|
||||||
ArroyOperation::Finish { mut user_provided } => {
|
ArroyOperation::Finish { configs } => {
|
||||||
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
|
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
|
|
||||||
@ -497,14 +490,6 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut configs = index.embedding_configs(wtxn)?;
|
|
||||||
|
|
||||||
for config in &mut configs {
|
|
||||||
if let Some(user_provided) = user_provided.remove(&config.name) {
|
|
||||||
config.user_provided = user_provided;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
index.put_embedding_configs(wtxn, configs)?;
|
index.put_embedding_configs(wtxn, configs)?;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user