meilisearch/meilisearch-http/src/routes/indexes/documents.rs

311 lines
9.7 KiB
Rust
Raw Normal View History

2021-09-15 00:39:02 +08:00
use actix_web::error::PayloadError;
use actix_web::http::header::CONTENT_TYPE;
2021-09-15 00:39:02 +08:00
use actix_web::web::Bytes;
use actix_web::HttpMessage;
2021-09-30 17:17:42 +08:00
use actix_web::{web, HttpRequest, HttpResponse};
use bstr::ByteSlice;
2021-09-15 00:39:02 +08:00
use futures::{Stream, StreamExt};
2021-06-24 21:02:35 +08:00
use log::debug;
use meilisearch_error::ResponseError;
use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update};
2021-09-29 04:08:03 +08:00
use meilisearch_lib::milli::update::IndexDocumentsMethod;
2021-09-29 04:22:59 +08:00
use meilisearch_lib::MeiliSearch;
use mime::Mime;
use once_cell::sync::Lazy;
2020-12-12 20:32:06 +08:00
use serde::Deserialize;
use serde_cs::vec::CS;
use serde_json::Value;
2021-09-15 00:39:02 +08:00
use tokio::sync::mpsc;
2020-12-12 20:32:06 +08:00
2021-10-12 21:23:31 +08:00
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
2021-06-24 21:02:35 +08:00
use crate::extractors::authentication::{policies::*, GuardedData};
2021-06-23 20:56:02 +08:00
use crate::extractors::payload::Payload;
2022-03-05 03:12:44 +08:00
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{fold_star_or, PaginationView, StarOr};
use crate::task::SummarizedTaskView;
2020-12-12 20:32:06 +08:00
/// Content types listed in the content-type errors built in this file.
///
/// Stored as owned `String`s because the error constructors used below are
/// handed a clone of the whole vector.
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
    ["application/json", "application/x-ndjson", "text/csv"]
        .iter()
        .map(|content_type| content_type.to_string())
        .collect()
});
2021-09-15 00:39:02 +08:00
/// This is required because Payload is not Sync nor Send
2021-09-29 04:22:59 +08:00
fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, PayloadError>> {
2021-09-15 00:39:02 +08:00
let (snd, recv) = mpsc::channel(1);
tokio::task::spawn_local(async move {
while let Some(data) = payload.next().await {
let _ = snd.send(data).await;
}
});
tokio_stream::wrappers::ReceiverStream::new(recv)
}
/// Extracts the mime type from the request's content type.
///
/// The value produced by `req.mime_type()` is returned unchanged when it
/// succeeds (including `None`).
///
/// # Errors
///
/// When `req.mime_type()` fails:
/// - `MeilisearchHttpError::InvalidContentType` carrying the raw header value
///   if a `Content-Type` header is present;
/// - `MeilisearchHttpError::MissingContentType` otherwise.
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
    req.mime_type()
        .map_err(|_| match req.headers().get(CONTENT_TYPE) {
            Some(content_type) => MeilisearchHttpError::InvalidContentType(
                content_type.as_bytes().as_bstr().to_string(),
                ACCEPTED_CONTENT_TYPE.clone(),
            ),
            None => MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()),
        })
}
2020-12-12 20:32:06 +08:00
#[derive(Deserialize)]
2021-07-07 22:20:22 +08:00
pub struct DocumentParam {
2021-02-13 17:44:20 +08:00
index_uid: String,
document_id: String,
2020-12-12 20:32:06 +08:00
}
2021-07-05 20:29:20 +08:00
pub fn configure(cfg: &mut web::ServiceConfig) {
2021-06-24 21:02:35 +08:00
cfg.service(
2021-07-05 20:29:20 +08:00
web::resource("")
2022-03-05 03:12:44 +08:00
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
2021-07-05 20:29:20 +08:00
)
// this route needs to be before the /documents/{document_id} to match properly
2022-03-05 03:12:44 +08:00
.service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
2021-07-05 20:29:20 +08:00
.service(
web::resource("/{document_id}")
2022-03-05 03:12:44 +08:00
.route(web::get().to(SeqHandler(get_document)))
.route(web::delete().to(SeqHandler(delete_document))),
2021-06-24 21:02:35 +08:00
);
2020-12-12 20:32:06 +08:00
}
/// Query-string parameters accepted by `get_document`.
///
/// Unknown parameters are rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct GetDocument {
    /// Optional comma-separated list of entries, each either a star or a
    /// field name; folded with `fold_star_or` by the handler.
    fields: Option<CS<StarOr<String>>>,
}
2021-07-07 22:20:22 +08:00
pub async fn get_document(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, MeiliSearch>,
2021-02-11 17:59:23 +08:00
path: web::Path<DocumentParam>,
params: web::Query<GetDocument>,
2020-12-12 20:32:06 +08:00
) -> Result<HttpResponse, ResponseError> {
2021-03-04 22:09:00 +08:00
let index = path.index_uid.clone();
let id = path.document_id.clone();
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.and_then(fold_star_or);
2021-09-24 18:03:16 +08:00
let document = meilisearch
.document(index, id, attributes_to_retrieve)
.await?;
2021-06-23 18:18:34 +08:00
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
2020-12-12 20:32:06 +08:00
}
2021-09-24 21:21:07 +08:00
pub async fn delete_document(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, MeiliSearch>,
2021-09-24 21:21:07 +08:00
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
2021-09-29 04:22:59 +08:00
let DocumentParam {
document_id,
index_uid,
} = path.into_inner();
2021-09-24 21:21:07 +08:00
let update = Update::DeleteDocuments(vec![document_id]);
let task: SummarizedTaskView = meilisearch.register_update(index_uid, update).await?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
2021-09-24 21:21:07 +08:00
}
2020-12-12 20:32:06 +08:00
2021-06-23 18:18:34 +08:00
#[derive(Deserialize, Debug)]
2020-12-12 20:32:06 +08:00
#[serde(rename_all = "camelCase", deny_unknown_fields)]
2021-07-07 22:20:22 +08:00
pub struct BrowseQuery {
#[serde(default)]
offset: usize,
#[serde(default = "crate::routes::PAGINATION_DEFAULT_LIMIT")]
limit: usize,
fields: Option<CS<StarOr<String>>>,
2020-12-12 20:32:06 +08:00
}
2021-07-07 22:20:22 +08:00
pub async fn get_all_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, MeiliSearch>,
path: web::Path<String>,
2021-02-11 00:08:37 +08:00
params: web::Query<BrowseQuery>,
2020-12-12 20:32:06 +08:00
) -> Result<HttpResponse, ResponseError> {
2021-06-23 18:18:34 +08:00
debug!("called with params: {:?}", params);
let BrowseQuery {
limit,
offset,
fields,
} = params.into_inner();
let attributes_to_retrieve = fields.and_then(fold_star_or);
2021-02-11 00:08:37 +08:00
let (total, documents) = meilisearch
.documents(path.into_inner(), offset, limit, attributes_to_retrieve)
.await?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
2020-12-12 20:32:06 +08:00
}
2021-06-23 18:18:34 +08:00
#[derive(Deserialize, Debug)]
2020-12-12 20:32:06 +08:00
#[serde(rename_all = "camelCase", deny_unknown_fields)]
2021-07-07 22:20:22 +08:00
pub struct UpdateDocumentsQuery {
2021-10-25 22:41:23 +08:00
pub primary_key: Option<String>,
2020-12-12 20:32:06 +08:00
}
2021-09-30 17:17:42 +08:00
pub async fn add_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, MeiliSearch>,
path: web::Path<String>,
2021-09-29 06:12:25 +08:00
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
2021-09-30 17:17:42 +08:00
req: HttpRequest,
2021-10-29 22:10:58 +08:00
analytics: web::Data<dyn Analytics>,
2021-09-29 06:12:25 +08:00
) -> Result<HttpResponse, ResponseError> {
2021-09-30 17:17:42 +08:00
debug!("called with params: {:?}", params);
2021-10-12 21:23:31 +08:00
let params = params.into_inner();
let index_uid = path.into_inner();
2021-10-12 21:23:31 +08:00
analytics.add_documents(
&params,
meilisearch.get_index(index_uid.clone()).await.is_err(),
&req,
2021-10-12 21:23:31 +08:00
);
let allow_index_creation = meilisearch.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
2021-09-29 16:17:52 +08:00
meilisearch,
index_uid,
2021-10-12 21:23:31 +08:00
params.primary_key,
2021-09-29 16:17:52 +08:00
body,
2021-09-30 17:29:27 +08:00
IndexDocumentsMethod::ReplaceDocuments,
allow_index_creation,
2021-09-30 17:29:27 +08:00
)
.await?;
Ok(HttpResponse::Accepted().json(task))
2021-09-29 16:17:52 +08:00
}
2021-09-30 17:17:42 +08:00
pub async fn update_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, MeiliSearch>,
path: web::Path<String>,
2021-09-29 16:17:52 +08:00
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
2021-09-30 17:17:42 +08:00
req: HttpRequest,
2021-10-29 22:10:58 +08:00
analytics: web::Data<dyn Analytics>,
2021-09-29 16:17:52 +08:00
) -> Result<HttpResponse, ResponseError> {
2021-09-30 17:17:42 +08:00
debug!("called with params: {:?}", params);
let index_uid = path.into_inner();
2021-10-12 21:31:59 +08:00
analytics.update_documents(
&params,
meilisearch.get_index(index_uid.clone()).await.is_err(),
&req,
2021-10-12 21:31:59 +08:00
);
let allow_index_creation = meilisearch.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
2021-09-29 16:17:52 +08:00
meilisearch,
index_uid,
2021-09-30 17:17:42 +08:00
params.into_inner().primary_key,
2021-09-29 16:17:52 +08:00
body,
2021-09-30 17:29:27 +08:00
IndexDocumentsMethod::UpdateDocuments,
allow_index_creation,
2021-09-30 17:29:27 +08:00
)
.await?;
Ok(HttpResponse::Accepted().json(task))
2021-09-29 06:12:25 +08:00
}
async fn document_addition(
mime_type: Option<Mime>,
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, MeiliSearch>,
2021-09-30 17:17:42 +08:00
index_uid: String,
primary_key: Option<String>,
2021-06-23 19:55:16 +08:00
body: Payload,
2021-09-29 06:12:25 +08:00
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, ResponseError> {
let format = match mime_type
.as_ref()
.map(|m| (m.type_().as_str(), m.subtype().as_str()))
{
Some(("application", "json")) => DocumentAdditionFormat::Json,
Some(("application", "x-ndjson")) => DocumentAdditionFormat::Ndjson,
Some(("text", "csv")) => DocumentAdditionFormat::Csv,
Some((type_, subtype)) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
)
.into())
}
None => {
return Err(
MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()).into(),
)
2021-09-30 17:29:27 +08:00
}
2021-09-30 17:17:42 +08:00
};
2021-09-15 00:39:02 +08:00
let update = Update::DocumentAddition {
payload: Box::new(payload_to_stream(body)),
2021-09-30 17:17:42 +08:00
primary_key,
2021-09-29 06:12:25 +08:00
method,
format,
allow_index_creation,
2021-09-15 00:39:02 +08:00
};
2021-09-30 17:17:42 +08:00
let task = meilisearch.register_update(index_uid, update).await?.into();
2021-03-04 22:10:58 +08:00
debug!("returns: {:?}", task);
Ok(task)
2020-12-12 20:32:06 +08:00
}
2021-09-24 21:21:07 +08:00
pub async fn delete_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, MeiliSearch>,
path: web::Path<String>,
2021-09-24 21:21:07 +08:00
body: web::Json<Vec<Value>>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let ids = body
.iter()
.map(|v| {
v.as_str()
.map(String::from)
.unwrap_or_else(|| v.to_string())
})
.collect();
let update = Update::DeleteDocuments(ids);
let task: SummarizedTaskView = meilisearch
.register_update(path.into_inner(), update)
.await?
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
2021-09-24 21:21:07 +08:00
}
pub async fn clear_all_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, MeiliSearch>,
path: web::Path<String>,
2021-09-24 21:21:07 +08:00
) -> Result<HttpResponse, ResponseError> {
let update = Update::ClearDocuments;
let task: SummarizedTaskView = meilisearch
.register_update(path.into_inner(), update)
.await?
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
2021-09-24 21:21:07 +08:00
}