data add documents

This commit is contained in:
mpostma 2020-12-23 13:52:28 +01:00
parent 0d7c4beecd
commit 1a38bfd31f
5 changed files with 100 additions and 25 deletions

41
Cargo.lock generated
View File

@ -332,6 +332,19 @@ dependencies = [
"serde_json", "serde_json",
] ]
[[package]]
name = "async-compression"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb1ff21a63d3262af46b9f33a826a8d134e2d0d9b2179c86034948b732ea8b2a"
dependencies = [
"flate2",
"futures-core",
"memchr",
"pin-project-lite 0.1.11",
"tokio",
]
[[package]] [[package]]
name = "async-trait" name = "async-trait"
version = "0.1.42" version = "0.1.42"
@ -1530,6 +1543,7 @@ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
"assert-json-diff", "assert-json-diff",
"async-compression",
"byte-unit", "byte-unit",
"bytes 0.6.0", "bytes 0.6.0",
"chrono", "chrono",
@ -1537,6 +1551,7 @@ dependencies = [
"env_logger 0.8.2", "env_logger 0.8.2",
"flate2", "flate2",
"futures", "futures",
"futures-util",
"grenad", "grenad",
"heed", "heed",
"http", "http",
@ -1545,6 +1560,7 @@ dependencies = [
"log", "log",
"main_error", "main_error",
"meilisearch-error", "meilisearch-error",
"memmap",
"milli", "milli",
"mime", "mime",
"once_cell", "once_cell",
@ -1680,12 +1696,24 @@ dependencies = [
"kernel32-sys", "kernel32-sys",
"libc", "libc",
"log", "log",
"miow", "miow 0.2.2",
"net2", "net2",
"slab", "slab",
"winapi 0.2.8", "winapi 0.2.8",
] ]
[[package]]
name = "mio-named-pipes"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656"
dependencies = [
"log",
"mio",
"miow 0.3.6",
"winapi 0.3.9",
]
[[package]] [[package]]
name = "mio-uds" name = "mio-uds"
version = "0.6.8" version = "0.6.8"
@ -1709,6 +1737,16 @@ dependencies = [
"ws2_32-sys", "ws2_32-sys",
] ]
[[package]]
name = "miow"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a33c1b55807fbed163481b5ba66db4b2fa6cde694a5027be10fb724206c5897"
dependencies = [
"socket2",
"winapi 0.3.9",
]
[[package]] [[package]]
name = "near-proximity" name = "near-proximity"
version = "0.1.0" version = "0.1.0"
@ -2895,6 +2933,7 @@ dependencies = [
"libc", "libc",
"memchr", "memchr",
"mio", "mio",
"mio-named-pipes",
"mio-uds", "mio-uds",
"num_cpus", "num_cpus",
"pin-project-lite 0.1.11", "pin-project-lite 0.1.11",

View File

@ -19,6 +19,7 @@ actix-rt = "1"
actix-service = "1.0.6" actix-service = "1.0.6"
actix-web = { version = "3.3.2", features = ["rustls"] } actix-web = { version = "3.3.2", features = ["rustls"] }
anyhow = "1.0.36" anyhow = "1.0.36"
async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] }
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
bytes = "0.6.0" bytes = "0.6.0"
chrono = { version = "0.4.19", features = ["serde"] } chrono = { version = "0.4.19", features = ["serde"] }
@ -49,10 +50,12 @@ slice-group-by = "0.2.6"
structopt = "0.3.20" structopt = "0.3.20"
tar = "0.4.29" tar = "0.4.29"
tempfile = "3.1.0" tempfile = "3.1.0"
tokio = "*" tokio = { version = "*", features = ["full"] }
ureq = { version = "1.5.1", default-features = false, features = ["tls"] } ureq = { version = "1.5.1", default-features = false, features = ["tls"] }
walkdir = "2.3.1" walkdir = "2.3.1"
whoami = "1.0.0" whoami = "1.0.0"
futures-util = "0.3.8"
memmap = "0.7.0"
[dependencies.sentry] [dependencies.sentry]
default-features = false default-features = false

View File

@ -1,11 +1,15 @@
use std::ops::Deref; use std::ops::Deref;
use std::sync::Arc; use std::sync::Arc;
use sha2::Digest; use async_compression::tokio_02::write::GzipEncoder;
use futures_util::stream::StreamExt;
use tokio::io::AsyncWriteExt;
use milli::Index; use milli::Index;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use sha2::Digest;
use crate::option::Opt; use crate::option::Opt;
use crate::updates::UpdateQueue; use crate::updates::{UpdateQueue, UpdateMeta, UpdateStatus, UpdateMetaProgress};
#[derive(Clone)] #[derive(Clone)]
pub struct Data { pub struct Data {
@ -75,11 +79,43 @@ impl Data {
Ok(Data { inner }) Ok(Data { inner })
} }
pub async fn add_documents<B, E>(
&self,
method: IndexDocumentsMethod,
format: UpdateFormat,
mut stream: impl futures::Stream<Item=Result<B, E>> + Unpin,
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateMetaProgress, String>>
where
B: Deref<Target = [u8]>,
E: std::error::Error + Send + Sync + 'static,
{
let file = tokio::task::block_in_place(tempfile::tempfile)?;
let file = tokio::fs::File::from_std(file);
let mut encoder = GzipEncoder::new(file);
while let Some(result) = stream.next().await {
let bytes = &*result?;
encoder.write_all(&bytes[..]).await?;
}
encoder.shutdown().await?;
let mut file = encoder.into_inner();
file.sync_all().await?;
let file = file.into_std().await;
let mmap = unsafe { memmap::Mmap::map(&file)? };
let meta = UpdateMeta::DocumentsAddition { method, format };
let update_id = tokio::task::block_in_place(|| self.update_queue.register_update(&meta, &mmap[..]))?;
Ok(UpdateStatus::Pending { update_id, meta })
}
#[inline] #[inline]
pub fn http_payload_size_limit(&self) -> usize { pub fn http_payload_size_limit(&self) -> usize {
self.options.http_payload_size_limit.get_bytes() as usize self.options.http_payload_size_limit.get_bytes() as usize
} }
#[inline]
pub fn api_keys(&self) -> &ApiKeys { pub fn api_keys(&self) -> &ApiKeys {
&self.api_keys &self.api_keys
} }

View File

@ -93,10 +93,10 @@ async fn update_multiple_documents(
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")] #[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn add_documents( async fn add_documents(
_data: web::Data<Data>, data: web::Data<Data>,
_path: web::Path<IndexParam>, _path: web::Path<IndexParam>,
_params: web::Query<UpdateDocumentsQuery>, _params: web::Query<UpdateDocumentsQuery>,
_body: web::Json<Vec<Document>>, body: web::Json<Vec<Document>>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
todo!() todo!()
} }

View File

@ -4,6 +4,7 @@ pub use settings::{Settings, Facets};
use std::io; use std::io;
use std::sync::Arc; use std::sync::Arc;
use std::ops::Deref;
use anyhow::Result; use anyhow::Result;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
@ -20,8 +21,8 @@ use crate::option::Opt;
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")] #[serde(tag = "type")]
enum UpdateMeta { pub enum UpdateMeta {
DocumentsAddition { method: String, format: String }, DocumentsAddition { method: IndexDocumentsMethod, format: UpdateFormat },
ClearDocuments, ClearDocuments,
Settings(Settings), Settings(Settings),
Facets(Facets), Facets(Facets),
@ -29,7 +30,7 @@ enum UpdateMeta {
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")] #[serde(tag = "type")]
enum UpdateMetaProgress { pub enum UpdateMetaProgress {
DocumentsAddition { DocumentsAddition {
step: usize, step: usize,
total_steps: usize, total_steps: usize,
@ -41,7 +42,7 @@ enum UpdateMetaProgress {
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
#[serde(tag = "type")] #[serde(tag = "type")]
#[allow(dead_code)] #[allow(dead_code)]
enum UpdateStatus<M, P, N> { pub enum UpdateStatus<M, P, N> {
Pending { update_id: u64, meta: M }, Pending { update_id: u64, meta: M },
Progressing { update_id: u64, meta: P }, Progressing { update_id: u64, meta: P },
Processed { update_id: u64, meta: N }, Processed { update_id: u64, meta: N },
@ -53,6 +54,13 @@ pub struct UpdateQueue {
inner: Arc<UpdateStore<UpdateMeta, String>>, inner: Arc<UpdateStore<UpdateMeta, String>>,
} }
impl Deref for UpdateQueue {
type Target = Arc<UpdateStore<UpdateMeta, String>>;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
#[derive(Debug, Clone, StructOpt)] #[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts { pub struct IndexerOpts {
@ -164,27 +172,16 @@ impl UpdateHandler {
fn update_documents( fn update_documents(
&self, &self,
format: String, format: UpdateFormat,
method: String, method: IndexDocumentsMethod,
content: &[u8], content: &[u8],
update_builder: UpdateBuilder, update_builder: UpdateBuilder,
) -> Result<()> { ) -> Result<()> {
// We must use the write transaction of the update here. // We must use the write transaction of the update here.
let mut wtxn = self.indexes.write_txn()?; let mut wtxn = self.indexes.write_txn()?;
let mut builder = update_builder.index_documents(&mut wtxn, &self.indexes); let mut builder = update_builder.index_documents(&mut wtxn, &self.indexes);
builder.update_format(format);
match format.as_str() { builder.index_documents_method(method);
"csv" => builder.update_format(UpdateFormat::Csv),
"json" => builder.update_format(UpdateFormat::Json),
"json-stream" => builder.update_format(UpdateFormat::JsonStream),
otherwise => panic!("invalid update format {:?}", otherwise),
};
match method.as_str() {
"replace" => builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments),
"update" => builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments),
otherwise => panic!("invalid indexing method {:?}", otherwise),
};
let gzipped = true; let gzipped = true;
let reader = if gzipped { let reader = if gzipped {