mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 00:55:00 +08:00
data add documents
This commit is contained in:
parent
0d7c4beecd
commit
1a38bfd31f
41
Cargo.lock
generated
41
Cargo.lock
generated
@ -332,6 +332,19 @@ dependencies = [
|
|||||||
"serde_json",
|
"serde_json",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-compression"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fb1ff21a63d3262af46b9f33a826a8d134e2d0d9b2179c86034948b732ea8b2a"
|
||||||
|
dependencies = [
|
||||||
|
"flate2",
|
||||||
|
"futures-core",
|
||||||
|
"memchr",
|
||||||
|
"pin-project-lite 0.1.11",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-trait"
|
name = "async-trait"
|
||||||
version = "0.1.42"
|
version = "0.1.42"
|
||||||
@ -1530,6 +1543,7 @@ dependencies = [
|
|||||||
"actix-web",
|
"actix-web",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"assert-json-diff",
|
"assert-json-diff",
|
||||||
|
"async-compression",
|
||||||
"byte-unit",
|
"byte-unit",
|
||||||
"bytes 0.6.0",
|
"bytes 0.6.0",
|
||||||
"chrono",
|
"chrono",
|
||||||
@ -1537,6 +1551,7 @@ dependencies = [
|
|||||||
"env_logger 0.8.2",
|
"env_logger 0.8.2",
|
||||||
"flate2",
|
"flate2",
|
||||||
"futures",
|
"futures",
|
||||||
|
"futures-util",
|
||||||
"grenad",
|
"grenad",
|
||||||
"heed",
|
"heed",
|
||||||
"http",
|
"http",
|
||||||
@ -1545,6 +1560,7 @@ dependencies = [
|
|||||||
"log",
|
"log",
|
||||||
"main_error",
|
"main_error",
|
||||||
"meilisearch-error",
|
"meilisearch-error",
|
||||||
|
"memmap",
|
||||||
"milli",
|
"milli",
|
||||||
"mime",
|
"mime",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@ -1680,12 +1696,24 @@ dependencies = [
|
|||||||
"kernel32-sys",
|
"kernel32-sys",
|
||||||
"libc",
|
"libc",
|
||||||
"log",
|
"log",
|
||||||
"miow",
|
"miow 0.2.2",
|
||||||
"net2",
|
"net2",
|
||||||
"slab",
|
"slab",
|
||||||
"winapi 0.2.8",
|
"winapi 0.2.8",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mio-named-pipes"
|
||||||
|
version = "0.1.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"mio",
|
||||||
|
"miow 0.3.6",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mio-uds"
|
name = "mio-uds"
|
||||||
version = "0.6.8"
|
version = "0.6.8"
|
||||||
@ -1709,6 +1737,16 @@ dependencies = [
|
|||||||
"ws2_32-sys",
|
"ws2_32-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "miow"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a33c1b55807fbed163481b5ba66db4b2fa6cde694a5027be10fb724206c5897"
|
||||||
|
dependencies = [
|
||||||
|
"socket2",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "near-proximity"
|
name = "near-proximity"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@ -2895,6 +2933,7 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
"memchr",
|
"memchr",
|
||||||
"mio",
|
"mio",
|
||||||
|
"mio-named-pipes",
|
||||||
"mio-uds",
|
"mio-uds",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
"pin-project-lite 0.1.11",
|
"pin-project-lite 0.1.11",
|
||||||
|
@ -19,6 +19,7 @@ actix-rt = "1"
|
|||||||
actix-service = "1.0.6"
|
actix-service = "1.0.6"
|
||||||
actix-web = { version = "3.3.2", features = ["rustls"] }
|
actix-web = { version = "3.3.2", features = ["rustls"] }
|
||||||
anyhow = "1.0.36"
|
anyhow = "1.0.36"
|
||||||
|
async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] }
|
||||||
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
||||||
bytes = "0.6.0"
|
bytes = "0.6.0"
|
||||||
chrono = { version = "0.4.19", features = ["serde"] }
|
chrono = { version = "0.4.19", features = ["serde"] }
|
||||||
@ -49,10 +50,12 @@ slice-group-by = "0.2.6"
|
|||||||
structopt = "0.3.20"
|
structopt = "0.3.20"
|
||||||
tar = "0.4.29"
|
tar = "0.4.29"
|
||||||
tempfile = "3.1.0"
|
tempfile = "3.1.0"
|
||||||
tokio = "*"
|
tokio = { version = "*", features = ["full"] }
|
||||||
ureq = { version = "1.5.1", default-features = false, features = ["tls"] }
|
ureq = { version = "1.5.1", default-features = false, features = ["tls"] }
|
||||||
walkdir = "2.3.1"
|
walkdir = "2.3.1"
|
||||||
whoami = "1.0.0"
|
whoami = "1.0.0"
|
||||||
|
futures-util = "0.3.8"
|
||||||
|
memmap = "0.7.0"
|
||||||
|
|
||||||
[dependencies.sentry]
|
[dependencies.sentry]
|
||||||
default-features = false
|
default-features = false
|
||||||
|
40
src/data.rs
40
src/data.rs
@ -1,11 +1,15 @@
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use sha2::Digest;
|
use async_compression::tokio_02::write::GzipEncoder;
|
||||||
|
use futures_util::stream::StreamExt;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
use milli::Index;
|
use milli::Index;
|
||||||
|
use milli::update::{IndexDocumentsMethod, UpdateFormat};
|
||||||
|
use sha2::Digest;
|
||||||
|
|
||||||
use crate::option::Opt;
|
use crate::option::Opt;
|
||||||
use crate::updates::UpdateQueue;
|
use crate::updates::{UpdateQueue, UpdateMeta, UpdateStatus, UpdateMetaProgress};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Data {
|
pub struct Data {
|
||||||
@ -75,11 +79,43 @@ impl Data {
|
|||||||
Ok(Data { inner })
|
Ok(Data { inner })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn add_documents<B, E>(
|
||||||
|
&self,
|
||||||
|
method: IndexDocumentsMethod,
|
||||||
|
format: UpdateFormat,
|
||||||
|
mut stream: impl futures::Stream<Item=Result<B, E>> + Unpin,
|
||||||
|
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateMetaProgress, String>>
|
||||||
|
where
|
||||||
|
B: Deref<Target = [u8]>,
|
||||||
|
E: std::error::Error + Send + Sync + 'static,
|
||||||
|
{
|
||||||
|
let file = tokio::task::block_in_place(tempfile::tempfile)?;
|
||||||
|
let file = tokio::fs::File::from_std(file);
|
||||||
|
let mut encoder = GzipEncoder::new(file);
|
||||||
|
|
||||||
|
while let Some(result) = stream.next().await {
|
||||||
|
let bytes = &*result?;
|
||||||
|
encoder.write_all(&bytes[..]).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
encoder.shutdown().await?;
|
||||||
|
let mut file = encoder.into_inner();
|
||||||
|
file.sync_all().await?;
|
||||||
|
let file = file.into_std().await;
|
||||||
|
let mmap = unsafe { memmap::Mmap::map(&file)? };
|
||||||
|
|
||||||
|
let meta = UpdateMeta::DocumentsAddition { method, format };
|
||||||
|
let update_id = tokio::task::block_in_place(|| self.update_queue.register_update(&meta, &mmap[..]))?;
|
||||||
|
|
||||||
|
Ok(UpdateStatus::Pending { update_id, meta })
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn http_payload_size_limit(&self) -> usize {
|
pub fn http_payload_size_limit(&self) -> usize {
|
||||||
self.options.http_payload_size_limit.get_bytes() as usize
|
self.options.http_payload_size_limit.get_bytes() as usize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn api_keys(&self) -> &ApiKeys {
|
pub fn api_keys(&self) -> &ApiKeys {
|
||||||
&self.api_keys
|
&self.api_keys
|
||||||
}
|
}
|
||||||
|
@ -93,10 +93,10 @@ async fn update_multiple_documents(
|
|||||||
|
|
||||||
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
|
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
|
||||||
async fn add_documents(
|
async fn add_documents(
|
||||||
_data: web::Data<Data>,
|
data: web::Data<Data>,
|
||||||
_path: web::Path<IndexParam>,
|
_path: web::Path<IndexParam>,
|
||||||
_params: web::Query<UpdateDocumentsQuery>,
|
_params: web::Query<UpdateDocumentsQuery>,
|
||||||
_body: web::Json<Vec<Document>>,
|
body: web::Json<Vec<Document>>,
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ pub use settings::{Settings, Facets};
|
|||||||
|
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use flate2::read::GzDecoder;
|
use flate2::read::GzDecoder;
|
||||||
@ -20,8 +21,8 @@ use crate::option::Opt;
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
#[serde(tag = "type")]
|
#[serde(tag = "type")]
|
||||||
enum UpdateMeta {
|
pub enum UpdateMeta {
|
||||||
DocumentsAddition { method: String, format: String },
|
DocumentsAddition { method: IndexDocumentsMethod, format: UpdateFormat },
|
||||||
ClearDocuments,
|
ClearDocuments,
|
||||||
Settings(Settings),
|
Settings(Settings),
|
||||||
Facets(Facets),
|
Facets(Facets),
|
||||||
@ -29,7 +30,7 @@ enum UpdateMeta {
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
#[serde(tag = "type")]
|
#[serde(tag = "type")]
|
||||||
enum UpdateMetaProgress {
|
pub enum UpdateMetaProgress {
|
||||||
DocumentsAddition {
|
DocumentsAddition {
|
||||||
step: usize,
|
step: usize,
|
||||||
total_steps: usize,
|
total_steps: usize,
|
||||||
@ -41,7 +42,7 @@ enum UpdateMetaProgress {
|
|||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
#[serde(tag = "type")]
|
#[serde(tag = "type")]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
enum UpdateStatus<M, P, N> {
|
pub enum UpdateStatus<M, P, N> {
|
||||||
Pending { update_id: u64, meta: M },
|
Pending { update_id: u64, meta: M },
|
||||||
Progressing { update_id: u64, meta: P },
|
Progressing { update_id: u64, meta: P },
|
||||||
Processed { update_id: u64, meta: N },
|
Processed { update_id: u64, meta: N },
|
||||||
@ -53,6 +54,13 @@ pub struct UpdateQueue {
|
|||||||
inner: Arc<UpdateStore<UpdateMeta, String>>,
|
inner: Arc<UpdateStore<UpdateMeta, String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Deref for UpdateQueue {
|
||||||
|
type Target = Arc<UpdateStore<UpdateMeta, String>>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, StructOpt)]
|
#[derive(Debug, Clone, StructOpt)]
|
||||||
pub struct IndexerOpts {
|
pub struct IndexerOpts {
|
||||||
@ -164,27 +172,16 @@ impl UpdateHandler {
|
|||||||
|
|
||||||
fn update_documents(
|
fn update_documents(
|
||||||
&self,
|
&self,
|
||||||
format: String,
|
format: UpdateFormat,
|
||||||
method: String,
|
method: IndexDocumentsMethod,
|
||||||
content: &[u8],
|
content: &[u8],
|
||||||
update_builder: UpdateBuilder,
|
update_builder: UpdateBuilder,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = self.indexes.write_txn()?;
|
let mut wtxn = self.indexes.write_txn()?;
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &self.indexes);
|
let mut builder = update_builder.index_documents(&mut wtxn, &self.indexes);
|
||||||
|
builder.update_format(format);
|
||||||
match format.as_str() {
|
builder.index_documents_method(method);
|
||||||
"csv" => builder.update_format(UpdateFormat::Csv),
|
|
||||||
"json" => builder.update_format(UpdateFormat::Json),
|
|
||||||
"json-stream" => builder.update_format(UpdateFormat::JsonStream),
|
|
||||||
otherwise => panic!("invalid update format {:?}", otherwise),
|
|
||||||
};
|
|
||||||
|
|
||||||
match method.as_str() {
|
|
||||||
"replace" => builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments),
|
|
||||||
"update" => builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments),
|
|
||||||
otherwise => panic!("invalid indexing method {:?}", otherwise),
|
|
||||||
};
|
|
||||||
|
|
||||||
let gzipped = true;
|
let gzipped = true;
|
||||||
let reader = if gzipped {
|
let reader = if gzipped {
|
||||||
|
Loading…
Reference in New Issue
Block a user