mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-23 02:27:40 +08:00)

commit b7f9c94f4a (parent 8954b1bd1d)

    write the dump export
@@ -16,7 +16,6 @@ time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsi
 tar = "0.4.38"
 anyhow = "1.0.65"
 log = "0.4.17"
-index-scheduler = { path = "../index-scheduler" }
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 http = "0.2.8"
@@ -1,3 +1,4 @@
+use meilisearch_types::error::{Code, ErrorCode};
 use thiserror::Error;

 #[derive(Debug, Error)]
@@ -16,3 +17,19 @@ pub enum Error {
     #[error(transparent)]
     Uuid(#[from] uuid::Error),
 }
+
+impl ErrorCode for Error {
+    fn error_code(&self) -> Code {
+        match self {
+            // Are these three really Internal errors?
+            Error::Io(_) => Code::Internal,
+            Error::Serde(_) => Code::Internal,
+            Error::Uuid(_) => Code::Internal,
+
+            // all these errors should never be raised when creating a dump, thus no error code should be associated.
+            Error::DumpV1Unsupported => Code::Internal,
+            Error::BadIndexName => Code::Internal,
+            Error::MalformedTask => Code::Internal,
+        }
+    }
+}
@@ -52,12 +52,11 @@ pub(crate) mod test {
     };

     use big_s::S;
-    use index_scheduler::task::Details;
     use maplit::btreeset;
-    use meilisearch_auth::{Action, Key};
+    use meilisearch_types::keys::{Action, Key};
     use meilisearch_types::milli::{self, update::Setting};
     use meilisearch_types::settings::{Checked, Settings};
-    use meilisearch_types::tasks::{DetailsView, Kind, Status, TaskView};
+    use meilisearch_types::tasks::{Kind, Status};
     use meilisearch_types::{index_uid::IndexUid, star_or::StarOr};
     use serde_json::{json, Map, Value};
     use time::{macros::datetime, Duration};
@@ -116,7 +115,7 @@ pub(crate) mod test {
         settings.check()
     }

-    pub fn create_test_tasks() -> Vec<(TaskView, Option<Vec<Document>>)> {
+    pub fn create_test_tasks() -> Vec<(Task, Option<Vec<Document>>)> {
         vec![
             (
                 TaskView {
@@ -124,7 +124,7 @@ impl CompatV5ToV6 {
                     indexed_documents,
                 } => v6::Details::DocumentAddition {
                     received_documents: received_documents as u64,
-                    indexed_documents: indexed_documents.map_or(0, |i| i as u64),
+                    indexed_documents: indexed_documents.map(|i| i as u64),
                 },
                 v5::Details::Settings { settings } => v6::Details::Settings {
                     settings: settings.into(),
202 dump/src/reader/v6/mod.rs (new file)
@@ -0,0 +1,202 @@
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    path::Path,
+    str::FromStr,
+};
+
+use tempfile::TempDir;
+use time::OffsetDateTime;
+use uuid::Uuid;
+
+use crate::{Error, IndexMetadata, Result, Version};
+
+mod tasks;
+
+pub use meilisearch_types::milli;
+
+use super::Document;
+
+pub type Metadata = crate::Metadata;
+
+pub type Settings<T> = meilisearch_types::settings::Settings<T>;
+pub type Checked = meilisearch_types::settings::Checked;
+pub type Unchecked = meilisearch_types::settings::Unchecked;
+
+pub type Task = tasks::TaskDump;
+pub type Key = meilisearch_types::keys::Key;
+
+// ===== Other types to clarify the code of the compat module
+// everything related to the tasks
+pub type Status = meilisearch_types::tasks::Status;
+pub type Kind = tasks::KindDump;
+pub type Details = meilisearch_types::tasks::Details;
+
+// everything related to the settings
+pub type Setting<T> = meilisearch_types::milli::update::Setting<T>;
+pub type TypoTolerance = meilisearch_types::settings::TypoSettings;
+pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting;
+pub type FacetingSettings = meilisearch_types::settings::FacetingSettings;
+pub type PaginationSettings = meilisearch_types::settings::PaginationSettings;
+
+// everything related to the api keys
+pub type Action = meilisearch_types::keys::Action;
+pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>;
+pub type IndexUid = meilisearch_types::index_uid::IndexUid;
+
+// everything related to the errors
+pub type ResponseError = meilisearch_types::error::ResponseError;
+pub type Code = meilisearch_types::error::Code;
+
+pub struct V6Reader {
+    dump: TempDir,
+    instance_uid: Uuid,
+    metadata: Metadata,
+    tasks: BufReader<File>,
+    keys: BufReader<File>,
+}
+
+impl V6Reader {
+    pub fn open(dump: TempDir) -> Result<Self> {
+        let meta_file = fs::read(dump.path().join("metadata.json"))?;
+        let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
+        let instance_uid = Uuid::from_str(&instance_uid)?;
+
+        Ok(V6Reader {
+            metadata: serde_json::from_reader(&*meta_file)?,
+            instance_uid,
+            tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
+            keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
+            dump,
+        })
+    }
+
+    pub fn version(&self) -> Version {
+        Version::V6
+    }
+
+    pub fn date(&self) -> Option<OffsetDateTime> {
+        Some(self.metadata.dump_date)
+    }
+
+    pub fn instance_uid(&self) -> Result<Option<Uuid>> {
+        Ok(Some(self.instance_uid))
+    }
+
+    pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<V6IndexReader>> + '_>> {
+        let entries = fs::read_dir(self.dump.path().join("indexes"))?;
+        Ok(Box::new(
+            entries
+                .map(|entry| -> Result<Option<_>> {
+                    let entry = entry?;
+                    if entry.file_type()?.is_dir() {
+                        let index = V6IndexReader::new(
+                            entry
+                                .file_name()
+                                .to_str()
+                                .ok_or(Error::BadIndexName)?
+                                .to_string(),
+                            &entry.path(),
+                        )?;
+                        Ok(Some(index))
+                    } else {
+                        Ok(None)
+                    }
+                })
+                .filter_map(|entry| entry.transpose()),
+        ))
+    }
+
+    pub fn tasks(
+        &mut self,
+    ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
+        Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
+            let task: Task = serde_json::from_str(&line?)?;
+
+            let update_file_path = self
+                .dump
+                .path()
+                .join("tasks")
+                .join("update_files")
+                .join(format!("{}.jsonl", task.uid.to_string()));
+
+            if update_file_path.exists() {
+                Ok((
+                    task,
+                    Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
+                ))
+            } else {
+                Ok((task, None))
+            }
+        }))
+    }
+
+    pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
+        Box::new(
+            (&mut self.keys)
+                .lines()
+                .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
+        )
+    }
+}
+
+pub struct UpdateFile {
+    reader: BufReader<File>,
+}
+
+impl UpdateFile {
+    fn new(path: &Path) -> Result<Self> {
+        Ok(UpdateFile {
+            reader: BufReader::new(File::open(path)?),
+        })
+    }
+}
+
+impl Iterator for UpdateFile {
+    type Item = Result<Document>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        (&mut self.reader)
+            .lines()
+            .map(|line| {
+                line.map_err(Error::from)
+                    .and_then(|line| serde_json::from_str(&line).map_err(Error::from))
+            })
+            .next()
+    }
+}
+
+pub struct V6IndexReader {
+    metadata: IndexMetadata,
+    documents: BufReader<File>,
+    settings: BufReader<File>,
+}
+
+impl V6IndexReader {
+    pub fn new(_name: String, path: &Path) -> Result<Self> {
+        let metadata = File::open(path.join("metadata.json"))?;
+
+        let ret = V6IndexReader {
+            metadata: serde_json::from_reader(metadata)?,
+            documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
+            settings: BufReader::new(File::open(path.join("settings.json"))?),
+        };
+
+        Ok(ret)
+    }
+
+    pub fn metadata(&self) -> &IndexMetadata {
+        &self.metadata
+    }
+
+    pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
+        Ok((&mut self.documents)
+            .lines()
+            .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
+    }
+
+    pub fn settings(&mut self) -> Result<Settings<Checked>> {
+        let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
+        Ok(settings.check())
+    }
+}
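A rough usage sketch (not part of the commit) of how the reader above could be driven once a v6 dump has already been unpacked into a `TempDir`; the `read_dump` helper and its error handling are assumptions for illustration, and the types are assumed to be in scope:

    fn read_dump(dump_dir: tempfile::TempDir) -> Result<()> {
        let mut dump = V6Reader::open(dump_dir)?;

        // Walk every index directory and count its documents.
        for index in dump.indexes()? {
            let mut index = index?;
            let documents = index.documents()?.count();
            println!("index `{}` holds {} documents", index.metadata().uid, documents);
        }

        // Then replay the task queue; update files come back as extra JSONL iterators.
        for entry in dump.tasks() {
            let (task, _update_file) = entry?;
            println!("task {} has status {:?}", task.uid, task.status);
        }

        Ok(())
    }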
81 dump/src/reader/v6/tasks.rs (new file)
@@ -0,0 +1,81 @@
+use meilisearch_types::{
+    error::ResponseError,
+    milli::update::IndexDocumentsMethod,
+    settings::Unchecked,
+    tasks::{Details, Status, TaskId},
+};
+use serde::{Deserialize, Serialize};
+use time::OffsetDateTime;
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskDump {
+    pub uid: TaskId,
+    #[serde(default)]
+    pub index_uid: Option<String>,
+    pub status: Status,
+    // TODO use our own Kind for the user
+    #[serde(rename = "type")]
+    pub kind: KindDump,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub details: Option<Details>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<ResponseError>,
+
+    #[serde(with = "time::serde::rfc3339")]
+    pub enqueued_at: OffsetDateTime,
+    #[serde(
+        with = "time::serde::rfc3339::option",
+        skip_serializing_if = "Option::is_none",
+        default
+    )]
+    pub started_at: Option<OffsetDateTime>,
+    #[serde(
+        with = "time::serde::rfc3339::option",
+        skip_serializing_if = "Option::is_none",
+        default
+    )]
+    pub finished_at: Option<OffsetDateTime>,
+}
+
+// A `Kind` specific version made for the dump. If modified you may break the dump.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub enum KindDump {
+    DocumentImport {
+        primary_key: Option<String>,
+        method: IndexDocumentsMethod,
+        documents_count: u64,
+        allow_index_creation: bool,
+    },
+    DocumentDeletion {
+        documents_ids: Vec<String>,
+    },
+    DocumentClear,
+    Settings {
+        settings: meilisearch_types::settings::Settings<Unchecked>,
+        is_deletion: bool,
+        allow_index_creation: bool,
+    },
+    IndexDeletion,
+    IndexCreation {
+        primary_key: Option<String>,
+    },
+    IndexUpdate {
+        primary_key: Option<String>,
+    },
+    IndexSwap {
+        lhs: String,
+        rhs: String,
+    },
+    CancelTask {
+        tasks: Vec<TaskId>,
+    },
+    DeleteTasks {
+        query: String,
+        tasks: Vec<TaskId>,
+    },
+    DumpExport,
+    Snapshot,
+}
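For illustration only (not in the commit), the serde attributes above make each task serialize to one camelCase JSON object per line of `tasks/queue.jsonl`, with the kind stored under a `type` key. A sketch, assuming the types above are in scope and with made-up values:

    let task = TaskDump {
        uid: 0,
        index_uid: Some("movies".to_string()),
        status: Status::Enqueued,
        kind: KindDump::DocumentClear,
        details: None,
        error: None,
        enqueued_at: time::OffsetDateTime::now_utc(),
        started_at: None,
        finished_at: None,
    };
    // Roughly: {"uid":0,"indexUid":"movies","status":"enqueued","type":"documentClear","enqueuedAt":"..."}
    println!("{}", serde_json::to_string(&task).unwrap());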
@@ -22,13 +22,15 @@ pub struct DumpWriter {
 }

 impl DumpWriter {
-    pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> {
+    pub fn new(instance_uuid: Option<Uuid>) -> Result<DumpWriter> {
         let dir = TempDir::new()?;

-        fs::write(
-            dir.path().join("instance_uid.uuid"),
-            &instance_uuid.as_hyphenated().to_string(),
-        )?;
+        if let Some(instance_uuid) = instance_uuid {
+            fs::write(
+                dir.path().join("instance_uid.uuid"),
+                &instance_uuid.as_hyphenated().to_string(),
+            )?;
+        }

         let metadata = Metadata {
             dump_version: CURRENT_DUMP_VERSION,
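A two-line sketch of what the signature change above means for callers (assumed usage, not code from the repo):

    // With analytics enabled, the UID ends up in instance_uid.uuid at the root of the dump...
    let dump = dump::DumpWriter::new(Some(uuid::Uuid::new_v4()))?;
    // ...and with analytics disabled the file is simply never written.
    let anonymous_dump = dump::DumpWriter::new(None)?;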
@@ -133,7 +135,6 @@ impl UpdateFile {
             writer.write_all(b"\n")?;
             writer.flush()?;
         } else {
-            dbg!(&self.path);
             let file = File::create(&self.path).unwrap();
             self.writer = Some(BufWriter::new(file));
             self.push_document(document)?;
@@ -13,6 +13,7 @@ file-store = { path = "../file-store" }
 log = "0.4.14"
 meilisearch-types = { path = "../meilisearch-types" }
 roaring = { version = "0.10.0", features = ["serde"] }
+dump = { path = "../dump" }
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = { version = "1.0.85", features = ["preserve_order"] }
 tempfile = "3.3.0"
@@ -1,7 +1,11 @@
 use std::collections::HashSet;
+use std::fs::File;
+use std::io::BufWriter;

 use crate::{autobatcher::BatchKind, Error, IndexScheduler, Result, TaskId};

+use dump::IndexMetadata;
+use meilisearch_types::milli::documents::obkv_to_object;
 use meilisearch_types::tasks::{Details, Kind, KindWithContent, Status, Task};

 use log::{debug, info};
@@ -25,7 +29,7 @@ pub(crate) enum Batch {
     Cancel(Task),
     TaskDeletion(Task),
     Snapshot(Vec<Task>),
-    Dump(Vec<Task>),
+    Dump(Task),
     IndexOperation(IndexOperation),
     IndexCreation {
         index_uid: String,
@@ -100,9 +104,10 @@ impl Batch {
         match self {
             Batch::Cancel(task)
             | Batch::TaskDeletion(task)
+            | Batch::Dump(task)
             | Batch::IndexCreation { task, .. }
             | Batch::IndexUpdate { task, .. } => vec![task.uid],
-            Batch::Snapshot(tasks) | Batch::Dump(tasks) | Batch::IndexDeletion { tasks, .. } => {
+            Batch::Snapshot(tasks) | Batch::IndexDeletion { tasks, .. } => {
                 tasks.iter().map(|task| task.uid).collect()
             }
             Batch::IndexOperation(operation) => match operation {
@@ -402,8 +407,11 @@ impl IndexScheduler {

         // 4. we batch the dumps.
         let to_dump = self.get_kind(rtxn, Kind::DumpExport)? & enqueued;
-        if !to_dump.is_empty() {
-            return Ok(Some(Batch::Dump(self.get_existing_tasks(rtxn, to_dump)?)));
+        if let Some(to_dump) = to_dump.min() {
+            return Ok(Some(Batch::Dump(
+                self.get_task(rtxn, to_dump)?
+                    .ok_or(Error::CorruptedTaskQueue)?,
+            )));
         }

         // 5. We take the next task and try to batch all the tasks associated with this index.
|
|||||||
Ok(vec![task])
|
Ok(vec![task])
|
||||||
}
|
}
|
||||||
Batch::Snapshot(_) => todo!(),
|
Batch::Snapshot(_) => todo!(),
|
||||||
Batch::Dump(_) => todo!(),
|
Batch::Dump(mut task) => {
|
||||||
|
let KindWithContent::DumpExport { keys, instance_uid, dump_uid } = &task.kind else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
let dump = dump::DumpWriter::new(instance_uid.clone())?;
|
||||||
|
let mut d_keys = dump.create_keys()?;
|
||||||
|
|
||||||
|
// 1. dump the keys
|
||||||
|
for key in keys {
|
||||||
|
d_keys.push_key(key)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let rtxn = self.env.read_txn()?;
|
||||||
|
|
||||||
|
// 2. dump the tasks
|
||||||
|
let mut tasks = dump.create_tasks_queue()?;
|
||||||
|
for ret in self.all_tasks.iter(&rtxn)? {
|
||||||
|
let (_, task) = ret?;
|
||||||
|
let mut dump_content_file = tasks.push_task(&task)?;
|
||||||
|
|
||||||
|
// 2.1. Dump the `content_file` associated with the task if there is one.
|
||||||
|
if let Some(content_file) = task.content_uuid() {
|
||||||
|
let content_file = self.file_store.get_update(*content_file)?;
|
||||||
|
|
||||||
|
let reader = DocumentsBatchReader::from_reader(content_file)
|
||||||
|
.map_err(milli::Error::from)?;
|
||||||
|
|
||||||
|
let (mut cursor, documents_batch_index) =
|
||||||
|
reader.into_cursor_and_fields_index();
|
||||||
|
|
||||||
|
while let Some(doc) = cursor.next_document().map_err(milli::Error::from)? {
|
||||||
|
dump_content_file
|
||||||
|
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: maybe `self.indexes` could use this rtxn instead of creating its own
|
||||||
|
drop(rtxn);
|
||||||
|
|
||||||
|
// 3. Dump the indexes
|
||||||
|
for (uid, index) in self.indexes()? {
|
||||||
|
let rtxn = index.read_txn()?;
|
||||||
|
let metadata = IndexMetadata {
|
||||||
|
uid: uid.clone(),
|
||||||
|
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||||
|
created_at: index.created_at(&rtxn)?,
|
||||||
|
updated_at: index.updated_at(&rtxn)?,
|
||||||
|
};
|
||||||
|
let mut index_dumper = dump.create_index(&uid, &metadata)?;
|
||||||
|
|
||||||
|
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||||
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
|
|
||||||
|
// 3.1. Dump the documents
|
||||||
|
for ret in index.all_documents(&rtxn)? {
|
||||||
|
let (_id, doc) = ret?;
|
||||||
|
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||||
|
index_dumper.push_document(&document)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3.2. Dump the settings
|
||||||
|
let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
|
||||||
|
index_dumper.settings(&settings)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
|
||||||
|
let file = File::create(path).unwrap();
|
||||||
|
dump.persist_to(BufWriter::new(file)).unwrap();
|
||||||
|
|
||||||
|
task.status = Status::Succeeded;
|
||||||
|
|
||||||
|
Ok(vec![task])
|
||||||
|
}
|
||||||
Batch::IndexOperation(operation) => {
|
Batch::IndexOperation(operation) => {
|
||||||
#[rustfmt::skip]
|
#[rustfmt::skip]
|
||||||
let index = match operation {
|
let index = match operation {
|
||||||
@ -679,14 +760,14 @@ impl IndexScheduler {
|
|||||||
task.status = Status::Succeeded;
|
task.status = Status::Succeeded;
|
||||||
task.details = Some(Details::DocumentAddition {
|
task.details = Some(Details::DocumentAddition {
|
||||||
received_documents: number_of_documents,
|
received_documents: number_of_documents,
|
||||||
indexed_documents,
|
indexed_documents: Some(indexed_documents),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
task.status = Status::Failed;
|
task.status = Status::Failed;
|
||||||
task.details = Some(Details::DocumentAddition {
|
task.details = Some(Details::DocumentAddition {
|
||||||
received_documents: count,
|
received_documents: count,
|
||||||
indexed_documents: count,
|
indexed_documents: Some(count),
|
||||||
});
|
});
|
||||||
task.error = Some(error.into())
|
task.error = Some(error.into())
|
||||||
}
|
}
|
||||||
|
@@ -24,6 +24,8 @@ pub enum Error {
     #[error("`{0}` is not a type. Available types are")]
     InvalidKind(String),

+    #[error(transparent)]
+    Dump(#[from] dump::Error),
     #[error(transparent)]
     Heed(#[from] heed::Error),
     #[error(transparent)]
|
|||||||
Error::InvalidKind(_) => Code::BadRequest,
|
Error::InvalidKind(_) => Code::BadRequest,
|
||||||
|
|
||||||
// TODO: TAMO: are all these errors really internal?
|
// TODO: TAMO: are all these errors really internal?
|
||||||
|
Error::Dump(e) => e.error_code(),
|
||||||
|
Error::Milli(e) => e.error_code(),
|
||||||
Error::Heed(_) => Code::Internal,
|
Error::Heed(_) => Code::Internal,
|
||||||
Error::Milli(_) => Code::Internal,
|
|
||||||
Error::FileStore(_) => Code::Internal,
|
Error::FileStore(_) => Code::Internal,
|
||||||
Error::IoError(_) => Code::Internal,
|
Error::IoError(_) => Code::Internal,
|
||||||
Error::Anyhow(_) => Code::Internal,
|
Error::Anyhow(_) => Code::Internal,
|
||||||
|
@@ -154,6 +154,9 @@ pub struct IndexScheduler {
     /// Weither autobatching is enabled or not.
     pub(crate) autobatching_enabled: bool,

+    /// The path used to create the dumps.
+    pub(crate) dumps_path: PathBuf,
+
     // ================= test
     /// The next entry is dedicated to the tests.
     /// It provide a way to break in multiple part of the scheduler.
|
|||||||
tasks_path: PathBuf,
|
tasks_path: PathBuf,
|
||||||
update_file_path: PathBuf,
|
update_file_path: PathBuf,
|
||||||
indexes_path: PathBuf,
|
indexes_path: PathBuf,
|
||||||
|
dumps_path: PathBuf,
|
||||||
index_size: usize,
|
index_size: usize,
|
||||||
indexer_config: IndexerConfig,
|
indexer_config: IndexerConfig,
|
||||||
autobatching_enabled: bool,
|
autobatching_enabled: bool,
|
||||||
@ -183,6 +187,7 @@ impl IndexScheduler {
|
|||||||
std::fs::create_dir_all(&tasks_path)?;
|
std::fs::create_dir_all(&tasks_path)?;
|
||||||
std::fs::create_dir_all(&update_file_path)?;
|
std::fs::create_dir_all(&update_file_path)?;
|
||||||
std::fs::create_dir_all(&indexes_path)?;
|
std::fs::create_dir_all(&indexes_path)?;
|
||||||
|
std::fs::create_dir_all(&dumps_path)?;
|
||||||
|
|
||||||
let mut options = heed::EnvOpenOptions::new();
|
let mut options = heed::EnvOpenOptions::new();
|
||||||
options.max_dbs(6);
|
options.max_dbs(6);
|
||||||
@ -205,6 +210,7 @@ impl IndexScheduler {
|
|||||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||||
autobatching_enabled,
|
autobatching_enabled,
|
||||||
|
dumps_path,
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
test_breakpoint_sdr,
|
test_breakpoint_sdr,
|
||||||
@ -227,6 +233,7 @@ impl IndexScheduler {
|
|||||||
index_mapper: self.index_mapper.clone(),
|
index_mapper: self.index_mapper.clone(),
|
||||||
wake_up: self.wake_up.clone(),
|
wake_up: self.wake_up.clone(),
|
||||||
autobatching_enabled: self.autobatching_enabled,
|
autobatching_enabled: self.autobatching_enabled,
|
||||||
|
dumps_path: self.dumps_path.clone(),
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
|
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
|
||||||
@ -342,7 +349,7 @@ impl IndexScheduler {
|
|||||||
started_at: None,
|
started_at: None,
|
||||||
finished_at: None,
|
finished_at: None,
|
||||||
error: None,
|
error: None,
|
||||||
details: task.default_details(),
|
details: (&task).into(),
|
||||||
status: Status::Enqueued,
|
status: Status::Enqueued,
|
||||||
kind: task,
|
kind: task,
|
||||||
};
|
};
|
||||||
@ -367,9 +374,9 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
match wtxn.commit() {
|
match wtxn.commit() {
|
||||||
Ok(()) => (),
|
Ok(()) => (),
|
||||||
e @ Err(_) => {
|
_e @ Err(_) => {
|
||||||
todo!("remove the data associated with the task");
|
todo!("remove the data associated with the task");
|
||||||
e?;
|
// _e?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -436,6 +443,7 @@ impl IndexScheduler {
|
|||||||
// TODO the info field should've been set by the process_batch function
|
// TODO the info field should've been set by the process_batch function
|
||||||
self.update_task(&mut wtxn, &task)?;
|
self.update_task(&mut wtxn, &task)?;
|
||||||
}
|
}
|
||||||
|
log::info!("A batch of tasks was successfully completed.");
|
||||||
}
|
}
|
||||||
// In case of a failure we must get back and patch all the tasks with the error.
|
// In case of a failure we must get back and patch all the tasks with the error.
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@ -453,7 +461,6 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
*self.processing_tasks.write().unwrap() = (finished_at, RoaringBitmap::new());
|
*self.processing_tasks.write().unwrap() = (finished_at, RoaringBitmap::new());
|
||||||
wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
log::info!("A batch of tasks was successfully completed.");
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.test_breakpoint_sdr
|
self.test_breakpoint_sdr
|
||||||
@ -542,6 +549,7 @@ mod tests {
|
|||||||
tempdir.path().join("db_path"),
|
tempdir.path().join("db_path"),
|
||||||
tempdir.path().join("file_store"),
|
tempdir.path().join("file_store"),
|
||||||
tempdir.path().join("indexes"),
|
tempdir.path().join("indexes"),
|
||||||
|
tempdir.path().join("dumps"),
|
||||||
1024 * 1024,
|
1024 * 1024,
|
||||||
IndexerConfig::default(),
|
IndexerConfig::default(),
|
||||||
autobatching, // enable autobatching
|
autobatching, // enable autobatching
|
||||||
|
@@ -1,16 +1,19 @@
 use std::{any::Any, sync::Arc};

 use actix_web::HttpRequest;
+use meilisearch_types::InstanceUid;
 use serde_json::Value;

 use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt};

 use super::{find_user_id, Analytics};

-pub struct MockAnalytics;
+pub struct MockAnalytics {
+    instance_uid: Option<InstanceUid>,
+}

 #[derive(Default)]
-pub struct SearchAggregator {}
+pub struct SearchAggregator;

 #[allow(dead_code)]
 impl SearchAggregator {
@@ -23,13 +26,17 @@ impl SearchAggregator {

 impl MockAnalytics {
     #[allow(clippy::new_ret_no_self)]
-    pub fn new(opt: &Opt) -> (Arc<dyn Analytics>, String) {
-        let user = find_user_id(&opt.db_path).unwrap_or_default();
-        (Arc::new(Self), user)
+    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
+        let instance_uid = find_user_id(&opt.db_path);
+        Arc::new(Self { instance_uid })
     }
 }

 impl Analytics for MockAnalytics {
+    fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
+        self.instance_uid.as_ref()
+    }
+
     // These methods are noop and should be optimized out
     fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
     fn get_search(&self, _aggregate: super::SearchAggregator) {}
@@ -5,8 +5,10 @@ mod segment_analytics;

 use std::fs;
 use std::path::{Path, PathBuf};
+use std::str::FromStr;

 use actix_web::HttpRequest;
+use meilisearch_types::InstanceUid;
 use once_cell::sync::Lazy;
 use platform_dirs::AppDirs;
 use serde_json::Value;
@@ -51,13 +53,16 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
 }

 /// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
-fn find_user_id(db_path: &Path) -> Option<String> {
+fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
     fs::read_to_string(db_path.join("instance-uid"))
         .ok()
         .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
+        .and_then(|uid| InstanceUid::from_str(&uid).ok())
 }

 pub trait Analytics: Sync + Send {
+    fn instance_uid(&self) -> Option<&InstanceUid>;
+
     /// The method used to publish most analytics that do not need to be batched every hours
     fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);

@@ -37,6 +37,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
         opt.db_path.join("tasks"),
         opt.db_path.join("update_files"),
         opt.db_path.join("indexes"),
+        opt.dumps_dir.clone(),
         opt.max_index_size.get_bytes() as usize,
         (&opt.indexer_options).try_into()?,
         true,
@@ -53,15 +53,15 @@ async fn main() -> anyhow::Result<()> {
     let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?;

     #[cfg(all(not(debug_assertions), feature = "analytics"))]
-    let (analytics, user) = if !opt.no_analytics {
+    let analytics = if !opt.no_analytics {
         analytics::SegmentAnalytics::new(&opt, &meilisearch).await
     } else {
         analytics::MockAnalytics::new(&opt)
     };
     #[cfg(any(debug_assertions, not(feature = "analytics")))]
-    let (analytics, user) = analytics::MockAnalytics::new(&opt);
+    let analytics = analytics::MockAnalytics::new(&opt);

-    print_launch_resume(&opt, &user, config_read_from);
+    print_launch_resume(&opt, analytics.clone(), config_read_from);

     run_http(index_scheduler, auth_controller, opt, analytics).await?;

|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) {
|
pub fn print_launch_resume(opt: &Opt, analytics: Arc<dyn Analytics>, config_read_from: Option<PathBuf>) {
|
||||||
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
|
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
|
||||||
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
|
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
|
||||||
let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
|
let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
|
||||||
@@ -186,8 +186,8 @@ Anonymous telemetry:\t\"Enabled\""
         }
     }

-    if !user.is_empty() {
-        eprintln!("Instance UID:\t\t\"{}\"", user);
+    if let Some(instance_uid) = analytics.instance_uid() {
+        eprintln!("Instance UID:\t\t\"{}\"", instance_uid.to_string());
     }

     eprintln!();
@@ -2,13 +2,17 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use log::debug;
+use meilisearch_auth::AuthController;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::tasks::KindWithContent;
 use serde_json::json;
+use time::macros::format_description;
+use time::OffsetDateTime;

 use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::routes::SummarizedTaskView;

 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -16,16 +20,28 @@ pub fn configure(cfg: &mut web::ServiceConfig) {

 pub async fn create_dump(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
     req: HttpRequest,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     analytics.publish("Dump Created".to_string(), json!({}), Some(&req));

-    let task = KindWithContent::DumpExport {
-        output: "todo".to_string().into(),
-    };
-    let res = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
-
-    debug!("returns: {:?}", res);
-    Ok(HttpResponse::Accepted().json(res))
+    let dump_uid = OffsetDateTime::now_utc()
+        .format(format_description!(
+            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
+        ))
+        .unwrap();
+
+    let task = KindWithContent::DumpExport {
+        keys: auth_controller.list_keys()?,
+        instance_uid: analytics.instance_uid().cloned(),
+        dump_uid,
+    };
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
+
+    debug!("returns: {:?}", task);
+    Ok(HttpResponse::Accepted().json(task))
 }
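As a side note (illustration, not from the diff), the verbose format description above just encodes the current UTC time, so a dump registered on 2022-10-04 at 16:34:05.123 UTC would get a uid along the lines of the one below, which process_batch later turns into `<dumps_path>/<dump_uid>.dump`:

    let dump_uid = "20221004-163405123".to_string(); // [year][month][day]-[hour][minute][second][millis]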
@@ -109,7 +109,10 @@ pub async fn delete_document(
         index_uid,
         documents_ids: vec![document_id],
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
@@ -314,7 +317,10 @@ pub async fn delete_documents(
         index_uid: path.into_inner(),
         documents_ids: ids,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -327,7 +333,10 @@ pub async fn clear_all_documents(
     let task = KindWithContent::DocumentClear {
         index_uid: path.into_inner(),
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -13,7 +13,7 @@ use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;

-use super::Pagination;
+use super::{Pagination, SummarizedTaskView};

 pub mod documents;
 pub mod search;
@@ -108,7 +108,10 @@ pub async fn create_index(
             index_uid: uid,
             primary_key,
         };
-        let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+        let task: SummarizedTaskView =
+            tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                .await??
+                .into();

         Ok(HttpResponse::Accepted().json(task))
     } else {
@@ -156,7 +159,10 @@ pub async fn update_index(
         primary_key: body.primary_key,
     };

-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -169,7 +175,10 @@ pub async fn delete_index(
     let task = KindWithContent::IndexDeletion {
         index_uid: index_uid.into_inner(),
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     Ok(HttpResponse::Accepted().json(task))
 }
@@ -1,27 +1,16 @@
-use std::collections::BTreeSet;
-use std::marker::PhantomData;
-
 use actix_web::web::Data;
-use fst::IntoStreamer;
 use log::debug;

 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use meilisearch_types::error::ResponseError;
-use meilisearch_types::heed::RoTxn;
-use meilisearch_types::milli::update::Setting;
-use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
-use meilisearch_types::settings::{
-    Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
-    Unchecked,
-};
+use meilisearch_types::settings::{settings, Settings, Unchecked};
 use meilisearch_types::tasks::KindWithContent;
-use meilisearch_types::Index;
 use serde_json::json;

 use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, GuardedData};
-use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
+use crate::routes::SummarizedTaskView;

 #[macro_export]
 macro_rules! make_setting_route {
@@ -33,14 +22,14 @@ macro_rules! make_setting_route {

         use index_scheduler::IndexScheduler;
         use meilisearch_types::milli::update::Setting;
-        use meilisearch_types::settings::Settings;
+        use meilisearch_types::settings::{settings, Settings};
         use meilisearch_types::tasks::KindWithContent;

         use meilisearch_types::error::ResponseError;
         use $crate::analytics::Analytics;
         use $crate::extractors::authentication::{policies::*, GuardedData};
         use $crate::extractors::sequential_extractor::SeqHandler;
-        use $crate::routes::indexes::settings::settings;
+        use $crate::routes::SummarizedTaskView;

         pub async fn delete(
             index_scheduler: GuardedData<
@@ -61,8 +50,10 @@ macro_rules! make_setting_route {
                 is_deletion: true,
                 allow_index_creation,
             };
-            let task =
-                tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+            let task: SummarizedTaskView =
+                tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                    .await??
+                    .into();

             debug!("returns: {:?}", task);
             Ok(HttpResponse::Accepted().json(task))
@@ -97,8 +88,10 @@ macro_rules! make_setting_route {
                 is_deletion: false,
                 allow_index_creation,
             };
-            let task =
-                tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+            let task: SummarizedTaskView =
+                tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                    .await??
+                    .into();

             debug!("returns: {:?}", task);
             Ok(HttpResponse::Accepted().json(task))
@@ -459,7 +452,10 @@ pub async fn update_all(
         is_deletion: false,
         allow_index_creation,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -489,113 +485,11 @@ pub async fn delete_all(
         is_deletion: true,
         allow_index_creation,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
-
-pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
-    let displayed_attributes = index
-        .displayed_fields(rtxn)?
-        .map(|fields| fields.into_iter().map(String::from).collect());
-
-    let searchable_attributes = index
-        .user_defined_searchable_fields(rtxn)?
-        .map(|fields| fields.into_iter().map(String::from).collect());
-
-    let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
-
-    let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
-
-    let criteria = index
-        .criteria(rtxn)?
-        .into_iter()
-        .map(|c| c.to_string())
-        .collect();
-
-    let stop_words = index
-        .stop_words(rtxn)?
-        .map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
-            Ok(stop_words.stream().into_strs()?.into_iter().collect())
-        })
-        .transpose()?
-        .unwrap_or_default();
-    let distinct_field = index.distinct_field(rtxn)?.map(String::from);
-
-    // in milli each word in the synonyms map were split on their separator. Since we lost
-    // this information we are going to put space between words.
-    let synonyms = index
-        .synonyms(rtxn)?
-        .iter()
-        .map(|(key, values)| {
-            (
-                key.join(" "),
-                values.iter().map(|value| value.join(" ")).collect(),
-            )
-        })
-        .collect();
-
-    let min_typo_word_len = MinWordSizeTyposSetting {
-        one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
-        two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
-    };
-
-    let disabled_words = match index.exact_words(rtxn)? {
-        Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
-        None => BTreeSet::new(),
-    };
-
-    let disabled_attributes = index
-        .exact_attributes(rtxn)?
-        .into_iter()
-        .map(String::from)
-        .collect();
-
-    let typo_tolerance = TypoSettings {
-        enabled: Setting::Set(index.authorize_typos(rtxn)?),
-        min_word_size_for_typos: Setting::Set(min_typo_word_len),
-        disable_on_words: Setting::Set(disabled_words),
-        disable_on_attributes: Setting::Set(disabled_attributes),
-    };
-
-    let faceting = FacetingSettings {
-        max_values_per_facet: Setting::Set(
-            index
-                .max_values_per_facet(rtxn)?
-                .unwrap_or(DEFAULT_VALUES_PER_FACET),
-        ),
-    };
-
-    let pagination = PaginationSettings {
-        max_total_hits: Setting::Set(
-            index
-                .pagination_max_total_hits(rtxn)?
-                .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
-        ),
-    };
-
-    Ok(Settings {
-        displayed_attributes: match displayed_attributes {
-            Some(attrs) => Setting::Set(attrs),
-            None => Setting::Reset,
-        },
-        searchable_attributes: match searchable_attributes {
-            Some(attrs) => Setting::Set(attrs),
-            None => Setting::Reset,
-        },
-        filterable_attributes: Setting::Set(filterable_attributes),
-        sortable_attributes: Setting::Set(sortable_attributes),
-        ranking_rules: Setting::Set(criteria),
-        stop_words: Setting::Set(stop_words),
-        distinct_attribute: match distinct_field {
-            Some(field) => Setting::Set(field),
-            None => Setting::Reset,
-        },
-        synonyms: Setting::Set(synonyms),
-        typo_tolerance: Setting::Set(typo_tolerance),
-        faceting: Setting::Set(faceting),
-        pagination: Setting::Set(pagination),
-        _kind: PhantomData,
-    })
-}
@@ -121,7 +121,7 @@ impl From<Details> for DetailsView {
                 indexed_documents,
             } => DetailsView {
                 received_documents: Some(received_documents),
-                indexed_documents: Some(indexed_documents),
+                indexed_documents,
                 ..DetailsView::default()
             },
             Details::Settings { settings } => DetailsView {
@@ -4,6 +4,7 @@ use std::str::FromStr;
 use std::time::Instant;

 use either::Either;
+use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
 use milli::{
@@ -24,10 +25,6 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();

-/// The maximimum number of results that the engine
-/// will be able to return in one search call.
-pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
-
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
@@ -10,6 +10,7 @@ csv = "1.1.6"
 either = { version = "1.6.1", features = ["serde"] }
 milli = { git = "https://github.com/meilisearch/milli.git", branch = "indexation-abortion", default-features = false }
 enum-iterator = "0.7.0"
+fst = "0.4.7"
 proptest = { version = "1.0.0", optional = true }
 proptest-derive = { version = "0.3.0", optional = true }
 roaring = { version = "0.10.0", features = ["serde"] }
@@ -9,5 +9,7 @@ pub mod tasks;
 pub use milli;
 pub use milli::heed;
 pub use milli::Index;
+use uuid::Uuid;

 pub type Document = serde_json::Map<String, serde_json::Value>;
+pub type InstanceUid = Uuid;
@@ -2,9 +2,15 @@ use std::collections::{BTreeMap, BTreeSet};
 use std::marker::PhantomData;
 use std::num::NonZeroUsize;

+use fst::IntoStreamer;
 use milli::update::Setting;
+use milli::{Index, DEFAULT_VALUES_PER_FACET};
 use serde::{Deserialize, Serialize, Serializer};

+/// The maximimum number of results that the engine
+/// will be able to return in one search call.
+pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
+
 fn serialize_with_wildcard<S>(
     field: &Setting<Vec<String>>,
     s: S,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn settings(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &crate::heed::RoTxn,
|
||||||
|
) -> Result<Settings<Checked>, milli::Error> {
|
||||||
|
let displayed_attributes = index
|
||||||
|
.displayed_fields(rtxn)?
|
||||||
|
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||||
|
|
||||||
|
let searchable_attributes = index
|
||||||
|
.user_defined_searchable_fields(rtxn)?
|
||||||
|
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||||
|
|
||||||
|
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
|
||||||
|
|
||||||
|
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
|
||||||
|
|
||||||
|
let criteria = index
|
||||||
|
.criteria(rtxn)?
|
||||||
|
.into_iter()
|
||||||
|
.map(|c| c.to_string())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let stop_words = index
|
||||||
|
.stop_words(rtxn)?
|
||||||
|
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
|
||||||
|
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
||||||
|
})
|
||||||
|
.transpose()?
|
||||||
|
.unwrap_or_default();
|
||||||
|
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||||
|
|
||||||
|
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||||
|
// this information we are going to put space between words.
|
||||||
|
let synonyms = index
|
||||||
|
.synonyms(rtxn)?
|
||||||
|
.iter()
|
||||||
|
.map(|(key, values)| {
|
||||||
|
(
|
||||||
|
key.join(" "),
|
||||||
|
values.iter().map(|value| value.join(" ")).collect(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||||
|
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||||
|
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
|
||||||
|
};
|
||||||
|
|
||||||
|
let disabled_words = match index.exact_words(rtxn)? {
|
||||||
|
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
|
||||||
|
None => BTreeSet::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let disabled_attributes = index
|
||||||
|
.exact_attributes(rtxn)?
|
||||||
|
.into_iter()
|
||||||
|
.map(String::from)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let typo_tolerance = TypoSettings {
|
||||||
|
enabled: Setting::Set(index.authorize_typos(rtxn)?),
|
||||||
|
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
||||||
|
disable_on_words: Setting::Set(disabled_words),
|
||||||
|
disable_on_attributes: Setting::Set(disabled_attributes),
|
||||||
|
};
|
||||||
|
|
||||||
|
let faceting = FacetingSettings {
|
||||||
|
max_values_per_facet: Setting::Set(
|
||||||
|
index
|
||||||
|
.max_values_per_facet(rtxn)?
|
||||||
|
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
let pagination = PaginationSettings {
|
||||||
|
max_total_hits: Setting::Set(
|
||||||
|
index
|
||||||
|
.pagination_max_total_hits(rtxn)?
|
||||||
|
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Settings {
|
||||||
|
displayed_attributes: match displayed_attributes {
|
||||||
|
Some(attrs) => Setting::Set(attrs),
|
||||||
|
None => Setting::Reset,
|
||||||
|
},
|
||||||
|
searchable_attributes: match searchable_attributes {
|
||||||
|
Some(attrs) => Setting::Set(attrs),
|
||||||
|
None => Setting::Reset,
|
||||||
|
},
|
||||||
|
filterable_attributes: Setting::Set(filterable_attributes),
|
||||||
|
sortable_attributes: Setting::Set(sortable_attributes),
|
||||||
|
ranking_rules: Setting::Set(criteria),
|
||||||
|
stop_words: Setting::Set(stop_words),
|
||||||
|
distinct_attribute: match distinct_field {
|
||||||
|
Some(field) => Setting::Set(field),
|
||||||
|
None => Setting::Reset,
|
||||||
|
},
|
||||||
|
synonyms: Setting::Set(synonyms),
|
||||||
|
typo_tolerance: Setting::Set(typo_tolerance),
|
||||||
|
faceting: Setting::Set(faceting),
|
||||||
|
pagination: Setting::Set(pagination),
|
||||||
|
_kind: PhantomData,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod test {
|
pub(crate) mod test {
|
||||||
use proptest::prelude::*;
|
use proptest::prelude::*;
|
||||||
|
@ -3,7 +3,6 @@ use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize, Serializer};
use std::{
    fmt::{Display, Write},
-    path::PathBuf,
    str::FromStr,
};
use time::{Duration, OffsetDateTime};
@ -11,7 +10,9 @@ use uuid::Uuid;

use crate::{
    error::{Code, ResponseError},
+    keys::Key,
    settings::{Settings, Unchecked},
+    InstanceUid,
};

pub type TaskId = u32;
@ -71,6 +72,26 @@ impl Task {
            IndexSwap { lhs, rhs } => Some(vec![lhs, rhs]),
        }
    }
+
+    /// Return the content-uuid if there is one
+    pub fn content_uuid(&self) -> Option<&Uuid> {
+        match self.kind {
+            KindWithContent::DocumentImport {
+                ref content_file, ..
+            } => Some(content_file),
+            KindWithContent::DocumentDeletion { .. }
+            | KindWithContent::DocumentClear { .. }
+            | KindWithContent::Settings { .. }
+            | KindWithContent::IndexDeletion { .. }
+            | KindWithContent::IndexCreation { .. }
+            | KindWithContent::IndexUpdate { .. }
+            | KindWithContent::IndexSwap { .. }
+            | KindWithContent::CancelTask { .. }
+            | KindWithContent::DeleteTasks { .. }
+            | KindWithContent::DumpExport { .. }
+            | KindWithContent::Snapshot => None,
+        }
+    }
}

#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@ -120,7 +141,9 @@ pub enum KindWithContent {
        tasks: RoaringBitmap,
    },
    DumpExport {
-        output: PathBuf,
+        dump_uid: String,
+        keys: Vec<Key>,
+        instance_uid: Option<InstanceUid>,
    },
    Snapshot,
}
@ -167,7 +190,7 @@ impl KindWithContent {
                documents_count, ..
            } => Some(Details::DocumentAddition {
                received_documents: *documents_count,
-                indexed_documents: 0,
+                indexed_documents: Some(0),
            }),
            KindWithContent::DocumentDeletion {
                index_uid: _,
@ -204,6 +227,38 @@ impl KindWithContent {
    }
}
+
+impl From<&KindWithContent> for Option<Details> {
+    fn from(kind: &KindWithContent) -> Self {
+        match kind {
+            KindWithContent::DocumentImport {
+                documents_count, ..
+            } => Some(Details::DocumentAddition {
+                received_documents: *documents_count,
+                indexed_documents: None,
+            }),
+            KindWithContent::DocumentDeletion { .. } => None,
+            KindWithContent::DocumentClear { .. } => None,
+            KindWithContent::Settings { new_settings, .. } => Some(Details::Settings {
+                settings: new_settings.clone(),
+            }),
+            KindWithContent::IndexDeletion { .. } => None,
+            KindWithContent::IndexCreation { primary_key, .. } => Some(Details::IndexInfo {
+                primary_key: primary_key.clone(),
+            }),
+            KindWithContent::IndexUpdate { primary_key, .. } => Some(Details::IndexInfo {
+                primary_key: primary_key.clone(),
+            }),
+            KindWithContent::IndexSwap { .. } => None,
+            KindWithContent::CancelTask { .. } => None,
+            KindWithContent::DeleteTasks { .. } => todo!(),
+            KindWithContent::DumpExport { dump_uid, .. } => Some(Details::Dump {
+                dump_uid: dump_uid.clone(),
+            }),
+            KindWithContent::Snapshot => None,
+        }
+    }
+}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Status {
@ -289,7 +344,7 @@ impl FromStr for Kind {
pub enum Details {
    DocumentAddition {
        received_documents: u64,
-        indexed_documents: u64,
+        indexed_documents: Option<u64>,
    },
    Settings {
        settings: Settings<Unchecked>,