Mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-23 02:27:40 +08:00)
Merge #4655

4655: Remove `exportPuffinReport` experimental feature r=Kerollmops a=Kerollmops

This PR fixes #4605 by removing every trace of Puffin. Puffin is a great tool, but we use a better approach to measuring performance.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Commit: e248d2a1e6
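For context before the file-by-file diff: the removed instrumentation followed the usual puffin workflow of enabling scope collection, recording scopes, closing a frame, and serializing the frame to a `.puffin` file. The sketch below is a minimal, self-contained illustration of that workflow, assuming puffin 0.16 with its `serialization` feature (the same dependency this PR deletes); the function names and output file name are invented for the example and are not Meilisearch code.

// Minimal sketch (not Meilisearch code): the puffin pattern this PR removes.
fn index_batch(docs: &[&str]) -> usize {
    puffin::profile_function!(); // one scope covering the whole function
    let mut bytes = 0;
    for doc in docs {
        // per-document scope, like the removed profile_scope! calls
        puffin::profile_scope!("index_one_document");
        bytes += doc.len();
    }
    bytes
}

fn main() {
    // Recording is off by default; the removed `exportPuffinReports`
    // runtime feature was the switch that turned it on.
    puffin::set_scopes_on(true);
    // A GlobalFrameView collects finished frames, like the removed
    // `IndexScheduler::puffin_frame` field did.
    let frame_view = puffin::GlobalFrameView::default();

    let _ = index_batch(&["hello", "world"]);

    // Close the current frame, then dump it to disk, mirroring the
    // removed scheduler loop.
    puffin::GlobalProfiler::lock().new_frame();
    let mut file = std::fs::File::create("example.puffin").expect("create report file");
    frame_view
        .lock()
        .save_to_writer(&mut file)
        .expect("write puffin report");
}

In the scheduler this per-frame export was gated by the `exportPuffinReports` runtime feature, which is exactly the gate the diff below removes.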
Cargo.lock (generated): 26
@@ -2464,7 +2464,6 @@ dependencies = [
 "meilisearch-auth",
 "meilisearch-types",
 "page_size 0.5.0",
-"puffin",
 "rayon",
 "roaring",
 "serde",

@@ -3231,12 +3230,6 @@ version = "0.4.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
 
-[[package]]
-name = "lz4_flex"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
-
 [[package]]
 name = "macro_rules_attribute"
 version = "0.2.0"

@@ -3341,7 +3334,6 @@ dependencies = [
 "pin-project-lite",
 "platform-dirs",
 "prometheus",
-"puffin",
 "rand",
 "rayon",
 "regex",

@@ -3509,7 +3501,6 @@ dependencies = [
 "obkv",
 "once_cell",
 "ordered-float",
-"puffin",
 "rand",
 "rand_pcg",
 "rayon",

@@ -4180,23 +4171,6 @@ version = "2.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
 
-[[package]]
-name = "puffin"
-version = "0.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21"
-dependencies = [
-"anyhow",
-"bincode",
-"byteorder",
-"cfg-if",
-"instant",
-"lz4_flex",
-"once_cell",
-"parking_lot",
-"serde",
-]
-
 [[package]]
 name = "pulp"
 version = "0.18.9"
@@ -22,7 +22,6 @@ flate2 = "1.0.28"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
-puffin = { version = "0.16.0", features = ["serialization"] }
 rayon = "1.8.1"
 roaring = { version = "0.10.2", features = ["serde"] }
 serde = { version = "1.0.195", features = ["derive"] }
@@ -529,8 +529,6 @@ impl IndexScheduler {
 #[cfg(test)]
 self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
 
-puffin::profile_function!();
-
 let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
 let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
 

@@ -639,8 +637,6 @@ impl IndexScheduler {
 self.breakpoint(crate::Breakpoint::InsideProcessBatch);
 }
 
-puffin::profile_function!(batch.to_string());
-
 match batch {
 Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
 // 1. Retrieve the tasks that matched the query at enqueue-time.

@@ -1226,8 +1222,6 @@ impl IndexScheduler {
 index: &'i Index,
 operation: IndexOperation,
 ) -> Result<Vec<Task>> {
-puffin::profile_function!();
-
 match operation {
 IndexOperation::DocumentClear { mut tasks, .. } => {
 let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
@@ -68,19 +68,6 @@ impl RoFeatures {
 .into())
 }
 }
-
-pub fn check_puffin(&self) -> Result<()> {
-if self.runtime.export_puffin_reports {
-Ok(())
-} else {
-Err(FeatureNotEnabledError {
-disabled_action: "Outputting Puffin reports to disk",
-feature: "export puffin reports",
-issue_link: "https://github.com/meilisearch/product/discussions/693",
-}
-.into())
-}
-}
 }
 
 impl FeatureData {
@@ -32,7 +32,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
 features: _,
 max_number_of_tasks: _,
 max_number_of_batched_tasks: _,
-puffin_frame: _,
 wake_up: _,
 dumps_path: _,
 snapshots_path: _,
@@ -33,7 +33,6 @@ pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;
 
 use std::collections::{BTreeMap, HashMap};
-use std::fs::File;
 use std::io::{self, BufReader, Read};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};

@@ -59,7 +58,6 @@ use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfi
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
 use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
-use puffin::FrameView;
 use rayon::current_num_threads;
 use rayon::prelude::{IntoParallelIterator, ParallelIterator};
 use roaring::RoaringBitmap;

@@ -344,9 +342,6 @@ pub struct IndexScheduler {
 /// The Authorization header to send to the webhook URL.
 pub(crate) webhook_authorization_header: Option<String>,
 
-/// A frame to output the indexation profiling files to disk.
-pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
-
 /// The path used to create the dumps.
 pub(crate) dumps_path: PathBuf,
 

@@ -401,7 +396,6 @@ impl IndexScheduler {
 cleanup_enabled: self.cleanup_enabled,
 max_number_of_tasks: self.max_number_of_tasks,
 max_number_of_batched_tasks: self.max_number_of_batched_tasks,
-puffin_frame: self.puffin_frame.clone(),
 snapshots_path: self.snapshots_path.clone(),
 dumps_path: self.dumps_path.clone(),
 auth_path: self.auth_path.clone(),

@@ -500,7 +494,6 @@ impl IndexScheduler {
 env,
 // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
 wake_up: Arc::new(SignalEvent::auto(true)),
-puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
 autobatching_enabled: options.autobatching_enabled,
 cleanup_enabled: options.cleanup_enabled,
 max_number_of_tasks: options.max_number_of_tasks,

@@ -621,10 +614,6 @@ impl IndexScheduler {
 run.wake_up.wait();
 
 loop {
-let puffin_enabled = run.features().check_puffin().is_ok();
-puffin::set_scopes_on(puffin_enabled);
-puffin::GlobalProfiler::lock().new_frame();
-
 match run.tick() {
 Ok(TickOutcome::TickAgain(_)) => (),
 Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),

@@ -636,31 +625,6 @@ impl IndexScheduler {
 }
 }
 }
-
-// Let's write the previous frame to disk but only if
-// the user wanted to profile with puffin.
-if puffin_enabled {
-let mut frame_view = run.puffin_frame.lock();
-if !frame_view.is_empty() {
-let now = OffsetDateTime::now_utc();
-let mut file = match File::create(format!("{}.puffin", now)) {
-Ok(file) => file,
-Err(e) => {
-tracing::error!("{e}");
-continue;
-}
-};
-if let Err(e) = frame_view.save_to_writer(&mut file) {
-tracing::error!("{e}");
-}
-if let Err(e) = file.sync_all() {
-tracing::error!("{e}");
-}
-// We erase this frame view as it is no more useful. We want to
-// measure the new frames now that we exported the previous ones.
-*frame_view = FrameView::default();
-}
-}
 }
 })
 .unwrap();
@@ -6,7 +6,6 @@ pub struct RuntimeTogglableFeatures {
 pub vector_store: bool,
 pub metrics: bool,
 pub logs_route: bool,
-pub export_puffin_reports: bool,
 }
 
 #[derive(Default, Debug, Clone, Copy)]
@@ -67,7 +67,6 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.13"
 platform-dirs = "0.3.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-puffin = { version = "0.16.0", features = ["serialization"] }
 rand = "0.8.5"
 rayon = "1.8.0"
 regex = "1.10.2"
@@ -47,8 +47,6 @@ pub struct RuntimeTogglableFeatures {
 pub metrics: Option<bool>,
 #[deserr(default)]
 pub logs_route: Option<bool>,
-#[deserr(default)]
-pub export_puffin_reports: Option<bool>,
 }
 
 async fn patch_features(

@@ -68,21 +66,13 @@ async fn patch_features(
 vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
 metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
 logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
-export_puffin_reports: new_features
-.0
-.export_puffin_reports
-.unwrap_or(old_features.export_puffin_reports),
 };
 
 // explicitly destructure for analytics rather than using the `Serialize` implementation, because
 // the it renames to camelCase, which we don't want for analytics.
 // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
-let meilisearch_types::features::RuntimeTogglableFeatures {
-vector_store,
-metrics,
-logs_route,
-export_puffin_reports,
-} = new_features;
+let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
+new_features;
 
 analytics.publish(
 "Experimental features Updated".to_string(),

@@ -90,7 +80,6 @@ async fn patch_features(
 "vector_store": vector_store,
 "metrics": metrics,
 "logs_route": logs_route,
-"export_puffin_reports": export_puffin_reports,
 }),
 Some(&req),
 );
@@ -1859,8 +1859,7 @@ async fn import_dump_v6_containing_experimental_features() {
 {
 "vectorStore": false,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 
@@ -20,8 +20,7 @@ async fn experimental_features() {
 {
 "vectorStore": false,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 

@@ -32,8 +31,7 @@ async fn experimental_features() {
 {
 "vectorStore": true,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 

@@ -44,8 +42,7 @@ async fn experimental_features() {
 {
 "vectorStore": true,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 

@@ -57,8 +54,7 @@ async fn experimental_features() {
 {
 "vectorStore": true,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 

@@ -70,8 +66,7 @@ async fn experimental_features() {
 {
 "vectorStore": true,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 }

@@ -90,8 +85,7 @@ async fn experimental_feature_metrics() {
 {
 "vectorStore": false,
 "metrics": true,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 

@@ -146,7 +140,7 @@ async fn errors() {
 meili_snap::snapshot!(code, @"400 Bad Request");
 meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
 {
-"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
+"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`",
 "code": "bad_request",
 "type": "invalid_request",
 "link": "https://docs.meilisearch.com/errors#bad_request"
@@ -18,8 +18,7 @@ async fn index_with_documents_user_provided<'a>(
 {
 "vectorStore": true,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 

@@ -47,8 +46,7 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I
 {
 "vectorStore": true,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 
@@ -98,8 +98,7 @@ async fn secrets_are_hidden_in_settings() {
 {
 "vectorStore": true,
 "metrics": false,
-"logsRoute": false,
-"exportPuffinReports": false
+"logsRoute": false
 }
 "###);
 
@@ -67,9 +67,6 @@ filter-parser = { path = "../filter-parser" }
 # documents words self-join
 itertools = "0.11.0"
 
-# profiling
-puffin = "0.16.0"
-
 csv = "1.3.0"
 candle-core = { version = "0.4.1" }
 candle-transformers = { version = "0.4.1" }
@@ -21,8 +21,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
 name = "clear_documents"
 )]
 pub fn execute(self) -> Result<u64> {
-puffin::profile_function!();
-
 self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
 let Index {
 env: _env,

@@ -29,8 +29,6 @@ pub fn enrich_documents_batch<R: Read + Seek>(
 autogenerate_docids: bool,
 reader: DocumentsBatchReader<R>,
 ) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
-puffin::profile_function!();
-
 let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
 
 let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
@@ -29,8 +29,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
 settings_diff: &InnerIndexSettingsDiff,
 max_positions_per_attributes: Option<u32>,
 ) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
-puffin::profile_function!();
-
 let max_positions_per_attributes = max_positions_per_attributes
 .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
 let max_memory = indexer.max_memory_by_thread();

@@ -23,8 +23,6 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();
 
 let mut facet_number_docids_sorter = create_sorter(

@@ -28,8 +28,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();
 let options = NormalizerOption { lossy: true, ..Default::default() };
 
@@ -47,8 +47,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 settings_diff: &InnerIndexSettingsDiff,
 geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();
 
 let mut fid_docid_facet_numbers_sorter = create_sorter(

@@ -26,8 +26,6 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();
 
 let mut fid_word_count_docids_sorter = create_sorter(

@@ -20,8 +20,6 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
 primary_key_id: FieldId,
 (lat_fid, lng_fid): (FieldId, FieldId),
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
-
 let mut writer = create_writer(
 indexer.chunk_compression_type,
 indexer.chunk_compression_level,
@@ -91,8 +91,6 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<Vec<ExtractedVectorPoints>> {
-puffin::profile_function!();
-
 let reindex_vectors = settings_diff.reindex_vectors();
 
 let old_fields_ids_map = &settings_diff.old.fields_ids_map;

@@ -295,7 +293,6 @@ fn push_vectors_diff(
 delta: VectorStateDelta,
 reindex_vectors: bool,
 ) -> Result<()> {
-puffin::profile_function!();
 let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
 if must_remove
 // TODO: the below condition works because we erase the vec database when a embedding setting changes.

@@ -367,7 +364,6 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
 embedder: Arc<Embedder>,
 request_threads: &ThreadPoolNoAbort,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
 let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
 let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk
 
@@ -36,8 +36,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
 grenad::Reader<BufReader<File>>,
 grenad::Reader<BufReader<File>>,
 )> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();
 
 let mut word_fid_docids_sorter = create_sorter(

@@ -167,8 +165,6 @@ fn words_into_sorter(
 add_words: &BTreeSet<Vec<u8>>,
 word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
 ) -> Result<()> {
-puffin::profile_function!();
-
 use itertools::merge_join_by;
 use itertools::EitherOrBoth::{Both, Left, Right};
 
@@ -26,7 +26,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
 let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
 let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
 

@@ -71,8 +70,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
 
 // if we change document, we fill the sorter
 if current_document_id.map_or(false, |id| id != document_id) {
-puffin::profile_scope!("Document into sorter");
-
 // FIXME: span inside of a hot loop might degrade performance and create big reports
 let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
 let _entered = span.enter();

@@ -163,7 +160,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
 }
 
 if let Some(document_id) = current_document_id {
-puffin::profile_scope!("Final document into sorter");
 // FIXME: span inside of a hot loop might degrade performance and create big reports
 let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
 let _entered = span.enter();

@@ -176,7 +172,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
 )?;
 }
 {
-puffin::profile_scope!("sorter_into_reader");
 // FIXME: span inside of a hot loop might degrade performance and create big reports
 let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
 let _entered = span.enter();
@@ -25,8 +25,6 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
 indexer: GrenadParameters,
 _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
-
 let max_memory = indexer.max_memory_by_thread();
 
 let mut word_position_docids_sorter = create_sorter(

@@ -104,8 +102,6 @@ fn words_position_into_sorter(
 add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
 word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
 ) -> Result<()> {
-puffin::profile_function!();
-
 use itertools::merge_join_by;
 use itertools::EitherOrBoth::{Both, Left, Right};
 
|
@ -47,8 +47,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
puffin::profile_function!();
|
|
||||||
|
|
||||||
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|
||||||
|| {
|
|| {
|
||||||
original_obkv_chunks
|
original_obkv_chunks
|
||||||
@ -90,7 +88,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
extract_fid_word_count_docids,
|
extract_fid_word_count_docids,
|
||||||
TypedChunk::FieldIdWordCountDocids,
|
TypedChunk::FieldIdWordCountDocids,
|
||||||
"field-id-wordcount-docids",
|
|
||||||
);
|
);
|
||||||
run_extraction_task::<
|
run_extraction_task::<
|
||||||
_,
|
_,
|
||||||
@ -117,7 +114,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
word_fid_docids_reader,
|
word_fid_docids_reader,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"word-docids",
|
|
||||||
);
|
);
|
||||||
|
|
||||||
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
|
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
|
||||||
@ -127,7 +123,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
extract_word_position_docids,
|
extract_word_position_docids,
|
||||||
TypedChunk::WordPositionDocids,
|
TypedChunk::WordPositionDocids,
|
||||||
"word-position-docids",
|
|
||||||
);
|
);
|
||||||
|
|
||||||
run_extraction_task::<
|
run_extraction_task::<
|
||||||
@ -141,7 +136,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
extract_facet_string_docids,
|
extract_facet_string_docids,
|
||||||
TypedChunk::FieldIdFacetStringDocids,
|
TypedChunk::FieldIdFacetStringDocids,
|
||||||
"field-id-facet-string-docids",
|
|
||||||
);
|
);
|
||||||
|
|
||||||
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
|
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
|
||||||
@ -151,7 +145,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
extract_facet_number_docids,
|
extract_facet_number_docids,
|
||||||
TypedChunk::FieldIdFacetNumberDocids,
|
TypedChunk::FieldIdFacetNumberDocids,
|
||||||
"field-id-facet-number-docids",
|
|
||||||
);
|
);
|
||||||
|
|
||||||
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
|
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
|
||||||
@ -161,7 +154,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
extract_word_pair_proximity_docids,
|
extract_word_pair_proximity_docids,
|
||||||
TypedChunk::WordPairProximityDocids,
|
TypedChunk::WordPairProximityDocids,
|
||||||
"word-pair-proximity-docids",
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,7 +177,6 @@ fn run_extraction_task<FE, FS, M>(
 lmdb_writer_sx: Sender<Result<TypedChunk>>,
 extract_fn: FE,
 serialize_fn: FS,
-name: &'static str,
 ) where
 FE: Fn(
 grenad::Reader<CursorClonableMmap>,

@@ -203,7 +194,7 @@ fn run_extraction_task<FE, FS, M>(
 rayon::spawn(move || {
 let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: &current_span, "extract_multiple_chunks");
 let _entered = child_span.enter();
-puffin::profile_scope!("extract_multiple_chunks", name);
 match extract_fn(chunk, indexer, &settings_diff) {
 Ok(chunk) => {
 let _ = lmdb_writer_sx.send(Ok(serialize_fn(chunk)));
@@ -61,7 +61,6 @@ pub fn sorter_into_reader(
 sorter: grenad::Sorter<MergeFn>,
 indexer: GrenadParameters,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-puffin::profile_function!();
 let mut writer = create_writer(
 indexer.chunk_compression_type,
 indexer.chunk_compression_level,

@@ -182,8 +181,6 @@ where
 FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
 FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
 {
-puffin::profile_function!();
-
 let mut buffer = Vec::new();
 let database = database.remap_types::<Bytes, Bytes>();
 
@@ -141,8 +141,6 @@ where
 mut self,
 reader: DocumentsBatchReader<R>,
 ) -> Result<(Self, StdResult<u64, UserError>)> {
-puffin::profile_function!();
-
 // Early return when there is no document to add
 if reader.is_empty() {
 return Ok((self, Ok(0)));

@@ -187,8 +185,6 @@ where
 mut self,
 to_delete: Vec<String>,
 ) -> Result<(Self, StdResult<u64, UserError>)> {
-puffin::profile_function!();
-
 // Early return when there is no document to add
 if to_delete.is_empty() {
 // Maintains Invariant: remove documents actually always returns Ok for the inner result

@@ -223,8 +219,6 @@ where
 mut self,
 to_delete: &RoaringBitmap,
 ) -> Result<(Self, u64)> {
-puffin::profile_function!();
-
 // Early return when there is no document to add
 if to_delete.is_empty() {
 return Ok((self, 0));
@@ -249,8 +243,6 @@ where
 name = "index_documents"
 )]
 pub fn execute(mut self) -> Result<DocumentAdditionResult> {
-puffin::profile_function!();
-
 if self.added_documents == 0 && self.deleted_documents == 0 {
 let number_of_documents = self.index.number_of_documents(self.wtxn)?;
 return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });

@@ -279,8 +271,6 @@ where
 FP: Fn(UpdateIndexingStep) + Sync,
 FA: Fn() -> bool + Sync,
 {
-puffin::profile_function!();
-
 let TransformOutput {
 primary_key,
 mut settings_diff,

@@ -404,7 +394,7 @@ where
 rayon::spawn(move || {
 let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
 let _enter = child_span.enter();
-puffin::profile_scope!("extract_and_send_grenad_chunks");
 // split obkv file into several chunks
 let original_chunk_iter = match original_documents {
 Some(original_documents) => {
@@ -612,8 +602,6 @@ where
 FP: Fn(UpdateIndexingStep) + Sync,
 FA: Fn() -> bool + Sync,
 {
-puffin::profile_function!();
-
 // Merged databases are already been indexed, we start from this count;
 let mut databases_seen = MERGED_DATABASE_COUNT;
 

@@ -657,7 +645,6 @@ where
 {
 let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
 let _entered = span.enter();
-puffin::profile_scope!("compute_prefix_diffs");
 
 current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
 
@@ -797,8 +784,6 @@ fn execute_word_prefix_docids(
 common_prefix_fst_words: &[&[String]],
 del_prefix_fst_words: &HashSet<Vec<u8>>,
 ) -> Result<()> {
-puffin::profile_function!();
-
 let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
 builder.chunk_compression_type = indexer_config.chunk_compression_type;
 builder.chunk_compression_level = indexer_config.chunk_compression_level;
@@ -161,8 +161,6 @@ impl<'a, 'i> Transform<'a, 'i> {
 FP: Fn(UpdateIndexingStep) + Sync,
 FA: Fn() -> bool + Sync,
 {
-puffin::profile_function!();
-
 let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
 let external_documents_ids = self.index.external_documents_ids();
 let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;

@@ -375,8 +373,6 @@ impl<'a, 'i> Transform<'a, 'i> {
 where
 FA: Fn() -> bool + Sync,
 {
-puffin::profile_function!();
-
 // there may be duplicates in the documents to remove.
 to_remove.sort_unstable();
 to_remove.dedup();

@@ -466,8 +462,6 @@ impl<'a, 'i> Transform<'a, 'i> {
 where
 FA: Fn() -> bool + Sync,
 {
-puffin::profile_function!();
-
 let mut documents_deleted = 0;
 let mut document_sorter_value_buffer = Vec::new();
 let mut document_sorter_key_buffer = Vec::new();

@@ -686,8 +680,6 @@ impl<'a, 'i> Transform<'a, 'i> {
 where
 F: Fn(UpdateIndexingStep) + Sync,
 {
-puffin::profile_function!();
-
 let primary_key = self
 .index
 .primary_key(wtxn)?
@@ -118,65 +118,6 @@ impl TypedChunk {
 }
 }
 
-impl TypedChunk {
-pub fn to_debug_string(&self) -> String {
-match self {
-TypedChunk::FieldIdDocidFacetStrings(grenad) => {
-format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
-format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::Documents(grenad) => {
-format!("Documents {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::FieldIdWordCountDocids(grenad) => {
-format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::WordDocids {
-word_docids_reader,
-exact_word_docids_reader,
-word_fid_docids_reader,
-} => format!(
-"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {}, word_fid_docids_reader: {} }}",
-word_docids_reader.len(),
-exact_word_docids_reader.len(),
-word_fid_docids_reader.len()
-),
-TypedChunk::WordPositionDocids(grenad) => {
-format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::WordPairProximityDocids(grenad) => {
-format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::FieldIdFacetStringDocids((grenad, _)) => {
-format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::FieldIdFacetNumberDocids(grenad) => {
-format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::FieldIdFacetExistsDocids(grenad) => {
-format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
-format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
-format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::GeoPoints(grenad) => {
-format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
-}
-TypedChunk::VectorPoints{ remove_vectors, manual_vectors, embeddings, expected_dimension, embedder_name } => {
-format!("VectorPoints {{ remove_vectors: {}, manual_vectors: {}, embeddings: {}, dimension: {}, embedder_name: {} }}", remove_vectors.len(), manual_vectors.len(), embeddings.as_ref().map(|e| e.len()).unwrap_or_default(), expected_dimension, embedder_name)
-}
-TypedChunk::ScriptLanguageDocids(sl_map) => {
-format!("ScriptLanguageDocids {{ number_of_entries: {} }}", sl_map.len())
-}
-}
-}
-}
-
 /// Write typed chunk in the corresponding LMDB database of the provided index.
 /// Return new documents seen.
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
@@ -185,8 +126,6 @@ pub(crate) fn write_typed_chunk_into_index(
 index: &Index,
 wtxn: &mut RwTxn,
 ) -> Result<(RoaringBitmap, bool)> {
-puffin::profile_function!(typed_chunks[0].to_debug_string());
-
 let mut is_merged_database = false;
 match typed_chunks[0] {
 TypedChunk::Documents(_) => {

@@ -877,7 +816,6 @@ where
 FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
 FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
 {
-puffin::profile_function!();
 let mut buffer = Vec::new();
 let database = database.remap_types::<Bytes, Bytes>();
 
@@ -398,8 +398,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 FP: Fn(UpdateIndexingStep) + Sync,
 FA: Fn() -> bool + Sync,
 {
-puffin::profile_function!();
-
 // if the settings are set before any document update, we don't need to do anything, and
 // will set the primary key during the first document addition.
 if self.index.number_of_documents(self.wtxn)? == 0 {

@@ -52,8 +52,6 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
 common_prefix_fst_words: &[&[String]],
 del_prefix_fst_words: &HashSet<Vec<u8>>,
 ) -> Result<()> {
-puffin::profile_function!();
-
 // It is forbidden to keep a mutable reference into the database
 // and write into it at the same time, therefore we write into another file.
 let mut prefix_docids_sorter = create_sorter(
@@ -57,7 +57,6 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
 common_prefix_fst_words: &[&[String]],
 del_prefix_fst_words: &HashSet<Vec<u8>>,
 ) -> Result<()> {
-puffin::profile_function!();
 debug!("Computing and writing the word levels integers docids into LMDB on disk...");
 
 let mut prefix_integer_docids_sorter = create_sorter(

@@ -45,8 +45,6 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
 name = "words_prefix_fst"
 )]
 pub fn execute(self) -> Result<()> {
-puffin::profile_function!();
-
 let words_fst = self.index.words_fst(self.wtxn)?;
 
 let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];