2021-07-07 11:42:14 +02:00
mod datasets_paths ;
2021-09-22 12:10:21 +02:00
mod utils ;
2021-07-07 11:42:14 +02:00
2021-09-22 12:10:21 +02:00
use std ::fs ::{ create_dir_all , remove_dir_all } ;
2021-07-07 11:42:14 +02:00
use std ::path ::Path ;
2024-11-19 10:45:27 +01:00
use bumpalo ::Bump ;
2021-07-07 11:42:14 +02:00
use criterion ::{ criterion_group , criterion_main , Criterion } ;
2024-11-19 10:45:27 +01:00
use milli ::documents ::PrimaryKey ;
2022-08-11 11:15:46 +02:00
use milli ::heed ::{ EnvOpenOptions , RwTxn } ;
2024-12-10 16:30:48 +01:00
use milli ::progress ::Progress ;
2024-11-19 10:45:27 +01:00
use milli ::update ::new ::indexer ;
use milli ::update ::{ IndexDocumentsMethod , IndexerConfig , Settings } ;
use milli ::vector ::EmbeddingConfigs ;
2021-07-07 11:42:14 +02:00
use milli ::Index ;
2022-06-16 10:17:58 +02:00
use rand ::seq ::SliceRandom ;
use rand_chacha ::rand_core ::SeedableRng ;
use roaring ::RoaringBitmap ;
2021-07-07 11:42:14 +02:00
2024-12-02 18:13:56 +01:00
#[ cfg(not(windows)) ]
2022-08-10 12:31:09 +02:00
// Install mimalloc as the global allocator for the benchmark binary; the `cfg`
// attribute placed just before this item limits it to non-Windows targets.
#[ global_allocator ]
static ALLOC : mimalloc ::MiMalloc = mimalloc ::MiMalloc ;
2022-06-16 10:17:58 +02:00
/// Value handed to `sample_size` on the criterion benchmark groups below.
const BENCHMARK_ITERATION : usize = 10 ;
2021-07-07 11:42:14 +02:00
/// Ensures that `path` exists as an empty directory.
///
/// Any directory already present at `path` is removed together with its contents,
/// then the directory (and any missing parent) is created again.
///
/// # Panics
///
/// Panics if removing the previous directory fails for a reason other than the
/// directory not existing, or if the directory cannot be created. The panic
/// message names the path and the failed operation.
fn setup_dir(path: impl AsRef<Path>) {
    let path = path.as_ref();

    if let Err(e) = remove_dir_all(path) {
        // A missing directory is the normal state on a first run, not an error.
        if e.kind() != std::io::ErrorKind::NotFound {
            panic!("failed to remove directory {}: {}", path.display(), e);
        }
    }

    if let Err(e) = create_dir_all(path) {
        panic!("failed to create directory {}: {}", path.display(), e);
    }
}
fn setup_index ( ) -> Index {
let path = " benches.mmdb " ;
2023-01-17 18:01:26 +01:00
setup_dir ( path ) ;
2021-07-07 11:42:14 +02:00
let mut options = EnvOpenOptions ::new ( ) ;
options . map_size ( 100 * 1024 * 1024 * 1024 ) ; // 100 GB
2024-11-19 13:35:39 +01:00
options . max_readers ( 100 ) ;
2021-07-07 11:42:14 +02:00
Index ::new ( options , path ) . unwrap ( )
}
2022-06-16 10:17:58 +02:00
fn setup_settings < ' t > (
2023-11-23 12:20:44 +01:00
wtxn : & mut RwTxn < ' t > ,
2022-06-16 10:17:58 +02:00
index : & ' t Index ,
primary_key : & str ,
searchable_fields : & [ & str ] ,
filterable_fields : & [ & str ] ,
sortable_fields : & [ & str ] ,
) {
let config = IndexerConfig ::default ( ) ;
let mut builder = Settings ::new ( wtxn , index , & config ) ;
builder . set_primary_key ( primary_key . to_owned ( ) ) ;
let searchable_fields = searchable_fields . iter ( ) . map ( | s | s . to_string ( ) ) . collect ( ) ;
builder . set_searchable_fields ( searchable_fields ) ;
let filterable_fields = filterable_fields . iter ( ) . map ( | s | s . to_string ( ) ) . collect ( ) ;
builder . set_filterable_fields ( filterable_fields ) ;
let sortable_fields = sortable_fields . iter ( ) . map ( | s | s . to_string ( ) ) . collect ( ) ;
builder . set_sortable_fields ( sortable_fields ) ;
2022-10-05 17:41:07 +02:00
builder . execute ( | _ | ( ) , | | false ) . unwrap ( ) ;
2022-06-16 10:17:58 +02:00
}
2023-01-17 18:01:26 +01:00
/// Creates a fresh index via `setup_index` and commits the given settings to it.
///
/// The settings are written through `setup_settings` in a single write transaction
/// that is committed before the index is returned. Panics if the transaction cannot
/// be opened or committed.
fn setup_index_with_settings(
    primary_key: &str,
    searchable_fields: &[&str],
    filterable_fields: &[&str],
    sortable_fields: &[&str],
) -> milli::Index {
    let fresh_index = setup_index();

    let mut txn = fresh_index.write_txn().unwrap();
    setup_settings(
        &mut txn,
        &fresh_index,
        primary_key,
        searchable_fields,
        filterable_fields,
        sortable_fields,
    );
    txn.commit().unwrap();

    fresh_index
}
/// Picks document ids from `index` at random and groups them into batches.
///
/// Up to `count` ids are selected with `choose_multiple` from all document ids of
/// the index, using a fixed-seed ChaCha8 generator so the selection is the same on
/// every run. The selected ids are then split into consecutive groups of at most
/// `batch_size` ids, each returned as its own `RoaringBitmap`.
///
/// # Panics
///
/// Panics if the read transaction or the document id lookup fails, or if
/// `batch_size` is `0` (`slice::chunks` rejects a zero chunk size).
fn choose_document_ids_from_index_batched(
    index: &Index,
    count: usize,
    batch_size: usize,
) -> Vec<RoaringBitmap> {
    let rtxn = index.read_txn().unwrap();

    // Fixed seed: the benchmark must delete the same documents on every run.
    let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(7700);

    let document_ids: Vec<_> = index.documents_ids(&rtxn).unwrap().into_iter().collect();
    let document_ids_to_delete: Vec<_> =
        document_ids.choose_multiple(&mut rng, count).copied().collect();

    // One bitmap per batch of ids to delete.
    document_ids_to_delete
        .chunks(batch_size)
        .map(|chunk| chunk.iter().copied().collect::<RoaringBitmap>())
        .collect()
}
2021-07-07 11:42:14 +02:00
fn indexing_songs_default ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2021-07-07 11:42:14 +02:00
group . bench_function ( " Indexing songs with default settings " , | b | {
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " album " , " artist " ] ;
let filterable_fields =
[ " released-timestamp " , " duration-float " , " genre " , " country " , " artist " ] ;
let sortable_fields = [ ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
2021-07-07 11:42:14 +02:00
} ,
2021-08-19 15:02:43 +02:00
move | index | {
2021-12-08 14:12:07 +01:00
let config = IndexerConfig ::default ( ) ;
2021-08-19 15:02:43 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2021-09-22 12:10:21 +02:00
2021-07-07 11:42:14 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2021-08-19 15:02:43 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
2021-07-07 11:42:14 +02:00
} ,
)
} ) ;
}
2022-07-04 15:10:12 +02:00
fn reindexing_songs_default ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Reindexing songs with default settings " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " id " ;
let searchable_fields = [ " title " , " album " , " artist " ] ;
let filterable_fields =
[ " released-timestamp " , " duration-float " , " genre " , " country " , " artist " ] ;
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-07-04 15:10:12 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2022-06-16 10:17:58 +02:00
fn deleting_songs_in_batches_default ( c : & mut Criterion ) {
2022-02-21 16:30:13 +01:00
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2022-06-16 10:32:58 +02:00
group . bench_function ( " -songs-delete-facetedString-facetedNumber-searchable- " , | b | {
2022-02-21 16:30:13 +01:00
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " album " , " artist " ] ;
let filterable_fields =
[ " released-timestamp " , " duration-float " , " genre " , " country " , " artist " ] ;
let sortable_fields = [ ] ;
2022-02-21 16:30:13 +01:00
2022-06-16 10:17:58 +02:00
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
// We index only one half of the dataset in the setup part
// as we don't care about the time it takes.
2022-02-21 16:30:13 +01:00
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2024-11-19 10:45:27 +01:00
2022-06-16 10:17:58 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-06-16 10:17:58 +02:00
let count = 1250 ;
let batch_size = 250 ;
let document_ids_to_delete =
choose_document_ids_from_index_batched ( & index , count , batch_size ) ;
( index , document_ids_to_delete )
} ,
move | ( index , document_ids_to_delete ) | {
2023-11-06 11:56:46 +01:00
delete_documents_from_ids ( index , document_ids_to_delete )
2022-06-16 10:17:58 +02:00
} ,
)
} ) ;
}
fn indexing_songs_in_three_batches_default ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Indexing songs in three batches with default settings " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " id " ;
let searchable_fields = [ " title " , " album " , " artist " ] ;
let filterable_fields =
[ " released-timestamp " , " duration-float " , " genre " , " country " , " artist " ] ;
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
2022-02-21 16:30:13 +01:00
// We index only one half of the dataset in the setup part
2022-02-22 17:39:24 +01:00
// as we don't care about the time it takes.
2022-02-21 16:30:13 +01:00
let config = IndexerConfig ::default ( ) ;
2022-06-16 10:17:58 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS_1_2 , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-21 16:30:13 +01:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-02-21 16:30:13 +01:00
index
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS_3_4 , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-21 16:30:13 +01:00
2024-11-19 10:45:27 +01:00
wtxn . commit ( ) . unwrap ( ) ;
drop ( rtxn ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS_4_4 , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-21 16:30:13 +01:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-02-21 16:30:13 +01:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2021-07-07 11:42:14 +02:00
fn indexing_songs_without_faceted_numbers ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2021-07-07 11:42:14 +02:00
group . bench_function ( " Indexing songs without faceted numbers " , | b | {
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " album " , " artist " ] ;
let filterable_fields = [ " genre " , " country " , " artist " ] ;
let sortable_fields = [ ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
2021-07-07 11:42:14 +02:00
} ,
2021-08-19 15:02:43 +02:00
move | index | {
2021-12-08 14:12:07 +01:00
let config = IndexerConfig ::default ( ) ;
2021-08-19 15:02:43 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2021-09-22 12:10:21 +02:00
2021-07-07 11:42:14 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2021-08-19 15:02:43 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
2021-07-07 11:42:14 +02:00
} ,
)
} ) ;
}
fn indexing_songs_without_faceted_fields ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2021-07-07 11:42:14 +02:00
group . bench_function ( " Indexing songs without any facets " , | b | {
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " album " , " artist " ] ;
let filterable_fields = [ ] ;
let sortable_fields = [ ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
2021-07-07 11:42:14 +02:00
} ,
2021-08-19 15:02:43 +02:00
move | index | {
2021-12-08 14:12:07 +01:00
let config = IndexerConfig ::default ( ) ;
2021-08-19 15:02:43 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_SONGS , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2021-09-22 12:10:21 +02:00
2021-07-07 11:42:14 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2021-08-19 15:02:43 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
2021-07-07 11:42:14 +02:00
} ,
)
} ) ;
}
fn indexing_wiki ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2021-07-07 11:42:14 +02:00
group . bench_function ( " Indexing wiki " , | b | {
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " body " ] ;
let filterable_fields = [ ] ;
let sortable_fields = [ ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
2021-07-07 11:42:14 +02:00
} ,
2021-08-19 15:02:43 +02:00
move | index | {
2021-12-08 14:12:07 +01:00
let config = IndexerConfig ::default ( ) ;
2021-08-19 15:02:43 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_WIKI_ARTICLES , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2021-09-22 12:10:21 +02:00
2021-07-07 11:42:14 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2021-08-19 15:02:43 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
2021-07-07 11:42:14 +02:00
} ,
)
} ) ;
}
2022-07-04 15:10:12 +02:00
fn reindexing_wiki ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Reindexing wiki " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " id " ;
let searchable_fields = [ " title " , " body " ] ;
let filterable_fields = [ ] ;
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-07-04 15:10:12 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_WIKI_ARTICLES , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_WIKI_ARTICLES , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2022-06-16 10:17:58 +02:00
fn deleting_wiki_in_batches_default ( c : & mut Criterion ) {
2022-02-21 17:59:03 +01:00
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2022-06-16 10:32:58 +02:00
group . bench_function ( " -wiki-delete-searchable- " , | b | {
2022-02-21 17:59:03 +01:00
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " body " ] ;
let filterable_fields = [ ] ;
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
2022-02-21 17:59:03 +01:00
2022-06-16 10:17:58 +02:00
// We index only one half of the dataset in the setup part
// as we don't care about the time it takes.
2022-02-21 17:59:03 +01:00
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_WIKI_ARTICLES , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2024-11-19 10:45:27 +01:00
2022-06-16 10:17:58 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-06-16 10:17:58 +02:00
let count = 1250 ;
let batch_size = 250 ;
let document_ids_to_delete =
choose_document_ids_from_index_batched ( & index , count , batch_size ) ;
( index , document_ids_to_delete )
} ,
move | ( index , document_ids_to_delete ) | {
2023-11-06 11:56:46 +01:00
delete_documents_from_ids ( index , document_ids_to_delete )
2022-06-16 10:17:58 +02:00
} ,
)
} ) ;
}
fn indexing_wiki_in_three_batches ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Indexing wiki in three batches " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " id " ;
let searchable_fields = [ " title " , " body " ] ;
let filterable_fields = [ ] ;
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
2022-02-21 17:59:03 +01:00
// We index only one half of the dataset in the setup part
2022-02-22 17:39:24 +01:00
// as we don't care about the time it takes.
2022-02-21 17:59:03 +01:00
let config = IndexerConfig ::default ( ) ;
2024-11-19 10:45:27 +01:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents =
utils ::documents_from ( datasets_paths ::SMOL_WIKI_ARTICLES_1_2 , " csv " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-21 17:59:03 +01:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-02-21 17:59:03 +01:00
index
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
2022-02-21 17:59:03 +01:00
2024-11-19 10:45:27 +01:00
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
2022-02-21 17:59:03 +01:00
let documents =
utils ::documents_from ( datasets_paths ::SMOL_WIKI_ARTICLES_3_4 , " csv " ) ;
2024-11-19 10:45:27 +01:00
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
2022-02-21 17:59:03 +01:00
2024-11-19 10:45:27 +01:00
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-21 17:59:03 +01:00
2024-11-19 10:45:27 +01:00
wtxn . commit ( ) . unwrap ( ) ;
drop ( rtxn ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
2022-02-21 17:59:03 +01:00
let documents =
utils ::documents_from ( datasets_paths ::SMOL_WIKI_ARTICLES_4_4 , " csv " ) ;
2024-11-19 10:45:27 +01:00
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2024-11-19 10:45:27 +01:00
)
. unwrap ( ) ;
2022-02-21 17:59:03 +01:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-02-21 17:59:03 +01:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2021-07-07 11:42:14 +02:00
fn indexing_movies_default ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2021-07-07 11:42:14 +02:00
group . bench_function ( " Indexing movies with default settings " , | b | {
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " overview " ] ;
2022-07-18 10:34:12 +02:00
let filterable_fields = [ " release_date " , " genres " ] ;
2022-06-16 10:17:58 +02:00
let sortable_fields = [ ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
2021-07-07 11:42:14 +02:00
} ,
2021-08-19 15:02:43 +02:00
move | index | {
2021-12-08 14:12:07 +01:00
let config = IndexerConfig ::default ( ) ;
2021-08-19 15:02:43 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::MOVIES , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2021-07-07 11:42:14 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2021-08-19 15:02:43 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
2021-07-07 11:42:14 +02:00
} ,
)
} ) ;
}
2022-07-04 15:10:12 +02:00
fn reindexing_movies_default ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Reindexing movies with default settings " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " id " ;
let searchable_fields = [ " title " , " overview " ] ;
2022-07-18 10:34:12 +02:00
let filterable_fields = [ " release_date " , " genres " ] ;
2022-07-04 15:10:12 +02:00
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-07-04 15:10:12 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::MOVIES , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::MOVIES , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2022-06-16 10:17:58 +02:00
fn deleting_movies_in_batches_default ( c : & mut Criterion ) {
2022-02-22 13:47:37 +01:00
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2022-06-16 10:32:58 +02:00
group . bench_function ( " -movies-delete-facetedString-facetedNumber-searchable- " , | b | {
2022-02-22 13:47:37 +01:00
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [ " title " , " overview " ] ;
2022-07-18 10:34:12 +02:00
let filterable_fields = [ " release_date " , " genres " ] ;
2022-06-16 10:17:58 +02:00
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
2022-02-22 13:47:37 +01:00
2022-06-16 10:17:58 +02:00
// We index only one half of the dataset in the setup part
// as we don't care about the time it takes.
2022-02-22 13:47:37 +01:00
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::MOVIES , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2024-11-19 10:45:27 +01:00
2022-06-16 10:17:58 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-06-16 10:17:58 +02:00
let count = 1250 ;
let batch_size = 250 ;
let document_ids_to_delete =
choose_document_ids_from_index_batched ( & index , count , batch_size ) ;
( index , document_ids_to_delete )
} ,
move | ( index , document_ids_to_delete ) | {
2023-11-06 11:56:46 +01:00
delete_documents_from_ids ( index , document_ids_to_delete )
2022-06-16 10:17:58 +02:00
} ,
)
} ) ;
}
2022-02-22 13:47:37 +01:00
2023-11-06 11:56:46 +01:00
fn delete_documents_from_ids ( index : Index , document_ids_to_delete : Vec < RoaringBitmap > ) {
2024-11-19 10:45:27 +01:00
let config = IndexerConfig ::default ( ) ;
2023-11-06 11:56:46 +01:00
for ids in document_ids_to_delete {
2024-11-19 10:45:27 +01:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let primary_key = index . primary_key ( & rtxn ) . unwrap ( ) . unwrap ( ) ;
let primary_key = PrimaryKey ::new ( primary_key , & db_fields_ids_map ) . unwrap ( ) ;
let mut indexer = indexer ::DocumentDeletion ::new ( ) ;
indexer . delete_documents_by_docids ( ids ) ;
let indexer_alloc = Bump ::new ( ) ;
let document_changes = indexer . into_changes ( & indexer_alloc , primary_key ) ;
indexer ::index (
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
Some ( primary_key ) ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2024-11-19 10:45:27 +01:00
)
. unwrap ( ) ;
2023-11-06 11:56:46 +01:00
2024-11-19 10:45:27 +01:00
wtxn . commit ( ) . unwrap ( ) ;
2023-11-06 11:56:46 +01:00
}
index . prepare_for_closing ( ) . wait ( ) ;
}
2022-06-16 10:17:58 +02:00
fn indexing_movies_in_three_batches ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Indexing movies in three batches " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " id " ;
let searchable_fields = [ " title " , " overview " ] ;
2022-07-18 10:34:12 +02:00
let filterable_fields = [ " release_date " , " genres " ] ;
2022-06-16 10:17:58 +02:00
let sortable_fields = [ ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
2022-02-22 13:47:37 +01:00
// We index only one half of the dataset in the setup part
2022-02-22 17:39:24 +01:00
// as we don't care about the time it takes.
2022-02-22 13:47:37 +01:00
let config = IndexerConfig ::default ( ) ;
2024-11-19 10:45:27 +01:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::MOVIES_1_2 , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-22 13:47:37 +01:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-02-22 13:47:37 +01:00
index
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::MOVIES_3_4 , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-22 13:47:37 +01:00
2024-11-19 10:45:27 +01:00
wtxn . commit ( ) . unwrap ( ) ;
drop ( rtxn ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::MOVIES_4_4 , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
2022-02-22 13:47:37 +01:00
2024-11-19 10:45:27 +01:00
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-02-22 13:47:37 +01:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-02-22 13:47:37 +01:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2022-05-02 17:00:03 +02:00
fn indexing_nested_movies_default ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2022-05-02 17:00:03 +02:00
group . bench_function ( " Indexing nested movies with default settings " , | b | {
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
2022-05-02 17:00:03 +02:00
let searchable_fields = [
" title " ,
" overview " ,
" provider_names " ,
" genres " ,
" crew.name " ,
" cast.character " ,
" cast.name " ,
2022-06-16 10:17:58 +02:00
] ;
2022-05-02 17:00:03 +02:00
let filterable_fields = [
" popularity " ,
" release_date " ,
" runtime " ,
" vote_average " ,
" external_ids " ,
" keywords " ,
" providers.buy.name " ,
" providers.rent.name " ,
" providers.flatrate.name " ,
" provider_names " ,
" genres " ,
" crew.name " ,
" cast.character " ,
" cast.name " ,
2022-06-16 10:17:58 +02:00
] ;
let sortable_fields = [ " popularity " , " runtime " , " vote_average " , " release_date " ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
2022-05-02 17:00:03 +02:00
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::NESTED_MOVIES , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-05-02 17:00:03 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-05-02 17:00:03 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2022-06-16 10:17:58 +02:00
fn deleting_nested_movies_in_batches_default ( c : & mut Criterion ) {
2022-05-02 17:00:03 +02:00
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2022-06-16 10:32:58 +02:00
group . bench_function ( " -movies-delete-facetedString-facetedNumber-searchable-nested- " , | b | {
2022-05-02 17:00:03 +02:00
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " id " ;
let searchable_fields = [
" title " ,
" overview " ,
" provider_names " ,
" genres " ,
" crew.name " ,
" cast.character " ,
" cast.name " ,
] ;
let filterable_fields = [
" popularity " ,
" release_date " ,
" runtime " ,
" vote_average " ,
" external_ids " ,
" keywords " ,
" providers.buy.name " ,
" providers.rent.name " ,
" providers.flatrate.name " ,
" provider_names " ,
" genres " ,
" crew.name " ,
" cast.character " ,
" cast.name " ,
] ;
let sortable_fields = [ " popularity " , " runtime " , " vote_average " , " release_date " ] ;
2022-05-02 17:00:03 +02:00
2022-06-16 10:17:58 +02:00
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
// We index only one half of the dataset in the setup part
// as we don't care about the time it takes.
2022-05-02 17:00:03 +02:00
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::NESTED_MOVIES , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2024-11-19 10:45:27 +01:00
2022-06-16 10:17:58 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-06-16 10:17:58 +02:00
let count = 1250 ;
let batch_size = 250 ;
let document_ids_to_delete =
choose_document_ids_from_index_batched ( & index , count , batch_size ) ;
( index , document_ids_to_delete )
} ,
move | ( index , document_ids_to_delete ) | {
2023-11-06 11:56:46 +01:00
delete_documents_from_ids ( index , document_ids_to_delete )
2022-06-16 10:17:58 +02:00
} ,
)
} ) ;
}
2022-05-02 17:00:03 +02:00
2022-06-16 10:17:58 +02:00
fn indexing_nested_movies_without_faceted_fields ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Indexing nested movies without any facets " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " id " ;
2022-05-02 17:00:03 +02:00
let searchable_fields = [
" title " ,
" overview " ,
" provider_names " ,
" genres " ,
" crew.name " ,
" cast.character " ,
" cast.name " ,
2022-06-16 10:17:58 +02:00
] ;
let filterable_fields = [ ] ;
let sortable_fields = [ ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
2022-05-02 17:00:03 +02:00
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::NESTED_MOVIES , " json " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-05-02 17:00:03 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-05-02 17:00:03 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2021-09-13 18:08:28 +02:00
fn indexing_geo ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
2022-06-16 10:17:58 +02:00
group . sample_size ( BENCHMARK_ITERATION ) ;
2021-09-13 18:08:28 +02:00
group . bench_function ( " Indexing geo_point " , | b | {
b . iter_with_setup (
move | | {
2022-06-16 10:17:58 +02:00
let primary_key = " geonameid " ;
let searchable_fields = [ " name " , " alternatenames " , " elevation " ] ;
let filterable_fields = [ " _geo " , " population " , " elevation " ] ;
let sortable_fields = [ " _geo " , " population " , " elevation " ] ;
setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
)
} ,
move | index | {
2021-12-08 14:12:07 +01:00
let config = IndexerConfig ::default ( ) ;
2021-09-13 18:08:28 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_ALL_COUNTRIES , " jsonl " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2021-09-13 18:08:28 +02:00
2022-06-16 10:17:58 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2021-09-13 18:08:28 +02:00
2022-06-16 10:17:58 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2021-09-13 18:08:28 +02:00
2022-07-04 15:10:12 +02:00
fn reindexing_geo ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
group . bench_function ( " Reindexing geo_point " , | b | {
b . iter_with_setup (
move | | {
let primary_key = " geonameid " ;
let searchable_fields = [ " name " , " alternatenames " , " elevation " ] ;
let filterable_fields = [ " _geo " , " population " , " elevation " ] ;
let sortable_fields = [ " _geo " , " population " , " elevation " ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-07-04 15:10:12 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_ALL_COUNTRIES , " jsonl " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index
} ,
move | index | {
let config = IndexerConfig ::default ( ) ;
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_ALL_COUNTRIES , " jsonl " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2022-07-04 15:10:12 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-07-04 15:10:12 +02:00
index . prepare_for_closing ( ) . wait ( ) ;
} ,
)
} ) ;
}
2022-06-16 10:17:58 +02:00
fn deleting_geo_in_batches_default ( c : & mut Criterion ) {
let mut group = c . benchmark_group ( " indexing " ) ;
group . sample_size ( BENCHMARK_ITERATION ) ;
2022-06-16 10:32:58 +02:00
group . bench_function ( " -geo-delete-facetedNumber-facetedGeo-searchable- " , | b | {
2022-06-16 10:17:58 +02:00
b . iter_with_setup (
move | | {
let primary_key = " geonameid " ;
let searchable_fields = [ " name " , " alternatenames " , " elevation " ] ;
let filterable_fields = [ " _geo " , " population " , " elevation " ] ;
let sortable_fields = [ " _geo " , " population " , " elevation " ] ;
let index = setup_index_with_settings (
2023-01-17 18:01:26 +01:00
primary_key ,
2022-06-16 10:17:58 +02:00
& searchable_fields ,
& filterable_fields ,
& sortable_fields ,
) ;
2021-09-13 18:08:28 +02:00
2022-06-16 10:17:58 +02:00
// We index only one half of the dataset in the setup part
// as we don't care about the time it takes.
2021-12-08 14:12:07 +01:00
let config = IndexerConfig ::default ( ) ;
2021-09-13 18:08:28 +02:00
let mut wtxn = index . write_txn ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
let rtxn = index . read_txn ( ) . unwrap ( ) ;
let db_fields_ids_map = index . fields_ids_map ( & rtxn ) . unwrap ( ) ;
let mut new_fields_ids_map = db_fields_ids_map . clone ( ) ;
let mut indexer =
indexer ::DocumentOperation ::new ( IndexDocumentsMethod ::ReplaceDocuments ) ;
let documents = utils ::documents_from ( datasets_paths ::SMOL_ALL_COUNTRIES , " jsonl " ) ;
indexer . add_documents ( & documents ) . unwrap ( ) ;
let indexer_alloc = Bump ::new ( ) ;
let ( document_changes , _operation_stats , primary_key ) = indexer
2024-11-20 14:58:25 +01:00
. into_changes (
& indexer_alloc ,
& index ,
& rtxn ,
None ,
& mut new_fields_ids_map ,
& | | false ,
2024-12-10 16:30:48 +01:00
Progress ::default ( ) ,
2024-11-20 14:58:25 +01:00
)
2024-11-19 10:45:27 +01:00
. unwrap ( ) ;
indexer ::index (
2022-10-05 17:41:07 +02:00
& mut wtxn ,
& index ,
2024-11-27 17:04:49 +01:00
& milli ::ThreadPoolNoAbortBuilder ::new ( ) . build ( ) . unwrap ( ) ,
2024-11-19 10:45:27 +01:00
config . grenad_parameters ( ) ,
& db_fields_ids_map ,
new_fields_ids_map ,
primary_key ,
& document_changes ,
EmbeddingConfigs ::default ( ) ,
& | | false ,
2024-12-10 16:30:48 +01:00
& Progress ::default ( ) ,
2022-10-05 17:41:07 +02:00
)
. unwrap ( ) ;
2024-11-19 10:45:27 +01:00
2022-06-16 10:17:58 +02:00
wtxn . commit ( ) . unwrap ( ) ;
2024-11-19 10:45:27 +01:00
drop ( rtxn ) ;
2022-06-16 10:17:58 +02:00
let count = 1250 ;
let batch_size = 250 ;
let document_ids_to_delete =
choose_document_ids_from_index_batched ( & index , count , batch_size ) ;
( index , document_ids_to_delete )
} ,
move | ( index , document_ids_to_delete ) | {
2023-11-06 11:56:46 +01:00
delete_documents_from_ids ( index , document_ids_to_delete )
2021-09-13 18:08:28 +02:00
} ,
)
} ) ;
}
2021-07-07 11:42:14 +02:00
// Benchmark registration: `criterion_group!` collects the benchmark functions listed
// below under the group name `benches`, and `criterion_main!` is given that group.
// The targets appear dataset by dataset: songs, wiki, movies, nested movies, geo.
criterion_group! (
benches ,
indexing_songs_default ,
2022-07-04 15:10:12 +02:00
reindexing_songs_default ,
2022-06-16 10:17:58 +02:00
deleting_songs_in_batches_default ,
2021-07-07 11:42:14 +02:00
indexing_songs_without_faceted_numbers ,
indexing_songs_without_faceted_fields ,
2022-02-21 16:30:13 +01:00
indexing_songs_in_three_batches_default ,
2021-07-07 11:42:14 +02:00
indexing_wiki ,
2022-07-04 15:10:12 +02:00
reindexing_wiki ,
2022-06-16 10:17:58 +02:00
deleting_wiki_in_batches_default ,
2022-02-21 17:59:03 +01:00
indexing_wiki_in_three_batches ,
2021-09-13 18:08:28 +02:00
indexing_movies_default ,
2022-07-04 15:10:12 +02:00
reindexing_movies_default ,
2022-06-16 10:17:58 +02:00
deleting_movies_in_batches_default ,
2022-02-22 13:47:37 +01:00
indexing_movies_in_three_batches ,
2022-05-02 17:00:03 +02:00
indexing_nested_movies_default ,
2022-06-16 10:17:58 +02:00
deleting_nested_movies_in_batches_default ,
2022-05-02 17:00:03 +02:00
indexing_nested_movies_without_faceted_fields ,
2022-06-16 10:17:58 +02:00
indexing_geo ,
2022-07-04 15:10:12 +02:00
reindexing_geo ,
2022-06-16 10:17:58 +02:00
deleting_geo_in_batches_default
2021-07-07 11:42:14 +02:00
) ;
criterion_main! ( benches ) ;