2022-04-26 18:12:58 +02:00
use big_s ::S ;
2024-11-18 17:39:55 +01:00
use bumpalo ::Bump ;
2022-04-26 18:12:58 +02:00
use heed ::EnvOpenOptions ;
use maplit ::hashset ;
2024-11-19 11:24:36 +01:00
use milli ::documents ::mmap_from_objects ;
2024-12-10 16:30:48 +01:00
use milli ::progress ::Progress ;
2024-11-18 17:39:55 +01:00
use milli ::update ::new ::indexer ;
2024-11-19 11:24:36 +01:00
use milli ::update ::{ IndexDocumentsMethod , IndexerConfig , Settings } ;
2024-11-18 17:39:55 +01:00
use milli ::vector ::EmbeddingConfigs ;
2023-06-22 17:13:40 +02:00
use milli ::{ FacetDistribution , Index , Object , OrderBy } ;
2024-11-19 11:24:36 +01:00
use serde_json ::{ from_value , json } ;
2022-04-26 18:12:58 +02:00
/// Checks the facet distribution of filterable fields that have no values.
///
/// `genres` and `tags` are declared filterable, then two documents are indexed:
/// the first carries an empty `genres` array and one tag, the second has no
/// `genres` field at all and a different tag. The distribution computed for
/// `genres` must be empty, while the one for `tags` must hold two entries.
#[test]
fn test_facet_distribution_with_no_facet_values() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // Register the filterable fields and commit them in their own write
    // transaction, before any document is indexed.
    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    builder.set_filterable_fields(hashset! {
        S("genres"),
        S("tags"),
    });
    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();

    // Prepare the indexing context: a read transaction to load the current
    // fields-ids map, and a fresh write transaction for the indexing itself.
    let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let rtxn = index.read_txn().unwrap();
    let mut wtxn = index.write_txn().unwrap();
    let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
    let mut new_fields_ids_map = db_fields_ids_map.clone();

    let embedders = EmbeddingConfigs::default();
    let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);

    // `doc1` has an empty `genres` array; `doc2` has no `genres` field at all.
    let doc1: Object = from_value(
        json!({ "id": 123, "title": "What a week, hu...", "genres": [], "tags": ["blue"] }),
    )
    .unwrap();
    let doc2: Object =
        from_value(json!({ "id": 345, "title": "I am the pig!", "tags": ["red"] })).unwrap();
    let documents = mmap_from_objects(vec![doc1, doc2]);

    // Turn the document payload into changes, then index them.
    // No primary key is passed explicitly (`None`); presumably it is inferred
    // from the `id` attribute of the documents — TODO confirm.
    indexer.add_documents(&documents).unwrap();
    let indexer_alloc = Bump::new();
    let (document_changes, _operation_stats, primary_key) = indexer
        .into_changes(
            &indexer_alloc,
            &index,
            &rtxn,
            None,
            &mut new_fields_ids_map,
            &|| false,
            Progress::default(),
        )
        .unwrap();

    indexer::index(
        &mut wtxn,
        &index,
        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
        config.grenad_parameters(),
        &db_fields_ids_map,
        new_fields_ids_map,
        primary_key,
        &document_changes,
        embedders,
        &|| false,
        &Progress::default(),
    )
    .unwrap();

    wtxn.commit().unwrap();

    // Read back with a new transaction opened after the commit.
    let rtxn = index.read_txn().unwrap();

    // Neither document provides a `genres` value, so nothing is reported for it.
    let mut distrib = FacetDistribution::new(&rtxn, &index);
    distrib.facets(vec![("genres", OrderBy::default())]);
    let result = distrib.execute().unwrap();
    assert_eq!(result["genres"].len(), 0);

    // Each document provides one distinct tag ("blue" and "red").
    let mut distrib = FacetDistribution::new(&rtxn, &index);
    distrib.facets(vec![("tags", OrderBy::default())]);
    let result = distrib.execute().unwrap();
    assert_eq!(result["tags"].len(), 2);
}