2022-04-27 00:12:58 +08:00
|
|
|
use big_s::S;
|
2024-11-19 00:39:55 +08:00
|
|
|
use bumpalo::Bump;
|
2022-04-27 00:12:58 +08:00
|
|
|
use heed::EnvOpenOptions;
|
|
|
|
use maplit::hashset;
|
2024-11-19 18:24:36 +08:00
|
|
|
use milli::documents::mmap_from_objects;
|
2024-11-19 00:39:55 +08:00
|
|
|
use milli::update::new::indexer;
|
2024-11-19 18:24:36 +08:00
|
|
|
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
2024-11-19 00:39:55 +08:00
|
|
|
use milli::vector::EmbeddingConfigs;
|
2023-06-22 23:13:40 +08:00
|
|
|
use milli::{FacetDistribution, Index, Object, OrderBy};
|
2024-11-19 18:24:36 +08:00
|
|
|
use serde_json::{from_value, json};
|
2022-04-27 00:12:58 +08:00
|
|
|
|
|
|
|
/// Checks the facet distribution of filterable fields after indexing two documents.
///
/// `genres` is declared filterable but never receives a value (one document
/// carries an empty array, the other omits the field), so its distribution
/// must be empty. `tags` receives two distinct values and must report both.
#[test]
fn test_facet_distribution_with_no_facet_values() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(env_options, &tmp_dir).unwrap();

    // Declare the filterable fields in a first write transaction.
    let mut wtxn = index.write_txn().unwrap();
    let settings_config = IndexerConfig::default();
    let mut settings = Settings::new(&mut wtxn, &index, &settings_config);
    settings.set_filterable_fields(hashset! { S("genres"), S("tags") });
    settings.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();

    // Prepare the indexing pass: a read transaction to look at the current
    // state, a write transaction to store the result, and a working copy of
    // the fields ids map that the indexer is allowed to extend.
    let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let indexing_rtxn = index.read_txn().unwrap();
    let mut wtxn = index.write_txn().unwrap();
    let db_fields_ids_map = index.fields_ids_map(&indexing_rtxn).unwrap();
    let mut new_fields_ids_map = db_fields_ids_map.clone();

    let embedders = EmbeddingConfigs::default();
    let mut operation = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);

    // The first document has an empty `genres` array, the second has no
    // `genres` field at all; both carry exactly one tag.
    let objects: Vec<Object> = vec![
        json!({ "id": 123, "title": "What a week, hu...", "genres": [], "tags": ["blue"] }),
        json!({ "id": 345, "title": "I am the pig!", "tags": ["red"] }),
    ]
    .into_iter()
    .map(|value| from_value(value).unwrap())
    .collect();
    let documents = mmap_from_objects(objects);

    // Register the payload, then turn it into document changes.
    operation.add_documents(&documents).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, _operation_stats, primary_key) = operation
        .into_changes(
            &indexer_alloc,
            &index,
            &indexing_rtxn,
            None,
            &mut new_fields_ids_map,
            &|| false,
            &|_progress| (),
        )
        .unwrap();

    indexer::index(
        &mut wtxn,
        &index,
        config.grenad_parameters(),
        &db_fields_ids_map,
        new_fields_ids_map,
        primary_key,
        &document_changes,
        embedders,
        &|| false,
        &|_| (),
    )
    .unwrap();

    wtxn.commit().unwrap();

    // Read the distributions back through a fresh read transaction opened
    // after the commit.
    let rtxn = index.read_txn().unwrap();
    for (facet, expected_len) in [("genres", 0), ("tags", 2)] {
        let mut distrib = FacetDistribution::new(&rtxn, &index);
        distrib.facets(vec![(facet, OrderBy::default())]);
        let result = distrib.execute().unwrap();
        assert_eq!(result[facet].len(), expected_len);
    }
}
|