mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
Make the tests pass
This commit is contained in:
parent
e95e47d258
commit
4ceade43cd
@ -37,7 +37,6 @@ use meilisearch_types::milli::vector::parsed_vectors::{
|
|||||||
use meilisearch_types::milli::{self, Filter};
|
use meilisearch_types::milli::{self, Filter};
|
||||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||||
use meilisearch_types::zstd::dict::DecoderDictionary;
|
|
||||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use time::macros::format_description;
|
use time::macros::format_description;
|
||||||
@ -909,8 +908,7 @@ impl IndexScheduler {
|
|||||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||||
let dictionary =
|
let dictionary = index.document_decompression_dictionary(&rtxn)?;
|
||||||
index.document_compression_dictionary(&rtxn)?.map(DecoderDictionary::copy);
|
|
||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
let embedding_configs = index.embedding_configs(&rtxn)?;
|
let embedding_configs = index.embedding_configs(&rtxn)?;
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
@ -2465,12 +2465,20 @@ mod tests {
|
|||||||
|
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -2525,12 +2533,20 @@ mod tests {
|
|||||||
|
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -2904,12 +2920,20 @@ mod tests {
|
|||||||
// has everything been pushed successfully in milli?
|
// has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -2955,12 +2979,20 @@ mod tests {
|
|||||||
// has everything been pushed successfully in milli?
|
// has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -3011,12 +3043,20 @@ mod tests {
|
|||||||
// has everything been pushed successfully in milli?
|
// has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -3129,12 +3169,20 @@ mod tests {
|
|||||||
// has everything been pushed successfully in milli?
|
// has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -3184,12 +3232,20 @@ mod tests {
|
|||||||
// has everything been pushed successfully in milli?
|
// has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -3898,12 +3954,20 @@ mod tests {
|
|||||||
// Has everything been pushed successfully in milli?
|
// Has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -3969,12 +4033,20 @@ mod tests {
|
|||||||
// Has everything been pushed successfully in milli?
|
// Has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -4037,12 +4109,20 @@ mod tests {
|
|||||||
// Has everything been pushed successfully in milli?
|
// Has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -4098,12 +4178,20 @@ mod tests {
|
|||||||
// Has everything been pushed successfully in milli?
|
// Has everything been pushed successfully in milli?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -4159,6 +4247,8 @@ mod tests {
|
|||||||
// Is the primary key still what we expect?
|
// Is the primary key still what we expect?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
||||||
snapshot!(primary_key, @"id");
|
snapshot!(primary_key, @"id");
|
||||||
|
|
||||||
@ -4168,7 +4258,13 @@ mod tests {
|
|||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -4220,6 +4316,8 @@ mod tests {
|
|||||||
// Is the primary key still what we expect?
|
// Is the primary key still what we expect?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
||||||
snapshot!(primary_key, @"id");
|
snapshot!(primary_key, @"id");
|
||||||
|
|
||||||
@ -4229,7 +4327,13 @@ mod tests {
|
|||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -4303,6 +4407,8 @@ mod tests {
|
|||||||
// Is the primary key still what we expect?
|
// Is the primary key still what we expect?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
||||||
snapshot!(primary_key, @"id");
|
snapshot!(primary_key, @"id");
|
||||||
|
|
||||||
@ -4312,7 +4418,13 @@ mod tests {
|
|||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -4389,6 +4501,8 @@ mod tests {
|
|||||||
// Is the primary key still what we expect?
|
// Is the primary key still what we expect?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
||||||
snapshot!(primary_key, @"paw");
|
snapshot!(primary_key, @"paw");
|
||||||
|
|
||||||
@ -4398,7 +4512,13 @@ mod tests {
|
|||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -4468,6 +4588,8 @@ mod tests {
|
|||||||
// Is the primary key still what we expect?
|
// Is the primary key still what we expect?
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
|
||||||
snapshot!(primary_key, @"doggoid");
|
snapshot!(primary_key, @"doggoid");
|
||||||
|
|
||||||
@ -4477,7 +4599,13 @@ mod tests {
|
|||||||
let documents = index
|
let documents = index
|
||||||
.all_compressed_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
@ -5120,6 +5248,8 @@ mod tests {
|
|||||||
{
|
{
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
|
|
||||||
// Ensure the document has been inserted into the relevant bitmap
|
// Ensure the document has been inserted into the relevant bitmap
|
||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
@ -5139,8 +5269,12 @@ mod tests {
|
|||||||
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
|
||||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||||
|
|
||||||
let doc = index.compressed_documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
let (_id, compressed_doc) =
|
||||||
|
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let doc = obkv_to_json(
|
let doc = obkv_to_json(
|
||||||
&[
|
&[
|
||||||
fields_ids_map.id("doggo").unwrap(),
|
fields_ids_map.id("doggo").unwrap(),
|
||||||
@ -5194,6 +5328,8 @@ mod tests {
|
|||||||
{
|
{
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
|
|
||||||
// Ensure the document has been inserted into the relevant bitmap
|
// Ensure the document has been inserted into the relevant bitmap
|
||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
@ -5216,8 +5352,12 @@ mod tests {
|
|||||||
// remained beagle
|
// remained beagle
|
||||||
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
|
||||||
|
|
||||||
let doc = index.compressed_documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
let (_id, compressed_doc) =
|
||||||
|
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let doc = obkv_to_json(
|
let doc = obkv_to_json(
|
||||||
&[
|
&[
|
||||||
fields_ids_map.id("doggo").unwrap(),
|
fields_ids_map.id("doggo").unwrap(),
|
||||||
@ -5309,12 +5449,20 @@ mod tests {
|
|||||||
|
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
|
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
|
||||||
|
|
||||||
@ -5348,12 +5496,20 @@ mod tests {
|
|||||||
|
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
// the all the vectors linked to the new specified embedder have been removed
|
// the all the vectors linked to the new specified embedder have been removed
|
||||||
// Only the unknown embedders stays in the document DB
|
// Only the unknown embedders stays in the document DB
|
||||||
@ -5456,9 +5612,15 @@ mod tests {
|
|||||||
|
|
||||||
// the document with the id 3 should have its original embedding updated
|
// the document with the id 3 should have its original embedding updated
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||||
let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
|
let (_id, compressed_doc) =
|
||||||
let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
|
index.compressed_documents(&rtxn, Some(docid)).unwrap().remove(0);
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
let doc = obkv_to_json(&field_ids, &field_ids_map, doc).unwrap();
|
||||||
snapshot!(json_string!(doc), @r###"
|
snapshot!(json_string!(doc), @r###"
|
||||||
{
|
{
|
||||||
"id": 3,
|
"id": 3,
|
||||||
@ -5570,12 +5732,20 @@ mod tests {
|
|||||||
|
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
|
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
|
||||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||||
@ -5610,12 +5780,20 @@ mod tests {
|
|||||||
|
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
|
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
|
||||||
let conf = index.embedding_configs(&rtxn).unwrap();
|
let conf = index.embedding_configs(&rtxn).unwrap();
|
||||||
@ -5726,12 +5904,20 @@ mod tests {
|
|||||||
{
|
{
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
|
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
|
||||||
}
|
}
|
||||||
@ -5761,12 +5947,20 @@ mod tests {
|
|||||||
{
|
{
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
|
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
|
||||||
}
|
}
|
||||||
@ -5794,12 +5988,20 @@ mod tests {
|
|||||||
{
|
{
|
||||||
let index = index_scheduler.index("doggos").unwrap();
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_compressed_documents(&rtxn)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
.map(|ret| {
|
||||||
|
let (_id, compressed_doc) = ret.unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
// FIXME: redaction
|
// FIXME: redaction
|
||||||
|
@ -20,7 +20,6 @@ use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
|||||||
use meilisearch_types::milli::DocumentId;
|
use meilisearch_types::milli::DocumentId;
|
||||||
use meilisearch_types::star_or::OptionStarOrList;
|
use meilisearch_types::star_or::OptionStarOrList;
|
||||||
use meilisearch_types::tasks::KindWithContent;
|
use meilisearch_types::tasks::KindWithContent;
|
||||||
use meilisearch_types::zstd::dict::DecoderDictionary;
|
|
||||||
use meilisearch_types::{milli, Document, Index};
|
use meilisearch_types::{milli, Document, Index};
|
||||||
use mime::Mime;
|
use mime::Mime;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
@ -604,7 +603,7 @@ fn some_documents<'a, 't: 'a>(
|
|||||||
retrieve_vectors: RetrieveVectors,
|
retrieve_vectors: RetrieveVectors,
|
||||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let dictionary = index.document_compression_dictionary(rtxn)?.map(DecoderDictionary::copy);
|
let dictionary = index.document_decompression_dictionary(rtxn)?;
|
||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
@ -19,7 +19,6 @@ use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
|||||||
use meilisearch_types::milli::vector::Embedder;
|
use meilisearch_types::milli::vector::Embedder;
|
||||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
||||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||||
use meilisearch_types::zstd::dict::DecoderDictionary;
|
|
||||||
use meilisearch_types::{milli, Document};
|
use meilisearch_types::{milli, Document};
|
||||||
use milli::tokenizer::TokenizerBuilder;
|
use milli::tokenizer::TokenizerBuilder;
|
||||||
use milli::{
|
use milli::{
|
||||||
@ -1124,18 +1123,16 @@ fn make_hits(
|
|||||||
formatter_builder.crop_marker(format.crop_marker);
|
formatter_builder.crop_marker(format.crop_marker);
|
||||||
formatter_builder.highlight_prefix(format.highlight_pre_tag);
|
formatter_builder.highlight_prefix(format.highlight_pre_tag);
|
||||||
formatter_builder.highlight_suffix(format.highlight_post_tag);
|
formatter_builder.highlight_suffix(format.highlight_post_tag);
|
||||||
let compression_dictionary =
|
let decompression_dictionary = index.document_decompression_dictionary(rtxn)?;
|
||||||
index.document_compression_dictionary(rtxn)?.map(DecoderDictionary::copy);
|
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||||
let documents_iter = index.compressed_documents(rtxn, documents_ids)?;
|
let documents_iter = index.compressed_documents(rtxn, documents_ids)?;
|
||||||
for ((id, compressed), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
for ((id, compressed), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
||||||
let obkv = match compression_dictionary.as_ref() {
|
let obkv = compressed
|
||||||
// TODO manage this unwrap correctly
|
.decompress_with_optional_dictionary(&mut buffer, decompression_dictionary.as_ref())
|
||||||
Some(dict) => compressed.decompress_with(&mut buffer, dict).unwrap(),
|
// TODO use a better error?
|
||||||
None => compressed.as_non_compressed(),
|
.map_err(|e| MeilisearchHttpError::HeedError(e.into()))?;
|
||||||
};
|
|
||||||
// First generate a document with all the displayed fields
|
// First generate a document with all the displayed fields
|
||||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
||||||
|
|
||||||
|
@ -260,6 +260,7 @@ fn export_a_dump(
|
|||||||
|
|
||||||
// 4. Dump the indexes
|
// 4. Dump the indexes
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
let mut buffer = Vec::new();
|
||||||
for result in index_mapping.iter(&rtxn)? {
|
for result in index_mapping.iter(&rtxn)? {
|
||||||
let (uid, uuid) = result?;
|
let (uid, uuid) = result?;
|
||||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||||
@ -268,6 +269,7 @@ fn export_a_dump(
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let metadata = IndexMetadata {
|
let metadata = IndexMetadata {
|
||||||
uid: uid.to_owned(),
|
uid: uid.to_owned(),
|
||||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||||
@ -281,7 +283,10 @@ fn export_a_dump(
|
|||||||
|
|
||||||
// 4.1. Dump the documents
|
// 4.1. Dump the documents
|
||||||
for ret in index.all_compressed_documents(&rtxn)? {
|
for ret in index.all_compressed_documents(&rtxn)? {
|
||||||
let (_id, doc) = ret?;
|
let (_id, compressed_doc) = ret?;
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||||
index_dumper.push_document(&document)?;
|
index_dumper.push_document(&document)?;
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
|
|
||||||
let index = Index::new(options, dataset)?;
|
let index = Index::new(options, dataset)?;
|
||||||
let txn = index.read_txn()?;
|
let txn = index.read_txn()?;
|
||||||
|
let dictionary = index.document_decompression_dictionary(&txn).unwrap();
|
||||||
let mut query = String::new();
|
let mut query = String::new();
|
||||||
while stdin().read_line(&mut query)? > 0 {
|
while stdin().read_line(&mut query)? > 0 {
|
||||||
for _ in 0..2 {
|
for _ in 0..2 {
|
||||||
@ -49,6 +50,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut ctx = SearchContext::new(&index, &txn)?;
|
let mut ctx = SearchContext::new(&index, &txn)?;
|
||||||
|
let mut buffer = Vec::new();
|
||||||
let universe = filtered_universe(ctx.index, ctx.txn, &None)?;
|
let universe = filtered_universe(ctx.index, ctx.txn, &None)?;
|
||||||
|
|
||||||
let docs = execute_search(
|
let docs = execute_search(
|
||||||
@ -75,11 +77,14 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
let elapsed = start.elapsed();
|
let elapsed = start.elapsed();
|
||||||
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
|
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
|
||||||
if print_documents {
|
if print_documents {
|
||||||
let documents = index
|
let compressed_documents = index
|
||||||
.compressed_documents(&txn, docs.documents_ids.iter().copied())
|
.compressed_documents(&txn, docs.documents_ids.iter().copied())
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(id, obkv)| {
|
.map(|(id, compressed_obkv)| {
|
||||||
|
let obkv = compressed_obkv
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let mut object = serde_json::Map::default();
|
let mut object = serde_json::Map::default();
|
||||||
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
||||||
let value = obkv.get(fid).unwrap();
|
let value = obkv.get(fid).unwrap();
|
||||||
@ -90,17 +95,20 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
for (id, document) in documents {
|
for (id, document) in compressed_documents {
|
||||||
println!("{id}:");
|
println!("{id}:");
|
||||||
println!("{document}");
|
println!("{document}");
|
||||||
}
|
}
|
||||||
|
|
||||||
let documents = index
|
let compressed_documents = index
|
||||||
.compressed_documents(&txn, docs.documents_ids.iter().copied())
|
.compressed_documents(&txn, docs.documents_ids.iter().copied())
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(id, obkv)| {
|
.map(|(id, compressed_obkv)| {
|
||||||
let mut object = serde_json::Map::default();
|
let mut object = serde_json::Map::default();
|
||||||
|
let obkv = compressed_obkv
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
||||||
let value = obkv.get(fid).unwrap();
|
let value = obkv.get(fid).unwrap();
|
||||||
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
|
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
|
||||||
@ -110,7 +118,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
|
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
|
||||||
for (id, document) in documents {
|
for (id, document) in compressed_documents {
|
||||||
println!("{id}:");
|
println!("{id}:");
|
||||||
println!("{document}");
|
println!("{document}");
|
||||||
}
|
}
|
||||||
|
@ -7,9 +7,6 @@ use obkv::KvReaderU16;
|
|||||||
use zstd::bulk::{Compressor, Decompressor};
|
use zstd::bulk::{Compressor, Decompressor};
|
||||||
use zstd::dict::{DecoderDictionary, EncoderDictionary};
|
use zstd::dict::{DecoderDictionary, EncoderDictionary};
|
||||||
|
|
||||||
// TODO move that elsewhere
|
|
||||||
pub const COMPRESSION_LEVEL: i32 = 12;
|
|
||||||
|
|
||||||
pub struct CompressedObkvCodec;
|
pub struct CompressedObkvCodec;
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for CompressedObkvCodec {
|
impl<'a> heed::BytesDecode<'a> for CompressedObkvCodec {
|
||||||
@ -63,10 +60,13 @@ impl<'a> CompressedKvReaderU16<'a> {
|
|||||||
|
|
||||||
/// Decompresses this KvReader if necessary.
|
/// Decompresses this KvReader if necessary.
|
||||||
pub fn decompress_with_optional_dictionary<'b>(
|
pub fn decompress_with_optional_dictionary<'b>(
|
||||||
&'b self,
|
&self,
|
||||||
buffer: &'b mut Vec<u8>,
|
buffer: &'b mut Vec<u8>,
|
||||||
dictionary: Option<&DecoderDictionary>,
|
dictionary: Option<&DecoderDictionary>,
|
||||||
) -> io::Result<KvReaderU16<'b>> {
|
) -> io::Result<KvReaderU16<'b>>
|
||||||
|
where
|
||||||
|
'a: 'b,
|
||||||
|
{
|
||||||
match dictionary {
|
match dictionary {
|
||||||
Some(dict) => self.decompress_with(buffer, dict),
|
Some(dict) => self.decompress_with(buffer, dict),
|
||||||
None => Ok(self.as_non_compressed()),
|
None => Ok(self.as_non_compressed()),
|
||||||
|
@ -21,7 +21,7 @@ use thiserror::Error;
|
|||||||
pub use self::beu16_str_codec::BEU16StrCodec;
|
pub use self::beu16_str_codec::BEU16StrCodec;
|
||||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||||
pub use self::compressed_obkv_codec::{
|
pub use self::compressed_obkv_codec::{
|
||||||
CompressedKvReaderU16, CompressedKvWriterU16, CompressedObkvCodec, COMPRESSION_LEVEL,
|
CompressedKvReaderU16, CompressedKvWriterU16, CompressedObkvCodec,
|
||||||
};
|
};
|
||||||
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
||||||
pub use self::fst_set_codec::FstSetCodec;
|
pub use self::fst_set_codec::FstSetCodec;
|
||||||
|
@ -11,7 +11,7 @@ use roaring::RoaringBitmap;
|
|||||||
use rstar::RTree;
|
use rstar::RTree;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
use zstd::dict::DecoderDictionary;
|
use zstd::dict::{DecoderDictionary, EncoderDictionary};
|
||||||
|
|
||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
@ -362,14 +362,30 @@ impl Index {
|
|||||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DOCUMENT_COMPRESSION_DICTIONARY)
|
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DOCUMENT_COMPRESSION_DICTIONARY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the optional dictionnary to be used when reading the OBKV documents.
|
/// Returns the optional raw bytes dictionary to be used when reading or writing the OBKV documents.
|
||||||
pub fn document_compression_dictionary<'t>(
|
pub fn document_compression_raw_dictionary<'t>(
|
||||||
&self,
|
&self,
|
||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
) -> heed::Result<Option<&'t [u8]>> {
|
) -> heed::Result<Option<&'t [u8]>> {
|
||||||
self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::DOCUMENT_COMPRESSION_DICTIONARY)
|
self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::DOCUMENT_COMPRESSION_DICTIONARY)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn document_decompression_dictionary<'t>(
|
||||||
|
&self,
|
||||||
|
rtxn: &'t RoTxn,
|
||||||
|
) -> heed::Result<Option<DecoderDictionary<'t>>> {
|
||||||
|
self.document_compression_raw_dictionary(rtxn).map(|opt| opt.map(DecoderDictionary::copy))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn document_compression_dictionary(
|
||||||
|
&self,
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
) -> heed::Result<Option<EncoderDictionary<'static>>> {
|
||||||
|
const COMPRESSION_LEVEL: i32 = 19;
|
||||||
|
self.document_compression_raw_dictionary(rtxn)
|
||||||
|
.map(|opt| opt.map(|bytes| EncoderDictionary::copy(bytes, COMPRESSION_LEVEL)))
|
||||||
|
}
|
||||||
|
|
||||||
/* documents ids */
|
/* documents ids */
|
||||||
|
|
||||||
/// Writes the documents ids that corresponds to the user-ids-documents-ids FST.
|
/// Writes the documents ids that corresponds to the user-ids-documents-ids FST.
|
||||||
@ -1342,7 +1358,8 @@ impl Index {
|
|||||||
process: "external_id_of",
|
process: "external_id_of",
|
||||||
})
|
})
|
||||||
})?;
|
})?;
|
||||||
let dictionary = self.document_compression_dictionary(rtxn)?.map(DecoderDictionary::copy);
|
let dictionary =
|
||||||
|
self.document_compression_raw_dictionary(rtxn)?.map(DecoderDictionary::copy);
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
Ok(self.iter_compressed_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
|
Ok(self.iter_compressed_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
|
||||||
let (_docid, compressed_obkv) = entry?;
|
let (_docid, compressed_obkv) = entry?;
|
||||||
@ -2476,11 +2493,12 @@ pub(crate) mod tests {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let dictionary = index.document_compression_dictionary(&rtxn).unwrap();
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let (_docid, compressed_obkv) = index.compressed_documents(&rtxn, [0]).unwrap()[0];
|
let (_docid, compressed_obkv) = index.compressed_documents(&rtxn, [0]).unwrap().remove(0);
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
let obkv =
|
let obkv = compressed_obkv
|
||||||
compressed_obkv.decompress_with_optional_dictionary(&mut buffer, dictionary).unwrap();
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
||||||
insta::assert_debug_snapshot!(json, @r###"
|
insta::assert_debug_snapshot!(json, @r###"
|
||||||
{
|
{
|
||||||
@ -2489,9 +2507,10 @@ pub(crate) mod tests {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
// Furthermore, when we retrieve document 34, it is not the result of merging 35 with 34
|
// Furthermore, when we retrieve document 34, it is not the result of merging 35 with 34
|
||||||
let (_docid, compressed_obkv) = index.compressed_documents(&rtxn, [2]).unwrap()[0];
|
let (_docid, compressed_obkv) = index.compressed_documents(&rtxn, [2]).unwrap().remove(0);
|
||||||
let obkv =
|
let obkv = compressed_obkv
|
||||||
compressed_obkv.decompress_with_optional_dictionary(&mut buffer, dictionary).unwrap();
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
||||||
insta::assert_debug_snapshot!(json, @r###"
|
insta::assert_debug_snapshot!(json, @r###"
|
||||||
{
|
{
|
||||||
@ -2500,6 +2519,7 @@ pub(crate) mod tests {
|
|||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
drop(dictionary);
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// Add new documents again
|
// Add new documents again
|
||||||
@ -2698,11 +2718,16 @@ pub(crate) mod tests {
|
|||||||
} = search.execute().unwrap();
|
} = search.execute().unwrap();
|
||||||
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
|
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
|
||||||
documents_ids.sort_unstable();
|
documents_ids.sort_unstable();
|
||||||
let docs = index.compressed_documents(&rtxn, documents_ids).unwrap();
|
let compressed_docs = index.compressed_documents(&rtxn, documents_ids).unwrap();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
let mut all_ids = HashSet::new();
|
let mut all_ids = HashSet::new();
|
||||||
for (_docid, obkv) in docs {
|
for (_docid, compressed) in compressed_docs {
|
||||||
let id = obkv.get(primary_key_id).unwrap();
|
let doc = compressed
|
||||||
assert!(all_ids.insert(id));
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
let id = doc.get(primary_key_id).unwrap();
|
||||||
|
assert!(all_ids.insert(id.to_vec()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,10 +25,11 @@ fn collect_field_values(
|
|||||||
let mut values = vec![];
|
let mut values = vec![];
|
||||||
let fid = index.fields_ids_map(txn).unwrap().id(fid).unwrap();
|
let fid = index.fields_ids_map(txn).unwrap().id(fid).unwrap();
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
let dictionary = index.document_compression_dictionary(txn).unwrap();
|
let dictionary = index.document_decompression_dictionary(txn).unwrap();
|
||||||
for (_id, compressed_doc) in index.compressed_documents(txn, docids.iter().copied()).unwrap() {
|
for (_id, compressed_doc) in index.compressed_documents(txn, docids.iter().copied()).unwrap() {
|
||||||
let doc =
|
let doc = compressed_doc
|
||||||
compressed_doc.decompress_with_optional_dictionary(&mut buffer, dictionary).unwrap();
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
if let Some(v) = doc.get(fid) {
|
if let Some(v) = doc.get(fid) {
|
||||||
let v: serde_json::Value = serde_json::from_slice(v).unwrap();
|
let v: serde_json::Value = serde_json::from_slice(v).unwrap();
|
||||||
let v = v.to_string();
|
let v = v.to_string();
|
||||||
|
@ -407,13 +407,13 @@ pub fn snap_documents(index: &Index) -> String {
|
|||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let display = fields_ids_map.ids().collect::<Vec<_>>();
|
let display = fields_ids_map.ids().collect::<Vec<_>>();
|
||||||
let dictionary = index.document_compression_dictionary(&rtxn).unwrap();
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
|
||||||
for result in index.all_compressed_documents(&rtxn).unwrap() {
|
for result in index.all_compressed_documents(&rtxn).unwrap() {
|
||||||
let (_id, compressed_document) = result.unwrap();
|
let (_id, compressed_document) = result.unwrap();
|
||||||
let document = compressed_document
|
let document = compressed_document
|
||||||
.decompress_with_optional_dictionary(&mut buffer, dictionary)
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let doc = obkv_to_json(&display, &fields_ids_map, document).unwrap();
|
let doc = obkv_to_json(&display, &fields_ids_map, document).unwrap();
|
||||||
snap.push_str(&serde_json::to_string(&doc).unwrap());
|
snap.push_str(&serde_json::to_string(&doc).unwrap());
|
||||||
|
@ -21,7 +21,6 @@ use serde::{Deserialize, Serialize};
|
|||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
|
use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
|
||||||
use zstd::dict::EncoderDictionary;
|
|
||||||
|
|
||||||
use self::enrich::enrich_documents_batch;
|
use self::enrich::enrich_documents_batch;
|
||||||
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
|
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
|
||||||
@ -35,7 +34,7 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
|||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||||
use crate::error::{Error, InternalError, UserError};
|
use crate::error::{Error, InternalError, UserError};
|
||||||
use crate::heed_codec::{CompressedKvWriterU16, CompressedObkvCodec, COMPRESSION_LEVEL};
|
use crate::heed_codec::{CompressedKvWriterU16, CompressedObkvCodec};
|
||||||
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
||||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
@ -784,8 +783,8 @@ where
|
|||||||
// TODO make this 64_000 const
|
// TODO make this 64_000 const
|
||||||
let dictionary = zstd::dict::from_continuous(&sample_data, &sample_sizes, 64_000)?;
|
let dictionary = zstd::dict::from_continuous(&sample_data, &sample_sizes, 64_000)?;
|
||||||
self.index.put_document_compression_dictionary(self.wtxn, &dictionary)?;
|
self.index.put_document_compression_dictionary(self.wtxn, &dictionary)?;
|
||||||
// TODO use declare the level 3 as a const
|
// safety: We just set the dictionary above, it must be there when we get it back.
|
||||||
let dictionary = EncoderDictionary::copy(&dictionary, COMPRESSION_LEVEL);
|
let dictionary = self.index.document_compression_dictionary(self.wtxn)?.unwrap();
|
||||||
|
|
||||||
// TODO do not remap types here but rather expose the &[u8] for the KvReaderU16
|
// TODO do not remap types here but rather expose the &[u8] for the KvReaderU16
|
||||||
let mut iter = self.index.documents.iter_mut(self.wtxn)?;
|
let mut iter = self.index.documents.iter_mut(self.wtxn)?;
|
||||||
@ -901,6 +900,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn simple_document_merge() {
|
fn simple_document_merge() {
|
||||||
let mut index = TempIndex::new();
|
let mut index = TempIndex::new();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
|
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
|
||||||
|
|
||||||
// First we send 3 documents with duplicate ids and
|
// First we send 3 documents with duplicate ids and
|
||||||
@ -919,16 +919,21 @@ mod tests {
|
|||||||
assert_eq!(count, 1);
|
assert_eq!(count, 1);
|
||||||
|
|
||||||
// Check that we get only one document from the database.
|
// Check that we get only one document from the database.
|
||||||
let docs = index.compressed_documents(&rtxn, Some(0)).unwrap();
|
let mut compressed_docs = index.compressed_documents(&rtxn, Some(0)).unwrap();
|
||||||
assert_eq!(docs.len(), 1);
|
assert_eq!(compressed_docs.len(), 1);
|
||||||
let (id, doc) = docs[0];
|
let (id, compressed_doc) = compressed_docs.remove(0);
|
||||||
assert_eq!(id, 0);
|
assert_eq!(id, 0);
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
// Check that this document is equal to the last one sent.
|
// Check that this document is equal to the last one sent.
|
||||||
let mut doc_iter = doc.iter();
|
let mut doc_iter = doc.iter();
|
||||||
assert_eq!(doc_iter.next(), Some((0, &b"1"[..])));
|
assert_eq!(doc_iter.next(), Some((0, &b"1"[..])));
|
||||||
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
|
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
|
||||||
assert_eq!(doc_iter.next(), None);
|
assert_eq!(doc_iter.next(), None);
|
||||||
|
drop(dictionary);
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// Second we send 1 document with id 1, to force it to be merged with the previous one.
|
// Second we send 1 document with id 1, to force it to be merged with the previous one.
|
||||||
@ -940,10 +945,14 @@ mod tests {
|
|||||||
assert_eq!(count, 1);
|
assert_eq!(count, 1);
|
||||||
|
|
||||||
// Check that we get only one document from the database.
|
// Check that we get only one document from the database.
|
||||||
let docs = index.compressed_documents(&rtxn, Some(0)).unwrap();
|
let mut compressed_docs = index.compressed_documents(&rtxn, Some(0)).unwrap();
|
||||||
assert_eq!(docs.len(), 1);
|
assert_eq!(compressed_docs.len(), 1);
|
||||||
let (id, doc) = docs[0];
|
let (id, compressed_doc) = compressed_docs.remove(0);
|
||||||
assert_eq!(id, 0);
|
assert_eq!(id, 0);
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
// Check that this document is equal to the last one sent.
|
// Check that this document is equal to the last one sent.
|
||||||
let mut doc_iter = doc.iter();
|
let mut doc_iter = doc.iter();
|
||||||
@ -951,6 +960,7 @@ mod tests {
|
|||||||
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
|
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
|
||||||
assert_eq!(doc_iter.next(), Some((2, &b"25"[..])));
|
assert_eq!(doc_iter.next(), Some((2, &b"25"[..])));
|
||||||
assert_eq!(doc_iter.next(), None);
|
assert_eq!(doc_iter.next(), None);
|
||||||
|
drop(dictionary);
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -975,6 +985,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn simple_auto_generated_documents_ids() {
|
fn simple_auto_generated_documents_ids() {
|
||||||
let mut index = TempIndex::new();
|
let mut index = TempIndex::new();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
index.index_documents_config.autogenerate_docids = true;
|
index.index_documents_config.autogenerate_docids = true;
|
||||||
// First we send 3 documents with ids from 1 to 3.
|
// First we send 3 documents with ids from 1 to 3.
|
||||||
index
|
index
|
||||||
@ -987,12 +998,26 @@ mod tests {
|
|||||||
|
|
||||||
// Check that there is 3 documents now.
|
// Check that there is 3 documents now.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let count = index.number_of_documents(&rtxn).unwrap();
|
let count = index.number_of_documents(&rtxn).unwrap();
|
||||||
assert_eq!(count, 3);
|
assert_eq!(count, 3);
|
||||||
|
|
||||||
let docs = index.compressed_documents(&rtxn, vec![0, 1, 2]).unwrap();
|
let compressed_docs = index.compressed_documents(&rtxn, vec![0, 1, 2]).unwrap();
|
||||||
let (_id, obkv) = docs.iter().find(|(_id, kv)| kv.get(0) == Some(br#""kevin""#)).unwrap();
|
let (_id, compressed_obkv) = compressed_docs
|
||||||
|
.iter()
|
||||||
|
.find(|(_id, compressed_doc)| {
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
doc.get(0) == Some(br#""kevin""#)
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let obkv = compressed_obkv
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let kevin_uuid: String = serde_json::from_slice(obkv.get(1).unwrap()).unwrap();
|
let kevin_uuid: String = serde_json::from_slice(obkv.get(1).unwrap()).unwrap();
|
||||||
|
drop(dictionary);
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// Second we send 1 document with the generated uuid, to erase the previous ones.
|
// Second we send 1 document with the generated uuid, to erase the previous ones.
|
||||||
@ -1000,21 +1025,34 @@ mod tests {
|
|||||||
|
|
||||||
// Check that there is **always** 3 documents.
|
// Check that there is **always** 3 documents.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let count = index.number_of_documents(&rtxn).unwrap();
|
let count = index.number_of_documents(&rtxn).unwrap();
|
||||||
assert_eq!(count, 3);
|
assert_eq!(count, 3);
|
||||||
|
|
||||||
// the document 0 has been deleted and reinserted with the id 3
|
// the document 0 has been deleted and reinserted with the id 3
|
||||||
let docs = index.compressed_documents(&rtxn, vec![1, 2, 0]).unwrap();
|
let mut compressed_docs = index.compressed_documents(&rtxn, vec![1, 2, 0]).unwrap();
|
||||||
let kevin_position =
|
let kevin_position = compressed_docs
|
||||||
docs.iter().position(|(_, d)| d.get(0).unwrap() == br#""updated kevin""#).unwrap();
|
.iter()
|
||||||
|
.position(|(_, compressed_doc)| {
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
doc.get(0).unwrap() == br#""updated kevin""#
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
assert_eq!(kevin_position, 2);
|
assert_eq!(kevin_position, 2);
|
||||||
let (_, doc) = docs[kevin_position];
|
let (_, compressed_doc) = compressed_docs.remove(kevin_position);
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
// Check that this document is equal to the last
|
// Check that this document is equal to the last
|
||||||
// one sent and that an UUID has been generated.
|
// one sent and that an UUID has been generated.
|
||||||
assert_eq!(doc.get(0), Some(&br#""updated kevin""#[..]));
|
assert_eq!(doc.get(0), Some(&br#""updated kevin""#[..]));
|
||||||
// This is an UUID, it must be 36 bytes long plus the 2 surrounding string quotes (").
|
// This is an UUID, it must be 36 bytes long plus the 2 surrounding string quotes (").
|
||||||
assert_eq!(doc.get(1).unwrap().len(), 36 + 2);
|
assert_eq!(doc.get(1).unwrap().len(), 36 + 2);
|
||||||
|
drop(dictionary);
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,7 +11,6 @@ use obkv::{KvReader, KvReaderU16, KvWriter};
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use smartstring::SmartString;
|
use smartstring::SmartString;
|
||||||
use zstd::dict::DecoderDictionary;
|
|
||||||
|
|
||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
create_sorter, create_writer, keep_first, obkvs_keep_last_addition_merge_deletions,
|
create_sorter, create_writer, keep_first, obkvs_keep_last_addition_merge_deletions,
|
||||||
@ -169,8 +168,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
let external_documents_ids = self.index.external_documents_ids();
|
let external_documents_ids = self.index.external_documents_ids();
|
||||||
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
|
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
|
||||||
|
|
||||||
let dictionary =
|
let dictionary = self.index.document_decompression_dictionary(wtxn)?;
|
||||||
self.index.document_compression_dictionary(wtxn)?.map(DecoderDictionary::copy);
|
|
||||||
let primary_key = cursor.primary_key().to_string();
|
let primary_key = cursor.primary_key().to_string();
|
||||||
let primary_key_id =
|
let primary_key_id =
|
||||||
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
|
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
|
||||||
@ -350,9 +348,12 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
documents_seen: documents_count,
|
documents_seen: documents_count,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
drop(dictionary);
|
||||||
|
|
||||||
self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
|
self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
|
||||||
self.index.put_primary_key(wtxn, &primary_key)?;
|
self.index.put_primary_key(wtxn, &primary_key)?;
|
||||||
self.documents_count += documents_count;
|
self.documents_count += documents_count;
|
||||||
|
|
||||||
// Now that we have a valid sorter that contains the user id and the obkv we
|
// Now that we have a valid sorter that contains the user id and the obkv we
|
||||||
// give it to the last transforming function which returns the TransformOutput.
|
// give it to the last transforming function which returns the TransformOutput.
|
||||||
Ok(documents_count)
|
Ok(documents_count)
|
||||||
@ -1037,8 +1038,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
|
|
||||||
if original_sorter.is_some() || flattened_sorter.is_some() {
|
if original_sorter.is_some() || flattened_sorter.is_some() {
|
||||||
let modified_faceted_fields = settings_diff.modified_faceted_fields();
|
let modified_faceted_fields = settings_diff.modified_faceted_fields();
|
||||||
let dictionary =
|
let dictionary = self.index.document_decompression_dictionary(wtxn)?;
|
||||||
self.index.document_compression_dictionary(wtxn)?.map(DecoderDictionary::copy);
|
|
||||||
|
|
||||||
let mut original_obkv_buffer = Vec::new();
|
let mut original_obkv_buffer = Vec::new();
|
||||||
let mut flattened_obkv_buffer = Vec::new();
|
let mut flattened_obkv_buffer = Vec::new();
|
||||||
|
@ -10,7 +10,6 @@ use heed::types::Bytes;
|
|||||||
use heed::{BytesDecode, RwTxn};
|
use heed::{BytesDecode, RwTxn};
|
||||||
use obkv::{KvReader, KvWriter};
|
use obkv::{KvReader, KvWriter};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use zstd::dict::EncoderDictionary;
|
|
||||||
|
|
||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
self, keep_first, merge_deladd_btreeset_string, merge_deladd_cbo_roaring_bitmaps,
|
self, keep_first, merge_deladd_btreeset_string, merge_deladd_cbo_roaring_bitmaps,
|
||||||
@ -20,7 +19,7 @@ use super::helpers::{
|
|||||||
use super::MergeFn;
|
use super::MergeFn;
|
||||||
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
|
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::{CompressedKvWriterU16, COMPRESSION_LEVEL};
|
use crate::heed_codec::CompressedKvWriterU16;
|
||||||
use crate::index::db_name::DOCUMENTS;
|
use crate::index::db_name::DOCUMENTS;
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::proximity::MAX_DISTANCE;
|
use crate::proximity::MAX_DISTANCE;
|
||||||
@ -164,10 +163,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|IndexEmbeddingConfig { name, .. }| name)
|
.map(|IndexEmbeddingConfig { name, .. }| name)
|
||||||
.collect();
|
.collect();
|
||||||
// TODO declare the compression ratio as a const
|
let dictionary = index.document_compression_dictionary(wtxn)?;
|
||||||
let dictionary = index
|
|
||||||
.document_compression_dictionary(wtxn)?
|
|
||||||
.map(|dict| EncoderDictionary::copy(dict, COMPRESSION_LEVEL));
|
|
||||||
let mut vectors_buffer = Vec::new();
|
let mut vectors_buffer = Vec::new();
|
||||||
while let Some((key, reader)) = iter.next()? {
|
while let Some((key, reader)) = iter.next()? {
|
||||||
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
||||||
|
@ -1769,6 +1769,8 @@ mod tests {
|
|||||||
|
|
||||||
// Check that the searchable field is correctly set to "name" only.
|
// Check that the searchable field is correctly set to "name" only.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
// When we search for something that is not in
|
// When we search for something that is not in
|
||||||
// the searchable fields it must not return any document.
|
// the searchable fields it must not return any document.
|
||||||
let result = index.search(&rtxn).query("23").execute().unwrap();
|
let result = index.search(&rtxn).query("23").execute().unwrap();
|
||||||
@ -1777,10 +1779,17 @@ mod tests {
|
|||||||
// When we search for something that is in the searchable fields
|
// When we search for something that is in the searchable fields
|
||||||
// we must find the appropriate document.
|
// we must find the appropriate document.
|
||||||
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
||||||
let documents = index.compressed_documents(&rtxn, result.documents_ids).unwrap();
|
let mut compressed_documents =
|
||||||
|
index.compressed_documents(&rtxn, result.documents_ids).unwrap();
|
||||||
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
assert_eq!(documents.len(), 1);
|
assert_eq!(compressed_documents.len(), 1);
|
||||||
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
let (_id, compressed_document) = compressed_documents.remove(0);
|
||||||
|
let document = compressed_document
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(document.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||||
|
drop(dictionary);
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// We change the searchable fields to be the "name" field only.
|
// We change the searchable fields to be the "name" field only.
|
||||||
@ -1805,6 +1814,7 @@ mod tests {
|
|||||||
|
|
||||||
// Check that the searchable field have been reset and documents are found now.
|
// Check that the searchable field have been reset and documents are found now.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
|
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
|
||||||
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
|
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
|
||||||
@ -1813,8 +1823,13 @@ mod tests {
|
|||||||
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
|
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
|
||||||
let result = index.search(&rtxn).query("23").execute().unwrap();
|
let result = index.search(&rtxn).query("23").execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
let documents = index.compressed_documents(&rtxn, result.documents_ids).unwrap();
|
let mut compressed_documents =
|
||||||
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
index.compressed_documents(&rtxn, result.documents_ids).unwrap();
|
||||||
|
let (_id, compressed_document) = compressed_documents.remove(0);
|
||||||
|
let document = compressed_document
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(document.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -1949,15 +1964,20 @@ mod tests {
|
|||||||
|
|
||||||
// Check that the displayed fields are correctly set.
|
// Check that the displayed fields are correctly set.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||||
assert_eq!(fields_ids, hashset! { S("age") });
|
assert_eq!(fields_ids, hashset! { S("age") });
|
||||||
// Only count the field_id 0 and level 0 facet values.
|
// Only count the field_id 0 and level 0 facet values.
|
||||||
// TODO we must support typed CSVs for numbers to be understood.
|
// TODO we must support typed CSVs for numbers to be understood.
|
||||||
let fidmap = index.fields_ids_map(&rtxn).unwrap();
|
let fidmap = index.fields_ids_map(&rtxn).unwrap();
|
||||||
for document in index.all_compressed_documents(&rtxn).unwrap() {
|
for result in index.all_compressed_documents(&rtxn).unwrap() {
|
||||||
let document = document.unwrap();
|
let (_id, compressed_document) = result.unwrap();
|
||||||
let json = crate::obkv_to_json(&fidmap.ids().collect::<Vec<_>>(), &fidmap, document.1)
|
let document = compressed_document
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let json =
|
||||||
|
crate::obkv_to_json(&fidmap.ids().collect::<Vec<_>>(), &fidmap, document).unwrap();
|
||||||
println!("json: {:?}", json);
|
println!("json: {:?}", json);
|
||||||
}
|
}
|
||||||
let count = index
|
let count = index
|
||||||
@ -1968,6 +1988,7 @@ mod tests {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
.count();
|
.count();
|
||||||
assert_eq!(count, 3);
|
assert_eq!(count, 3);
|
||||||
|
drop(dictionary);
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// Index a little more documents with new and current facets values.
|
// Index a little more documents with new and current facets values.
|
||||||
@ -2057,6 +2078,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn set_asc_desc_field() {
|
fn set_asc_desc_field() {
|
||||||
let mut index = TempIndex::new();
|
let mut index = TempIndex::new();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
index.index_documents_config.autogenerate_docids = true;
|
index.index_documents_config.autogenerate_docids = true;
|
||||||
|
|
||||||
// Set the filterable fields to be the age.
|
// Set the filterable fields to be the age.
|
||||||
@ -2078,12 +2100,16 @@ mod tests {
|
|||||||
|
|
||||||
// Run an empty query just to ensure that the search results are ordered.
|
// Run an empty query just to ensure that the search results are ordered.
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
|
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
|
||||||
let documents = index.compressed_documents(&rtxn, documents_ids).unwrap();
|
let compressed_documents = index.compressed_documents(&rtxn, documents_ids).unwrap();
|
||||||
|
|
||||||
// Fetch the documents "age" field in the ordre in which the documents appear.
|
// Fetch the documents "age" field in the ordre in which the documents appear.
|
||||||
let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap();
|
let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap();
|
||||||
let iter = documents.into_iter().map(|(_, doc)| {
|
let iter = compressed_documents.into_iter().map(|(_, compressed_doc)| {
|
||||||
|
let doc = compressed_doc
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
let bytes = doc.get(age_field_id).unwrap();
|
let bytes = doc.get(age_field_id).unwrap();
|
||||||
let string = std::str::from_utf8(bytes).unwrap();
|
let string = std::str::from_utf8(bytes).unwrap();
|
||||||
string.parse::<u32>().unwrap()
|
string.parse::<u32>().unwrap()
|
||||||
@ -2480,6 +2506,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn setting_impact_relevancy() {
|
fn setting_impact_relevancy() {
|
||||||
let mut index = TempIndex::new();
|
let mut index = TempIndex::new();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
index.index_documents_config.autogenerate_docids = true;
|
index.index_documents_config.autogenerate_docids = true;
|
||||||
|
|
||||||
// Set the genres setting
|
// Set the genres setting
|
||||||
@ -2513,7 +2540,11 @@ mod tests {
|
|||||||
let SearchResult { documents_ids, .. } = index.search(&rtxn).query("S").execute().unwrap();
|
let SearchResult { documents_ids, .. } = index.search(&rtxn).query("S").execute().unwrap();
|
||||||
let first_id = documents_ids[0];
|
let first_id = documents_ids[0];
|
||||||
let documents = index.compressed_documents(&rtxn, documents_ids).unwrap();
|
let documents = index.compressed_documents(&rtxn, documents_ids).unwrap();
|
||||||
let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap();
|
let (_, compressed_content) = documents.iter().find(|(id, _)| *id == first_id).unwrap();
|
||||||
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
|
let content = compressed_content
|
||||||
|
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap();
|
let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap();
|
||||||
let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap();
|
let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap();
|
||||||
|
@ -317,8 +317,20 @@ fn criteria_ascdesc() {
|
|||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let documents =
|
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||||
index.all_compressed_documents(&rtxn).unwrap().map(|doc| doc.unwrap()).collect::<Vec<_>>();
|
let mut buffers = vec![Vec::new(); index.number_of_documents(&rtxn).unwrap() as usize];
|
||||||
|
let documents = index
|
||||||
|
.all_compressed_documents(&rtxn)
|
||||||
|
.unwrap()
|
||||||
|
.zip(buffers.iter_mut())
|
||||||
|
.map(|(compressed, buffer)| {
|
||||||
|
let (id, compressed) = compressed.unwrap();
|
||||||
|
let doc = compressed
|
||||||
|
.decompress_with_optional_dictionary(buffer, dictionary.as_ref())
|
||||||
|
.unwrap();
|
||||||
|
(id, doc)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
for criterion in [Asc(S("name")), Desc(S("name")), Asc(S("age")), Desc(S("age"))] {
|
for criterion in [Asc(S("name")), Desc(S("name")), Asc(S("age")), Desc(S("age"))] {
|
||||||
eprintln!("Testing with criterion: {:?}", &criterion);
|
eprintln!("Testing with criterion: {:?}", &criterion);
|
||||||
|
Loading…
Reference in New Issue
Block a user