mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Use smartstring to store the external id in our hashmap
We need to store all the external ids (primary keys) in a hashmap that associates them with their internal ids during indexing. SmartString avoids heap allocations for short strings, which reduces memory usage and should improve cache locality.
This commit is contained in:
parent
456887a54a
commit
ee64f4a936
@ -32,6 +32,7 @@ serde_json = { version = "1.0.79", features = ["preserve_order"] }
|
|||||||
slice-group-by = "0.3.0"
|
slice-group-by = "0.3.0"
|
||||||
smallstr = { version = "0.3.0", features = ["serde"] }
|
smallstr = { version = "0.3.0", features = ["serde"] }
|
||||||
smallvec = "1.8.0"
|
smallvec = "1.8.0"
|
||||||
|
smartstring = "1.0.1"
|
||||||
tempfile = "3.3.0"
|
tempfile = "3.3.0"
|
||||||
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||||
uuid = { version = "0.8.2", features = ["v4"] }
|
uuid = { version = "0.8.2", features = ["v4"] }
|
||||||
|
@ -1109,8 +1109,11 @@ mod tests {
|
|||||||
|
|
||||||
let mut big_object = HashMap::new();
|
let mut big_object = HashMap::new();
|
||||||
big_object.insert(S("id"), "wow");
|
big_object.insert(S("id"), "wow");
|
||||||
let content: String =
|
let content: String = (0..=u16::MAX)
|
||||||
(0..=u16::MAX).into_iter().map(|p| p.to_string()).reduce(|a, b| a + " " + &b).unwrap();
|
.into_iter()
|
||||||
|
.map(|p| p.to_string())
|
||||||
|
.reduce(|a, b| a + " " + b.as_ref())
|
||||||
|
.unwrap();
|
||||||
big_object.insert("content".to_string(), &content);
|
big_object.insert("content".to_string(), &content);
|
||||||
|
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut cursor = Cursor::new(Vec::new());
|
||||||
|
@ -11,6 +11,7 @@ use itertools::Itertools;
|
|||||||
use obkv::{KvReader, KvWriter};
|
use obkv::{KvReader, KvWriter};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
use smartstring::SmartString;
|
||||||
|
|
||||||
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
|
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
|
||||||
use super::{IndexDocumentsMethod, IndexerConfig};
|
use super::{IndexDocumentsMethod, IndexerConfig};
|
||||||
@ -55,7 +56,8 @@ pub struct Transform<'a, 'i> {
|
|||||||
flattened_sorter: grenad::Sorter<MergeFn>,
|
flattened_sorter: grenad::Sorter<MergeFn>,
|
||||||
replaced_documents_ids: RoaringBitmap,
|
replaced_documents_ids: RoaringBitmap,
|
||||||
new_documents_ids: RoaringBitmap,
|
new_documents_ids: RoaringBitmap,
|
||||||
new_external_documents_ids_builder: FxHashMap<Vec<u8>, u64>,
|
// To improve cache locality and reduce heap usage we use smartstring.
|
||||||
|
new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>,
|
||||||
documents_count: usize,
|
documents_count: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -254,10 +256,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
None => {
|
None => {
|
||||||
// if the document has already been inserted in this
|
// if the document has already been inserted in this
|
||||||
// batch we need to get its docid
|
// batch we need to get its docid
|
||||||
match self
|
match self.new_external_documents_ids_builder.entry(external_id.into()) {
|
||||||
.new_external_documents_ids_builder
|
|
||||||
.entry(external_id.as_bytes().to_vec())
|
|
||||||
{
|
|
||||||
Entry::Occupied(entry) => (*entry.get() as u32, false),
|
Entry::Occupied(entry) => (*entry.get() as u32, false),
|
||||||
// if the document has never been encountered we give it a new docid
|
// if the document has never been encountered we give it a new docid
|
||||||
// and push this new docid to the external documents ids builder
|
// and push this new docid to the external documents ids builder
|
||||||
|
Loading…
Reference in New Issue
Block a user