Remove the now, useless AppendOnlyVec library

This commit is contained in:
Clément Renault 2024-10-07 16:38:45 +02:00
parent c11b7e5c0f
commit 83c09d0db0
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
5 changed files with 21 additions and 184 deletions

View File

@ -1,158 +0,0 @@
use std::sync::atomic::AtomicPtr;
use std::{fmt, mem};
/// An append-only linked-list that returns a mutable references to the pushed items.
pub struct AppendOnlyLinkedList<T> {
head: AtomicPtr<Node<T>>,
}
struct Node<T> {
item: T,
next: AtomicPtr<Node<T>>,
}
impl<T> AppendOnlyLinkedList<T> {
/// Creates an empty list.
pub fn new() -> AppendOnlyLinkedList<T> {
AppendOnlyLinkedList { head: AtomicPtr::default() }
}
/// Pushes the item at the front of the linked-list and returns a unique and mutable reference to it.
#[allow(clippy::mut_from_ref)] // the mut ref is derived from T and unique each time
pub fn push(&self, item: T) -> &mut T {
use std::sync::atomic::Ordering::{Relaxed, SeqCst};
let node = Box::leak(Box::new(Node { item, next: AtomicPtr::default() }));
let mut head = self.head.load(SeqCst);
loop {
std::hint::spin_loop();
node.next = AtomicPtr::new(head);
match self.head.compare_exchange_weak(head, node, SeqCst, Relaxed) {
Ok(_) => break,
Err(new) => head = new,
}
}
&mut node.item
}
}
impl<T> Default for AppendOnlyLinkedList<T> {
fn default() -> Self {
Self::new()
}
}
impl<T> Drop for AppendOnlyLinkedList<T> {
fn drop(&mut self) {
// Let's use the drop implementation of the IntoIter struct
IntoIter(mem::take(&mut self.head));
}
}
impl<T> fmt::Debug for AppendOnlyLinkedList<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("AppendOnlyLinkedList").finish()
}
}
impl<T> IntoIterator for AppendOnlyLinkedList<T> {
type Item = T;
type IntoIter = IntoIter<T>;
fn into_iter(mut self) -> Self::IntoIter {
IntoIter(mem::take(&mut self.head))
}
}
pub struct IntoIter<T>(AtomicPtr<Node<T>>);
impl<T> Iterator for IntoIter<T> {
type Item = T;
fn next(&mut self) -> Option<Self::Item> {
let ptr = *self.0.get_mut();
if ptr.is_null() {
None
} else {
let node = unsafe { Box::from_raw(ptr) };
// Let's set the next node to read to be the next of this one
self.0 = node.next;
Some(node.item)
}
}
}
impl<T> Drop for IntoIter<T> {
fn drop(&mut self) {
let mut ptr = *self.0.get_mut();
while !ptr.is_null() {
let mut node = unsafe { Box::from_raw(ptr) };
// Let's set the next node to read to be the next of this one
ptr = *node.next.get_mut();
}
}
}
#[test]
fn test_parallel_pushing() {
use std::sync::Arc;
let v = Arc::new(AppendOnlyLinkedList::<u64>::new());
let mut threads = Vec::new();
const N: u64 = 100;
for thread_num in 0..N {
let v = v.clone();
threads.push(std::thread::spawn(move || {
let which1 = v.push(thread_num);
let which2 = v.push(thread_num);
assert_eq!(*which1, thread_num);
assert_eq!(*which2, thread_num);
}));
}
for t in threads {
t.join().unwrap();
}
let v = Arc::into_inner(v).unwrap().into_iter().collect::<Vec<_>>();
for thread_num in (0..N).rev() {
assert_eq!(2, v.iter().copied().filter(|&x| x == thread_num).count());
}
}
#[test]
fn test_into_vec() {
struct SafeToDrop(bool);
impl Drop for SafeToDrop {
fn drop(&mut self) {
assert!(self.0);
}
}
let v = AppendOnlyLinkedList::new();
for _ in 0..50 {
v.push(SafeToDrop(false));
}
let mut v = v.into_iter().collect::<Vec<_>>();
assert_eq!(v.len(), 50);
for i in v.iter_mut() {
i.0 = true;
}
}
#[test]
fn test_push_then_index_mut() {
let v = AppendOnlyLinkedList::<usize>::new();
let mut w = Vec::new();
for i in 0..1024 {
*v.push(i) += 1;
w.push(i + 1);
}
let mut v = v.into_iter().collect::<Vec<_>>();
v.reverse();
assert_eq!(v, w);
}

View File

@ -14,7 +14,6 @@ use super::super::cache::CboCachedSorter;
use super::facet_document::extract_document_facets; use super::facet_document::extract_document_facets;
use super::FacetKind; use super::FacetKind;
use crate::facet::value_encoding::f64_into_bytes; use crate::facet::value_encoding::f64_into_bytes;
use crate::update::new::append_only_linked_list::AppendOnlyLinkedList;
use crate::update::new::extract::DocidsExtractor; use crate::update::new::extract::DocidsExtractor;
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt; use crate::update::new::parallel_iterator_ext::ParallelIteratorExt;
use crate::update::new::DocumentChange; use crate::update::new::DocumentChange;
@ -212,18 +211,17 @@ impl DocidsExtractor for FacetedDocidsExtractor {
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_extract: Vec<_> = let attributes_to_extract: Vec<_> =
attributes_to_extract.iter().map(|s| s.as_ref()).collect(); attributes_to_extract.iter().map(|s| s.as_ref()).collect();
let caches = AppendOnlyLinkedList::new(); let thread_local = ThreadLocal::new();
{ {
let span = let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter(); let _entered = span.enter();
let local = ThreadLocal::new();
document_changes.into_par_iter().try_arc_for_each_try_init( document_changes.into_par_iter().try_arc_for_each_try_init(
|| { || {
local.get_or_try(|| { thread_local.get_or_try(|| {
let rtxn = index.read_txn().map_err(Error::from)?; let rtxn = index.read_txn().map_err(Error::from)?;
let cache = caches.push(CboCachedSorter::new( let cache = CboCachedSorter::new(
/// TODO use a better value /// TODO use a better value
100.try_into().unwrap(), 100.try_into().unwrap(),
create_sorter( create_sorter(
@ -234,7 +232,7 @@ impl DocidsExtractor for FacetedDocidsExtractor {
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
), ),
)); );
Ok((rtxn, RefCell::new((fields_ids_map.clone(), Vec::new(), cache)))) Ok((rtxn, RefCell::new((fields_ids_map.clone(), Vec::new(), cache))))
}) })
}, },
@ -259,10 +257,11 @@ impl DocidsExtractor for FacetedDocidsExtractor {
tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
let _entered = span.enter(); let _entered = span.enter();
let readers: Vec<_> = caches let readers: Vec<_> = thread_local
.into_iter() .into_iter()
.par_bridge() .par_bridge()
.map(|cached_sorter| { .map(|(_, rc)| {
let (_, _, cached_sorter) = rc.into_inner();
let sorter = cached_sorter.into_sorter()?; let sorter = cached_sorter.into_sorter()?;
sorter.into_reader_cursors() sorter.into_reader_cursors()
}) })

View File

@ -11,7 +11,6 @@ use thread_local::ThreadLocal;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::SearchableExtractor; use super::SearchableExtractor;
use crate::update::new::append_only_linked_list::AppendOnlyLinkedList;
use crate::update::new::extract::cache::CboCachedSorter; use crate::update::new::extract::cache::CboCachedSorter;
use crate::update::new::extract::perm_json_p::contained_in; use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt; use crate::update::new::parallel_iterator_ext::ParallelIteratorExt;
@ -343,24 +342,23 @@ impl WordDocidsExtractors {
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE, max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
}; };
let caches = AppendOnlyLinkedList::new(); let thread_local = ThreadLocal::new();
{ {
let span = let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter(); let _entered = span.enter();
let local = ThreadLocal::new();
document_changes.into_par_iter().try_arc_for_each_try_init( document_changes.into_par_iter().try_arc_for_each_try_init(
|| { || {
local.get_or_try(|| { thread_local.get_or_try(|| {
let rtxn = index.read_txn().map_err(Error::from)?; let rtxn = index.read_txn().map_err(Error::from)?;
let fields_ids_map = fields_ids_map.clone(); let fields_ids_map = fields_ids_map.clone();
let cache = caches.push(WordDocidsCachedSorters::new( let cache = WordDocidsCachedSorters::new(
indexer, indexer,
max_memory, max_memory,
// TODO use a better value // TODO use a better value
200_000.try_into().unwrap(), 200_000.try_into().unwrap(),
)); );
Ok((rtxn, &document_tokenizer, RefCell::new((fields_ids_map, cache)))) Ok((rtxn, &document_tokenizer, RefCell::new((fields_ids_map, cache))))
}) })
}, },
@ -384,9 +382,8 @@ impl WordDocidsExtractors {
tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
let _entered = span.enter(); let _entered = span.enter();
let mut builder = WordDocidsMergerBuilders::new(); let mut builder = WordDocidsMergerBuilders::new();
let mut count = 0; for (_, _, rc) in thread_local.into_iter() {
for cache in caches.into_iter() { let (_, cache) = rc.into_inner();
count += 1;
builder.add_sorters(cache)?; builder.add_sorters(cache)?;
} }

View File

@ -16,7 +16,6 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::cache::CboCachedSorter; use super::cache::CboCachedSorter;
use super::DocidsExtractor; use super::DocidsExtractor;
use crate::update::new::append_only_linked_list::AppendOnlyLinkedList;
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt; use crate::update::new::parallel_iterator_ext::ParallelIteratorExt;
use crate::update::new::DocumentChange; use crate::update::new::DocumentChange;
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
@ -60,18 +59,18 @@ pub trait SearchableExtractor {
localized_attributes_rules: &localized_attributes_rules, localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE, max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
}; };
let caches = AppendOnlyLinkedList::new();
let thread_local = ThreadLocal::new();
{ {
let span = let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter(); let _entered = span.enter();
let local = ThreadLocal::new();
document_changes.into_par_iter().try_arc_for_each_try_init( document_changes.into_par_iter().try_arc_for_each_try_init(
|| { || {
local.get_or_try(|| { thread_local.get_or_try(|| {
let rtxn = index.read_txn().map_err(Error::from)?; let rtxn = index.read_txn().map_err(Error::from)?;
let cache = caches.push(CboCachedSorter::new( let cache = CboCachedSorter::new(
/// TODO use a better value /// TODO use a better value
1_000_000.try_into().unwrap(), 1_000_000.try_into().unwrap(),
create_sorter( create_sorter(
@ -82,7 +81,7 @@ pub trait SearchableExtractor {
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
), ),
)); );
Ok(( Ok((
rtxn, rtxn,
&document_tokenizer, &document_tokenizer,
@ -110,10 +109,11 @@ pub trait SearchableExtractor {
tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
let _entered = span.enter(); let _entered = span.enter();
let readers: Vec<_> = caches let readers: Vec<_> = thread_local
.into_iter() .into_iter()
.par_bridge() .par_bridge()
.map(|cached_sorter| { .map(|(_, _, rc)| {
let (_, cached_sorter) = rc.into_inner();
let sorter = cached_sorter.into_sorter()?; let sorter = cached_sorter.into_sorter()?;
sorter.into_reader_cursors() sorter.into_reader_cursors()
}) })

View File

@ -4,7 +4,6 @@ pub use top_level_map::{CowStr, TopLevelMap};
use super::del_add::DelAdd; use super::del_add::DelAdd;
use crate::FieldId; use crate::FieldId;
mod append_only_linked_list;
mod channel; mod channel;
mod document_change; mod document_change;
mod extract; mod extract;