diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 71f281e98..4cf56b563 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -1,7 +1,7 @@
 use std::borrow::Cow;
 use std::collections::HashSet;
 use std::fs::File;
-use std::io::{self, Seek, SeekFrom};
+use std::io::{self, Seek, SeekFrom, BufReader, BufRead};
 use std::num::{NonZeroU32, NonZeroUsize};
 use std::str;
 use std::sync::mpsc::sync_channel;
@@ -327,6 +327,16 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
         R: io::Read,
         F: Fn(UpdateIndexingStep, u64) + Sync,
     {
+        let mut reader = BufReader::new(reader);
+        reader.fill_buf()?;
+
+        // Early return when there is no document to add
+        if reader.buffer().is_empty() {
+            return Ok(DocumentAdditionResult {
+                nb_documents: 0,
+            })
+        }
+
         self.index.set_updated_at(self.wtxn, &Utc::now())?;
         let before_transform = Instant::now();
         let update_id = self.update_id;
diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index e029a5135..fd508d6a4 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -47,6 +47,12 @@ pub struct Transform<'t, 'i> {
     pub autogenerate_docids: bool,
 }
 
+/// Tells whether the lowercased `field` name contains `DEFAULT_PRIMARY_KEY_NAME`,
+/// which is how a primary key candidate is recognised in document keys and CSV headers.
+fn is_primary_key(field: impl AsRef<str>) -> bool {
+    field.as_ref().to_lowercase().contains(DEFAULT_PRIMARY_KEY_NAME)
+}
+
 impl Transform<'_, '_> {
     pub fn output_from_json(self, reader: R, progress_callback: F) -> anyhow::Result
     where
@@ -91,8 +97,13 @@ impl Transform<'_, '_> {
 
         // We extract the primary key from the first document in
         // the batch if it hasn't already been defined in the index
-        let first = documents.peek().and_then(|r| r.as_ref().ok());
-        let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned());
+        let first = match documents.peek().map(Result::as_ref).transpose() {
+            Ok(first) => first,
+            // `peek` just showed that the next item is an `Err`, so both unwraps cannot fail.
+            Err(_) => return Err(documents.next().unwrap().unwrap_err().into()),
+        };
+
+        let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
         let (primary_key_id, primary_key) = compute_primary_key_pair(
             self.index.primary_key(self.rtxn)?,
             &mut fields_ids_map,
@@ -232,7 +243,7 @@ impl Transform<'_, '_> {
                 // The primary key is known so we must find the position in the CSV headers.
                 headers.iter().position(|h| h == primary_key)
             },
-            None => headers.iter().position(|h| h.contains("id")),
+            None => headers.iter().position(is_primary_key),
         };
 
         // Returns the field id in the fields ids map, create an "id" field