193: Fix primary key behavior r=Kerollmops a=MarinPostma

This PR:
- Adds early returns on empty document additions, so that no error is returned when no documents are added and no primary key has been set.
- Changes the primary key inference logic to match that of legacy meilisearch.

close #194 

Co-authored-by: Marin Postma <postma.marin@protonmail.com>
Co-authored-by: marin postma <postma.marin@protonmail.com>
This commit is contained in:
bors[bot] 2021-06-03 10:24:21 +00:00 committed by GitHub
commit 39ed133f9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 4 deletions

View File

@ -1,7 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashSet; use std::collections::HashSet;
use std::fs::File; use std::fs::File;
use std::io::{self, Seek, SeekFrom}; use std::io::{self, Seek, SeekFrom, BufReader, BufRead};
use std::num::{NonZeroU32, NonZeroUsize}; use std::num::{NonZeroU32, NonZeroUsize};
use std::str; use std::str;
use std::sync::mpsc::sync_channel; use std::sync::mpsc::sync_channel;
@ -327,6 +327,16 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
R: io::Read, R: io::Read,
F: Fn(UpdateIndexingStep, u64) + Sync, F: Fn(UpdateIndexingStep, u64) + Sync,
{ {
let mut reader = BufReader::new(reader);
reader.fill_buf()?;
// Early return when there is no document to add
if reader.buffer().is_empty() {
return Ok(DocumentAdditionResult {
nb_documents: 0,
})
}
self.index.set_updated_at(self.wtxn, &Utc::now())?; self.index.set_updated_at(self.wtxn, &Utc::now())?;
let before_transform = Instant::now(); let before_transform = Instant::now();
let update_id = self.update_id; let update_id = self.update_id;

View File

@ -47,6 +47,10 @@ pub struct Transform<'t, 'i> {
pub autogenerate_docids: bool, pub autogenerate_docids: bool,
} }
/// Returns `true` when `field`, once lowercased, contains
/// `DEFAULT_PRIMARY_KEY_NAME` as a substring.
///
/// The field name is lowercased before the check, so the match does not
/// depend on the casing of `field`; the constant itself is compared as-is.
fn is_primary_key(field: impl AsRef<str>) -> bool {
    let lowercased = field.as_ref().to_lowercase();
    lowercased.contains(DEFAULT_PRIMARY_KEY_NAME)
}
impl Transform<'_, '_> { impl Transform<'_, '_> {
pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput> pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
where where
@ -91,8 +95,12 @@ impl Transform<'_, '_> {
// We extract the primary key from the first document in // We extract the primary key from the first document in
// the batch if it hasn't already been defined in the index // the batch if it hasn't already been defined in the index
let first = documents.peek().and_then(|r| r.as_ref().ok()); let first = match documents.peek().map(Result::as_ref).transpose() {
let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned()); Ok(first) => first,
Err(_) => return Err(documents.next().unwrap().unwrap_err().into()),
};
let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
let (primary_key_id, primary_key) = compute_primary_key_pair( let (primary_key_id, primary_key) = compute_primary_key_pair(
self.index.primary_key(self.rtxn)?, self.index.primary_key(self.rtxn)?,
&mut fields_ids_map, &mut fields_ids_map,
@ -232,7 +240,7 @@ impl Transform<'_, '_> {
// The primary key is known so we must find the position in the CSV headers. // The primary key is known so we must find the position in the CSV headers.
headers.iter().position(|h| h == primary_key) headers.iter().position(|h| h == primary_key)
}, },
None => headers.iter().position(|h| h.contains("id")), None => headers.iter().position(is_primary_key),
}; };
// Returns the field id in the fields ids map, create an "id" field // Returns the field id in the fields ids map, create an "id" field