meilisearch/milli/src/update/index_documents/validate.rs

use std::io::{Read, Seek};
use std::iter;
use std::result::Result as StdResult;

use serde_json::Value;

use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader, EnrichedDocumentsBatchReader};
use crate::error::{GeoError, InternalError, UserError};
use crate::update::index_documents::{obkv_to_object, writer_into_reader};
use crate::{FieldId, Index, Object, Result};

/// The symbol that separates the levels of a nested primary key,
/// e.g. the `.` in `doggo.id`.
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';

/// Validates a batch of documents and enriches it with the external id of every document.
///
/// The following checks are performed:
///  - a primary key is set on the index or can be inferred from the batch,
///  - every document has a document id (or `autogenerate_docids` allows a missing one),
///  - every document id that is present is valid,
///  - the `_geo` field is valid, but only when `_geo` is one of the sortable fields.
///
/// Errors propagated with `?` end up in the outer `Result`, while the reason a batch
/// is rejected is returned as a `UserError` in the inner one. On success the original
/// reader is returned wrapped in an `EnrichedDocumentsBatchReader`, along with the
/// document ids that were written, in batch order, into a temporary file.
pub fn validate_and_enrich_documents_batch<R: Read + Seek>(
    rtxn: &heed::RoTxn,
    index: &Index,
    autogenerate_docids: bool,
    reader: DocumentsBatchReader<R>,
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
    let mut cursor = reader.into_cursor();
    // We work on a copy of the batch index because a primary key field
    // may have to be inserted into it below.
    let mut documents_batch_index = cursor.documents_batch_index().clone();
    let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;

    // The primary key that has already been set for this index or, when there is none,
    // the one we guess: among the fields whose lowercased name contains "id",
    // the one with the smallest field id.
    let primary_key = match index.primary_key(rtxn)? {
        // A primary key containing the split symbol is resolved per document.
        Some(primary_key) if primary_key.contains(PRIMARY_KEY_SPLIT_SYMBOL) => {
            PrimaryKey::nested(primary_key)
        }
        Some(primary_key) => match documents_batch_index.id(primary_key) {
            Some(id) => PrimaryKey::flat(primary_key, id),
            None if autogenerate_docids => {
                PrimaryKey::flat(primary_key, documents_batch_index.insert(primary_key))
            }
            None => {
                // No document of this batch contains the primary key field: report
                // the first document as the faulty one, or a missing primary key
                // when the batch is empty.
                return match cursor.next_document()? {
                    Some(first_document) => Ok(Err(UserError::MissingDocumentId {
                        primary_key: primary_key.to_string(),
                        document: obkv_to_object(&first_document, &documents_batch_index)?,
                    })),
                    None => Ok(Err(UserError::MissingPrimaryKey)),
                };
            }
        },
        None => {
            let guessed = documents_batch_index
                .iter()
                .filter(|(_, name)| name.to_lowercase().contains("id"))
                .min_by_key(|(fid, _)| *fid);
            match guessed {
                Some((id, name)) => PrimaryKey::flat(name.as_str(), *id),
                None if autogenerate_docids => {
                    PrimaryKey::flat("id", documents_batch_index.insert("id"))
                }
                None => return Ok(Err(UserError::MissingPrimaryKey)),
            }
        }
    };

    // The `_geo` field must only be validated when it is declared as a sortable field:
    // this is the only case where we keep `Some` field id for it.
    let geo_field_id = match documents_batch_index.id("_geo") {
        Some(geo_field_id) if index.sortable_fields(rtxn)?.contains("_geo") => Some(geo_field_id),
        _otherwise => None,
    };

    // The position of the current document in the batch.
    let mut count = 0;
    while let Some(document) = cursor.next_document()? {
        let document_id = match fetch_document_id(
            &document,
            &documents_batch_index,
            primary_key,
            autogenerate_docids,
            count,
        )? {
            Ok(document_id) => document_id,
            Err(user_error) => return Ok(Err(user_error)),
        };

        // The ids are keyed by the big-endian position of the document in the batch.
        external_ids.insert(count.to_be_bytes(), &document_id)?;

        if let Some(geo_value) = geo_field_id.and_then(|fid| document.get(fid)) {
            if let Err(user_error) = validate_geo_from_json(Value::from(document_id), geo_value)? {
                return Ok(Err(UserError::from(user_error)));
            }
        }
        count += 1;
    }

    let external_ids = writer_into_reader(external_ids)?;
    let reader = EnrichedDocumentsBatchReader::new(cursor.into_reader(), external_ids)?;

    Ok(Ok(reader))
}

/// Extracts and validates the id of a single document.
///
/// - For a flat primary key the id is read from the primary key field. When the field
///   is absent and `autogenerate_docids` is set, a placeholder string built from `count`
///   (the position of the document in the batch) is returned instead.
/// - For a nested primary key every candidate first-level field is explored and exactly
///   one value must match the key.
///
/// A `UserError` is returned in the inner `Result` when the id is missing, invalid,
/// or when a nested key matches more than one value.
fn fetch_document_id(
    document: &obkv::KvReader<FieldId>,
    documents_batch_index: &DocumentsBatchIndex,
    primary_key: PrimaryKey,
    autogenerate_docids: bool,
    count: u32,
) -> Result<StdResult<String, UserError>> {
    match primary_key {
        PrimaryKey::Flat { name, field_id } => {
            if let Some(raw_id) = document.get(field_id) {
                let json_id = serde_json::from_slice(raw_id).map_err(InternalError::SerdeJson)?;
                return validate_document_id_value(json_id);
            }

            if autogenerate_docids {
                return Ok(Ok(format!("{{auto-generated id of the {}nth document}}", count)));
            }

            Ok(Err(UserError::MissingDocumentId {
                primary_key: name.to_string(),
                document: obkv_to_object(document, documents_batch_index)?,
            }))
        }
        nested @ PrimaryKey::Nested { .. } => {
            let mut candidates = Vec::new();

            for (first_level_name, right) in nested.possible_level_names() {
                // Skip the levels that do not correspond to a field of this document.
                let value_bytes = match documents_batch_index
                    .id(first_level_name)
                    .and_then(|field_id| document.get(field_id))
                {
                    Some(value_bytes) => value_bytes,
                    None => continue,
                };

                let object =
                    serde_json::from_slice(value_bytes).map_err(InternalError::SerdeJson)?;
                fetch_matching_values(object, right, &mut candidates);

                // More than one value matching the primary key is ambiguous.
                if candidates.len() >= 2 {
                    return Ok(Err(UserError::TooManyDocumentIds {
                        primary_key: nested.primary_key().to_string(),
                        document: obkv_to_object(document, documents_batch_index)?,
                    }));
                }
            }

            match candidates.pop() {
                Some(document_id) => validate_document_id_value(document_id),
                None => Ok(Err(UserError::MissingDocumentId {
                    primary_key: nested.primary_key().to_string(),
                    document: obkv_to_object(document, documents_batch_index)?,
                })),
            }
        }
    }
}

/// A type that represents the kind of primary key that has been set
/// for this index: a classic flat one or a nested one.
#[derive(Debug, Clone, Copy)]
enum PrimaryKey<'a> {
    /// A primary key that is a single field, known by its name and field id.
    Flat { name: &'a str, field_id: FieldId },
    /// A primary key whose name contains the split symbol, e.g. `doggo.id`.
    Nested { name: &'a str },
}

impl PrimaryKey<'_> {
    /// Creates a flat primary key from its name and field id.
    fn flat(name: &str, field_id: FieldId) -> PrimaryKey {
        PrimaryKey::Flat { name, field_id }
    }

    /// Creates a nested primary key from its full, dotted name.
    fn nested(name: &str) -> PrimaryKey {
        PrimaryKey::Nested { name }
    }

    /// Returns the full name of the primary key.
    fn primary_key(&self) -> &str {
        match self {
            PrimaryKey::Flat { name, .. } => name,
            PrimaryKey::Nested { name } => name,
        }
    }

    /// Returns an `Iterator` that gives all the possible field names the primary key
    /// can have, depending on the first level name and the depth of the objects.
    ///
    /// Each item is a `(first_level_name, remaining_path)` pair obtained by cutting the
    /// full name at one of its split symbols. The full name paired with an empty path
    /// comes first, then the cuts from the rightmost split symbol to the leftmost one.
    /// For `a.b.c` this gives `("a.b.c", "")`, `("a.b", "c")` and `("a", "b.c")`.
    fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
        let name = self.primary_key();
        let split_len = PRIMARY_KEY_SPLIT_SYMBOL.len_utf8();
        // Always cut the *full* name so that the remaining path keeps every level
        // located after the split symbol, not only the one directly following it.
        iter::once((name, "")).chain(
            name.rmatch_indices(PRIMARY_KEY_SPLIT_SYMBOL)
                .map(move |(index, _)| (&name[..index], &name[index + split_len..])),
        )
    }
}

/// Returns `true` when `key` is a prefix of `selector` that ends on a level boundary:
/// what follows `key` in `selector` is either nothing or starts with the split symbol.
fn contained_in(selector: &str, key: &str) -> bool {
    match selector.strip_prefix(key) {
        Some(rest) => rest.is_empty() || rest.starts_with(PRIMARY_KEY_SPLIT_SYMBOL),
        None => false,
    }
}

/// Collects into `output` the values of `value` that match `selector`.
///
/// An object is explored recursively from an empty base key, any other
/// kind of value is pushed into `output` as is.
pub fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
    if let Value::Object(object) = value {
        fetch_matching_values_in_object(object, selector, "", output);
    } else {
        output.push(value);
    }
}

/// Walks `object` and pushes into `output` the non-object values found on a path
/// that is related to `selector`.
///
/// `base_key` is the dotted path of `object` itself (empty for the root object).
/// An entry is followed when its path is a level-wise prefix of `selector` or when
/// `selector` is a level-wise prefix of its path; any other entry is ignored.
pub fn fetch_matching_values_in_object(
    object: Object,
    selector: &str,
    base_key: &str,
    output: &mut Vec<Value>,
) {
    for (key, value) in object {
        let path = if base_key.is_empty() {
            key.to_string()
        } else {
            format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
        };

        // If the user only specified `doggo` we must explore all the fields of `doggo`,
        // this is why the containment is checked in both directions.
        let is_on_path = contained_in(selector, &path) || contained_in(&path, selector);
        if !is_on_path {
            continue;
        }

        if let Value::Object(nested) = value {
            fetch_matching_values_in_object(nested, selector, &path, output);
        } else {
            output.push(value);
        }
    }
}

/// Returns the document id without its surrounding whitespace, or `None` when the
/// trimmed id is empty or contains anything but ASCII letters, ASCII digits,
/// `-` and `_`.
pub fn validate_document_id(document_id: &str) -> Option<&str> {
    let is_allowed = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';

    let trimmed = document_id.trim();
    if trimmed.is_empty() || !trimmed.chars().all(is_allowed) {
        return None;
    }

    Some(trimmed)
}

/// Validates a JSON document id and converts it into its string representation.
///
/// - A string must pass `validate_document_id`; its trimmed version is returned.
/// - A number is accepted only when `is_i64()` holds for it.
/// - Anything else is rejected with `UserError::InvalidDocumentId`.
pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
    let outcome = match document_id {
        Value::String(raw) => match validate_document_id(&raw) {
            // Nothing was trimmed: reuse the original allocation.
            Some(trimmed) if trimmed.len() == raw.len() => Ok(raw),
            Some(trimmed) => Ok(trimmed.to_string()),
            None => Err(UserError::InvalidDocumentId { document_id: Value::String(raw) }),
        },
        Value::Number(number) if number.is_i64() => Ok(number.to_string()),
        invalid => Err(UserError::InvalidDocumentId { document_id: invalid }),
    };

    Ok(outcome)
}

/// Tries to read an `f64` out of a JSON `Value`: a number is converted with `as_f64`
/// and a string is parsed as an `f64`. When no float can be extracted the original
/// `Value` is handed back in the `Err` variant.
pub fn extract_float_from_value(value: Value) -> StdResult<f64, Value> {
    let extracted = match &value {
        Value::Number(number) => number.as_f64(),
        Value::String(text) => text.parse::<f64>().ok(),
        _ => None,
    };

    extracted.ok_or(value)
}

/// Checks that `bytes` is a JSON object with a `lat` and a `lng` entry from which
/// floats can be extracted.
///
/// `document_id` is only used to build the `GeoError` describing what is wrong.
/// A JSON deserialization failure is reported in the outer `Result`.
pub fn validate_geo_from_json(document_id: Value, bytes: &[u8]) -> Result<StdResult<(), GeoError>> {
    let mut object = match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? {
        Value::Object(object) => object,
        value => return Ok(Err(GeoError::NotAnObject { document_id, value })),
    };

    // Both coordinates must be present before looking at their values.
    let (lat, lng) = match (object.remove("lat"), object.remove("lng")) {
        (Some(lat), Some(lng)) => (lat, lng),
        (None, Some(_)) => return Ok(Err(GeoError::MissingLatitude { document_id })),
        (Some(_), None) => return Ok(Err(GeoError::MissingLongitude { document_id })),
        (None, None) => return Ok(Err(GeoError::MissingLatitudeAndLongitude { document_id })),
    };

    let outcome = match (extract_float_from_value(lat), extract_float_from_value(lng)) {
        (Ok(_), Ok(_)) => Ok(()),
        (Err(value), Ok(_)) => Err(GeoError::BadLatitude { document_id, value }),
        (Ok(_), Err(value)) => Err(GeoError::BadLongitude { document_id, value }),
        (Err(lat), Err(lng)) => Err(GeoError::BadLatitudeAndLongitude { document_id, lat, lng }),
    };

    Ok(outcome)
}
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`use std::io::{Read, Seek};`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`use std::iter;`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`use std::result::Result as StdResult;`

			`use serde_json::Value;`

Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader, EnrichedDocumentsBatchReader};`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`use crate::error::{GeoError, InternalError, UserError};`
Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`use crate::update::index_documents::{obkv_to_object, writer_into_reader};`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`use crate::{FieldId, Index, Object, Result};`

			`/// The symbol used to define levels in a nested primary key.`
			`const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00
Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`/// This function validates and enrich the documents by checking that:`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`/// - we can infer a primary key,`
Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`/// - all the documents id exist and are extracted,`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`/// - the validity of them but also,`
			/// - the validity of the `_geo` field depending on the settings.
Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`pub fn validate_and_enrich_documents_batch<R: Read + Seek>(`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`rtxn: &heed::RoTxn,`
			`index: &Index,`
Fix the indexation tests 2022-06-15 20:35:19 +08:00			`autogenerate_docids: bool,`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`reader: DocumentsBatchReader<R>,`
Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`let mut cursor = reader.into_cursor();`
Support the auto-generated ids when validating documents 2022-06-15 21:14:20 +08:00			`let mut documents_batch_index = cursor.documents_batch_index().clone();`
Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00
			`// The primary key field id that has already been set for this index or the one`
			`// we will guess by searching for the first key that contains "id" as a substring.`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`let primary_key = match index.primary_key(rtxn)? {`
			`Some(primary_key) if primary_key.contains(PRIMARY_KEY_SPLIT_SYMBOL) => {`
			`PrimaryKey::nested(primary_key)`
			`}`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`Some(primary_key) => match documents_batch_index.id(primary_key) {`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`Some(id) => PrimaryKey::flat(primary_key, id),`
			`None if autogenerate_docids => {`
			`PrimaryKey::flat(primary_key, documents_batch_index.insert(primary_key))`
			`}`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`None => {`
			`return match cursor.next_document()? {`
			`Some(first_document) => Ok(Err(UserError::MissingDocumentId {`
			`primary_key: primary_key.to_string(),`
			`document: obkv_to_object(&first_document, &documents_batch_index)?,`
			`})),`
			`None => Ok(Err(UserError::MissingPrimaryKey)),`
			`};`
			`}`
			`},`
			`None => {`
			`let guessed = documents_batch_index`
			`.iter()`
Support the auto-generated ids when validating documents 2022-06-15 21:14:20 +08:00			`.filter(\|(_, name)\| name.to_lowercase().contains("id"))`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`.min_by_key(\|(fid, _)\| *fid);`
			`match guessed {`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`Some((id, name)) => PrimaryKey::flat(name.as_str(), *id),`
			`None if autogenerate_docids => {`
			`PrimaryKey::flat("id", documents_batch_index.insert("id"))`
			`}`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`None => return Ok(Err(UserError::MissingPrimaryKey)),`
			`}`
			`}`
			`};`

			`// If the settings specifies that a _geo field must be used therefore we must check the`
			// validity of it in all the documents of this batch and this is when we return `Some`.
			`let geo_field_id = match documents_batch_index.id("_geo") {`
			`Some(geo_field_id) if index.sortable_fields(rtxn)?.contains("_geo") => Some(geo_field_id),`
			`_otherwise => None,`
			`};`

Support the auto-generated ids when validating documents 2022-06-15 21:14:20 +08:00			`let mut count = 0;`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`while let Some(document) = cursor.next_document()? {`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`let document_id = match fetch_document_id(`
			`&document,`
			`&documents_batch_index,`
			`primary_key,`
			`autogenerate_docids,`
			`count,`
			`)? {`
			`Ok(document_id) => document_id,`
			`Err(user_error) => return Ok(Err(user_error)),`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`};`

Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`external_ids.insert(count.to_be_bytes(), &document_id)?;`

Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`if let Some(geo_value) = geo_field_id.and_then(\|fid\| document.get(fid)) {`
			`if let Err(user_error) = validate_geo_from_json(Value::from(document_id), geo_value)? {`
			`return Ok(Err(UserError::from(user_error)));`
			`}`
			`}`
Support the auto-generated ids when validating documents 2022-06-15 21:14:20 +08:00			`count += 1;`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`}`

Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`let external_ids = writer_into_reader(external_ids)?;`
			`let reader = EnrichedDocumentsBatchReader::new(cursor.into_reader(), external_ids)?;`

			`Ok(Ok(reader))`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`}`

Make the nested primary key work 2022-06-15 23:58:52 +08:00			/// Retrieve the document id after validating it, returning a `UserError`
			`/// if the id is invalid or can't be guessed.`
			`fn fetch_document_id(`
			`document: &obkv::KvReader<FieldId>,`
			`documents_batch_index: &DocumentsBatchIndex,`
			`primary_key: PrimaryKey,`
			`autogenerate_docids: bool,`
Make the Transform read from an EnrichedDocumentsBatchReader 2022-06-20 19:48:02 +08:00			`count: u32,`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`) -> Result<StdResult<String, UserError>> {`
			`match primary_key {`
			`PrimaryKey::Flat { name: primary_key, field_id: primary_key_id } => {`
			`match document.get(primary_key_id) {`
			`Some(document_id_bytes) => {`
			`let document_id = serde_json::from_slice(document_id_bytes)`
			`.map_err(InternalError::SerdeJson)?;`
			`match validate_document_id_value(document_id)? {`
			`Ok(document_id) => Ok(Ok(document_id)),`
			`Err(user_error) => Ok(Err(user_error)),`
			`}`
			`}`
			`None if autogenerate_docids => {`
			`Ok(Ok(format!("{{auto-generated id of the {}nth document}}", count)))`
			`}`
			`None => Ok(Err(UserError::MissingDocumentId {`
			`primary_key: primary_key.to_string(),`
			`document: obkv_to_object(&document, &documents_batch_index)?,`
			`})),`
			`}`
			`}`
			`nested @ PrimaryKey::Nested { .. } => {`
			`let mut matching_documents_ids = Vec::new();`
			`for (first_level_name, right) in nested.possible_level_names() {`
			`if let Some(field_id) = documents_batch_index.id(first_level_name) {`
			`if let Some(value_bytes) = document.get(field_id) {`
			`let object = serde_json::from_slice(value_bytes)`
			`.map_err(InternalError::SerdeJson)?;`
			`fetch_matching_values(object, right, &mut matching_documents_ids);`

			`if matching_documents_ids.len() >= 2 {`
			`return Ok(Err(UserError::TooManyDocumentIds {`
			`primary_key: nested.primary_key().to_string(),`
			`document: obkv_to_object(&document, &documents_batch_index)?,`
			`}));`
			`}`
			`}`
			`}`
			`}`

			`match matching_documents_ids.pop() {`
			`Some(document_id) => match validate_document_id_value(document_id)? {`
			`Ok(document_id) => Ok(Ok(document_id)),`
			`Err(user_error) => Ok(Err(user_error)),`
			`},`
			`None => Ok(Err(UserError::MissingDocumentId {`
			`primary_key: nested.primary_key().to_string(),`
			`document: obkv_to_object(&document, &documents_batch_index)?,`
			`})),`
			`}`
			`}`
			`}`
			`}`

			`/// A type that represent the type of primary key that has been set`
			`/// for this index, a classic flat one or a nested one.`
			`#[derive(Debug, Clone, Copy)]`
			`enum PrimaryKey<'a> {`
			`Flat { name: &'a str, field_id: FieldId },`
			`Nested { name: &'a str },`
			`}`

			`impl PrimaryKey<'_> {`
			`fn flat(name: &str, field_id: FieldId) -> PrimaryKey {`
			`PrimaryKey::Flat { name, field_id }`
			`}`

			`fn nested(name: &str) -> PrimaryKey {`
			`PrimaryKey::Nested { name }`
			`}`

			`fn primary_key(&self) -> &str {`
			`match self {`
			`PrimaryKey::Flat { name, .. } => name,`
			`PrimaryKey::Nested { name } => name,`
			`}`
			`}`

			/// Returns an `Iterator` that gives all the possible fields names the primary key
			`/// can have depending of the first level name and deepnes of the objects.`
			`fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {`
			`let name = self.primary_key();`
			`iter::successors(Some((name, "")), \|(curr, _)\| curr.rsplit_once(PRIMARY_KEY_SPLIT_SYMBOL))`
			`}`
			`}`

			`fn contained_in(selector: &str, key: &str) -> bool {`
			`selector.starts_with(key)`
			`&& selector[key.len()..]`
			`.chars()`
			`.next()`
			`.map(\|c\| c == PRIMARY_KEY_SPLIT_SYMBOL)`
			`.unwrap_or(true)`
			`}`

			`pub fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {`
			`match value {`
			`Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),`
			`otherwise => output.push(otherwise),`
			`}`
			`}`

			`pub fn fetch_matching_values_in_object(`
			`object: Object,`
			`selector: &str,`
			`base_key: &str,`
			`output: &mut Vec<Value>,`
			`) {`
			`for (key, value) in object {`
			`let base_key = if base_key.is_empty() {`
			`key.to_string()`
			`} else {`
			`format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)`
			`};`

			// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
			`// so we check the contained_in on both side.`
			`let should_continue =`
			`contained_in(selector, &base_key) \|\| contained_in(&base_key, selector);`

			`if should_continue {`
			`match value {`
			`Value::Object(object) => {`
			`fetch_matching_values_in_object(object, selector, &base_key, output)`
			`}`
			`value => output.push(value),`
			`}`
			`}`
			`}`
			`}`

Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			/// Returns a trimmed version of the document id or `None` if it is invalid.
			`pub fn validate_document_id(document_id: &str) -> Option<&str> {`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`let document_id = document_id.trim();`
			`if !document_id.is_empty()`
			`&& document_id.chars().all(\|c\| matches!(c, 'a'..='z' \| 'A'..='Z' \| '0'..='9' \| '-' \| '_'))`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`{`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`Some(document_id)`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`} else {`
			`None`
			`}`
			`}`

			`/// Parses a Json encoded document id and validate it, returning a user error when it is one.`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {`
			`match document_id {`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`Value::String(string) => match validate_document_id(&string) {`
			`Some(s) if s.len() == string.len() => Ok(Ok(string)),`
			`Some(s) => Ok(Ok(s.to_string())),`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`},`
Make sur that we do not accept floats as documents ids 2022-06-15 22:06:52 +08:00			`Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`content => Ok(Err(UserError::InvalidDocumentId { document_id: content.clone() })),`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`}`
			`}`

			/// Try to extract an `f64` from a JSON `Value` and return the `Value`
			/// in the `Err` variant if it failed.
			`pub fn extract_float_from_value(value: Value) -> StdResult<f64, Value> {`
			`match value {`
			`Value::Number(ref n) => n.as_f64().ok_or(value),`
			`Value::String(ref s) => s.parse::<f64>().map_err(\|_\| value),`
			`value => Err(value),`
			`}`
			`}`

			`pub fn validate_geo_from_json(document_id: Value, bytes: &[u8]) -> Result<StdResult<(), GeoError>> {`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? {`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`Value::Object(mut object) => match (object.remove("lat"), object.remove("lng")) {`
			`(Some(lat), Some(lng)) => {`
			`match (extract_float_from_value(lat), extract_float_from_value(lng)) {`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`(Ok(_), Ok(_)) => Ok(Ok(())),`
			`(Err(value), Ok(_)) => Ok(Err(GeoError::BadLatitude { document_id, value })),`
			`(Ok(_), Err(value)) => Ok(Err(GeoError::BadLongitude { document_id, value })),`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`(Err(lat), Err(lng)) => {`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`Ok(Err(GeoError::BadLatitudeAndLongitude { document_id, lat, lng }))`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`}`
			`}`
			`}`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`(None, Some(_)) => Ok(Err(GeoError::MissingLatitude { document_id })),`
			`(Some(_), None) => Ok(Err(GeoError::MissingLongitude { document_id })),`
			`(None, None) => Ok(Err(GeoError::MissingLatitudeAndLongitude { document_id })),`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`},`
Make the nested primary key work 2022-06-15 23:58:52 +08:00			`value => Ok(Err(GeoError::NotAnObject { document_id, value })),`
			`}`
Introduce the validate_documents_batch function 2022-06-15 00:12:15 +08:00			`}`