From 10f09c531fada3d4a2734eb95f10e360c368eb92 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Thu, 5 Sep 2024 18:22:16 +0200
Subject: [PATCH] add some commented code to read from json with raw values

---
 .../update/new/indexer/document_operation.rs  | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/milli/src/update/new/indexer/document_operation.rs b/milli/src/update/new/indexer/document_operation.rs
index c54ffd140..29f36a82e 100644
--- a/milli/src/update/new/indexer/document_operation.rs
+++ b/milli/src/update/new/indexer/document_operation.rs
@@ -338,3 +338,122 @@ fn merge_document_for_updates(
         }
     }
 }
+
+/*
+
+
+use std::{
+    borrow::{Borrow, Cow},
+    collections::BTreeMap,
+    ops::Deref,
+};
+
+use serde::Deserialize;
+use serde_json::{value::RawValue, Value};
+/*
+#[repr(transparent)]
+pub struct my_str(str);
+
+impl ToOwned for my_str {
+    type Owned = Box<str>;
+
+    fn to_owned(&self) -> Self::Owned {
+        self.0.to_string().into_boxed_str()
+    }
+}
+
+impl Borrow<my_str> for Box<str> {
+    fn borrow(&self) -> &my_str {
+        unsafe { std::mem::transmute(self.as_ref()) }
+    }
+}
+*/
+
+#[derive(Deserialize, PartialEq, Eq, PartialOrd, Ord)]
+pub struct CowKey<'doc>(#[serde(borrow)] Cow<'doc, str>);
+
+impl<'doc> Borrow<str> for CowKey<'doc> {
+    fn borrow(&self) -> &str {
+        self.0.borrow()
+    }
+}
+
+#[derive(Deserialize)]
+pub struct TopLevelMap<'doc>(#[serde(borrow)] BTreeMap<CowKey<'doc>, &'doc RawValue>);
+
+#[derive(Deserialize)]
+pub struct FlatDocs<'doc>(#[serde(borrow)] Vec<&'doc RawValue>);
+
+fn read_docs<'doc>(
+    ndjson: &'doc str,
+) -> impl Iterator<Item = Result<TopLevelMap<'doc>, serde_json::Error>> {
+    serde_json::Deserializer::from_str(ndjson).into_iter::<TopLevelMap>()
+}
+
+fn main() {
+    let ndjson_data = r#"
+    {
+        "id": {
+            "nested": "kefir"
+        },
+        "name": "Alice",
+        "age": 30
+    }
+    {
+        "id": {
+            "nested": "intel"
+        },
+        "name\n": "Bob",
+        "age": 22
+    }
+    "#;
+
+    let primary_key: Vec<_> = "id.nested".split('.').collect(); // dynamic
+
+    for doc in read_docs(ndjson_data) {
+        let doc = doc.unwrap();
+        let docid = get_docid(&doc, &primary_key).unwrap().expect("missingno");
+        println!("docid={docid}");
+    }
+}
+
+pub struct Document<'payload> {
+    fields: TopLevelMap<'payload>,
+    docid: String,
+}
+
+/*impl<'payload> Document<'payload> {
+    pub fn get(name: &str) -> Option<&'payload RawValue> {}
+
+    pub fn get_nested(name: &[&str]) {}
+}*/
+
+fn get_docid<'payload>(
+    map: &TopLevelMap<'payload>,
+    primary_key: &[&str],
+) -> serde_json::Result<Option<CowKey<'payload>>> {
+    match primary_key {
+        [] => unreachable!("arrrgh"),
+        [primary_key] => match map.0.get(*primary_key) {
+            Some(value) => {
+                let value = value.get();
+                let value_number: Result<u64, _> = serde_json::from_str(value);
+                Ok(Some(match value_number {
+                    Ok(value) => CowKey(Cow::Owned(value.to_string())),
+                    Err(_) => serde_json::from_str(value)?,
+                }))
+            }
+            None => Ok(None),
+        },
+        [head, tail @ ..] => match map.0.get(*head) {
+            Some(value) => {
+                let map = serde_json::from_str(value.get())?;
+                get_docid(&map, tail)
+            }
+            None => Ok(None),
+        },
+    }
+}
+
+
+*/