96: Check json payload on document addition r=curquiza a=MarinPostma

Check if the json payload in updates is valid. It uses a json validator to avoid allocation, and only serializes the json in case of error, to return a pretty message.

Co-authored-by: mpostma <postma.marin@protonmail.com>
This commit is contained in:
bors[bot] 2021-03-16 17:20:44 +00:00 committed by GitHub
commit ca3b343b1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 4 deletions

7
Cargo.lock generated
View File

@ -1823,6 +1823,7 @@ dependencies = [
"milli",
"mime",
"once_cell",
"oxidized-json-checker",
"parking_lot",
"rand 0.7.3",
"rayon",
@ -2122,6 +2123,12 @@ dependencies = [
"num-traits",
]
[[package]]
name = "oxidized-json-checker"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "938464aebf563f48ab86d1cfc0e2df952985c0b814d3108f41d1b85e7f5b0dac"
[[package]]
name = "page_size"
version = "0.4.2"

View File

@ -61,6 +61,7 @@ tempfile = "3.1.0"
thiserror = "1.0.24"
tokio = { version = "1", features = ["full"] }
uuid = "0.8.2"
oxidized-json-checker = "0.3.2"
[dependencies.sentry]
default-features = false

View File

@ -1,13 +1,15 @@
use std::collections::{hash_map::Entry, HashMap};
use std::io::SeekFrom;
use std::fs::{create_dir_all, remove_dir_all};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use super::index_actor::IndexActorHandle;
use log::info;
use oxidized_json_checker::JsonChecker;
use super::index_actor::IndexActorHandle;
use thiserror::Error;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tokio::fs::OpenOptions;
use tokio::io::{AsyncWriteExt, AsyncSeekExt};
use tokio::sync::{mpsc, oneshot, RwLock};
use uuid::Uuid;
@ -125,7 +127,11 @@ where
let update_store = self.store.get_or_create(uuid).await?;
let update_file_id = uuid::Uuid::new_v4();
let path = self.path.join(format!("update_{}", update_file_id));
let mut file = File::create(&path)
let mut file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
@ -146,7 +152,31 @@ where
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
file.seek(SeekFrom::Start(0))
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let mut file = file.into_std().await;
tokio::task::spawn_blocking(move || {
use std::io::{BufReader, sink, copy, Seek};
// If the payload is empty, ignore the check.
if file.metadata().map_err(|e| UpdateError::Error(Box::new(e)))?.len() > 0 {
// Check that the json payload is valid:
let reader = BufReader::new(&mut file);
let mut checker = JsonChecker::new(reader);
if copy(&mut checker, &mut sink()).is_err() || checker.finish().is_err() {
// The json file is invalid, we use Serde to get a nice error message:
file.seek(SeekFrom::Start(0))
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let _: serde_json::Value = serde_json::from_reader(file)
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
}
// The payload is valid, we can register it to the update store.
update_store
.register_update(meta, path, uuid)
.map(UpdateStatus::Pending)