From f172d2850929fb2dc3432cb43fa3983dd340d3a1 Mon Sep 17 00:00:00 2001 From: Ben Bangert Date: Thu, 29 Nov 2018 12:23:55 -0800 Subject: [PATCH] feat: fix extractors, middleware for e2e tests Extractors now use proper limits from the state object. BSO handling during deserialization/validation matches Python more closely. Closes #97 --- src/db/mysql/batch.rs | 1 + src/db/mysql/models.rs | 2 +- src/db/mysql/test.rs | 2 + src/db/params.rs | 2 + src/error.rs | 47 +++++++++- src/web/error.rs | 6 ++ src/web/extractors.rs | 200 +++++++++++++++++++++++++++++------------ src/web/handlers.rs | 1 + src/web/middleware.rs | 3 +- 9 files changed, 201 insertions(+), 63 deletions(-) diff --git a/src/db/mysql/batch.rs b/src/db/mysql/batch.rs index d3537b04..574e51cb 100644 --- a/src/db/mysql/batch.rs +++ b/src/db/mysql/batch.rs @@ -94,6 +94,7 @@ pub fn commit(db: &MysqlDb, params: params::CommitBatch) -> Result Result<()> { postbso("b1", Some("payload 1"), Some(1000000000), None), postbso("b2", Some("payload 2"), Some(100), None), ], + failed: Default::default(), })?; assert!(result.success.contains(&"b0".to_owned())); @@ -748,6 +749,7 @@ fn post_bsos() -> Result<()> { postbso("b0", Some("updated 0"), Some(11), Some(100000)), postbso("b2", Some("updated 2"), Some(22), Some(10000)), ], + failed: Default::default(), })?; assert_eq!(result2.success.len(), 2); diff --git a/src/db/params.rs b/src/db/params.rs index debf135a..f9ee39a9 100644 --- a/src/db/params.rs +++ b/src/db/params.rs @@ -1,6 +1,7 @@ //! Parameter types for database methods. #![allow(proc_macro_derive_resolution_fallback)] +use std::collections::HashMap; use web::extractors::{BatchBsoBody, BsoQueryParams, HawkIdentifier}; @@ -65,6 +66,7 @@ collection_data! { }, PostBsos { bsos: Vec, + failed: HashMap, }, CreateBatch { diff --git a/src/error.rs b/src/error.rs index 5b11f2a3..7b25e7e3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -7,7 +7,26 @@ use failure::{Backtrace, Context, Fail}; use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer}; use db::error::{DbError, DbErrorKind}; -use web::error::{HawkError, ValidationError}; +use web::error::{HawkError, ValidationError, ValidationErrorKind}; +use web::extractors::RequestErrorLocation; + +/// Legacy Sync 1.1 error codes, which Sync 1.5 also returns by replacing the descriptive JSON +/// information and replacing it with one of these error codes. +#[derive(Serialize)] +enum WeaveError { + /// Unknown error + UnknownError = 0, + /// Illegal method/protocol + IllegalMethod = 1, + /// Json parse failure + MalformedJson = 6, + /// Invalid Weave Basic Object + InvalidWbo = 8, + /// User over quota + OverQuota = 14, + /// Size limit exceeded + SizeLimitExceeded = 17, +} /// Common `Result` type. pub type ApiResult = Result; @@ -75,6 +94,24 @@ impl ApiError { } false } + + fn weave_error_code(&self) -> WeaveError { + match self.kind() { + ApiErrorKind::Validation(ver) => match ver.kind() { + ValidationErrorKind::FromDetails(ref _description, ref location, name) => { + let name = name.clone().unwrap_or("".to_owned()); + if *location == RequestErrorLocation::Body + && ["bso", "bsos"].contains(&name.as_str()) + { + return WeaveError::InvalidWbo; + } + WeaveError::UnknownError + } + _ => WeaveError::UnknownError, + }, + _ => WeaveError::UnknownError, + } + } } impl From for HttpResponse { @@ -98,10 +135,16 @@ impl From> for ApiError { impl ResponseError for ApiError { fn error_response(&self) -> HttpResponse { + // To return a descriptive error response, this would work. We do not + // unfortunately do that so that we can retain Sync 1.1 backwards compatibility + // as the Python one does. + // HttpResponse::build(self.status).json(self) + // + // So instead we translate our error to a backwards compatible one HttpResponse::build(self.status) .if_true(self.is_conflict(), |resp| { resp.header("Retry-After", RETRY_AFTER.to_string()); - }).json(self) + }).json(self.weave_error_code() as i32) } } diff --git a/src/web/error.rs b/src/web/error.rs index e6f9dd67..8637aacf 100644 --- a/src/web/error.rs +++ b/src/web/error.rs @@ -71,6 +71,12 @@ pub struct ValidationError { pub status: StatusCode, } +impl ValidationError { + pub fn kind(&self) -> &ValidationErrorKind { + self.inner.get_context() + } +} + /// Causes of extractor errors. #[derive(Debug, Fail)] pub enum ValidationErrorKind { diff --git a/src/web/extractors.rs b/src/web/extractors.rs index 04cb922b..dfec9f82 100644 --- a/src/web/extractors.rs +++ b/src/web/extractors.rs @@ -2,6 +2,7 @@ //! //! Handles ensuring the header's, body, and query parameters are correct, extraction to //! relevant types, and failing correctly with the appropriate errors if issues arise. +use std::collections::HashMap; use std::str::FromStr; use actix_web::http::header::{HeaderValue, CONTENT_TYPE}; @@ -13,7 +14,7 @@ use actix_web::{ use futures::{future, Future}; use regex::Regex; use serde::de::{Deserialize, Deserializer, Error as SerdeError}; -use serde_json; +use serde_json::Value; use validator::{Validate, ValidationError}; use db::{util::SyncTimestamp, Db, Sorting}; @@ -24,15 +25,10 @@ use web::{auth::HawkPayload, error::ValidationErrorKind}; const BATCH_MAX_IDS: usize = 100; // BSO const restrictions -const BSO_MAX_PAYLOAD_SIZE: usize = 2 * 1024 * 1024; const BSO_MAX_TTL: u32 = 31536000; const BSO_MAX_SORTINDEX_VALUE: i32 = 999999999; const BSO_MIN_SORTINDEX_VALUE: i32 = -999999999; -// TODO: These should come from config using actix with_config -const BATCH_MAX_RECORDS: usize = 100; -const BATCH_MAX_BYTES: usize = BSO_MAX_PAYLOAD_SIZE * 150; - lazy_static! { static ref KNOWN_BAD_PAYLOAD_REGEX: Regex = Regex::new(r#"IV":\s*"AAAAAAAAAAAAAAAAAAAAAA=="#).unwrap(); @@ -46,18 +42,41 @@ pub struct UidParam { uid: u64, } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Deserialize, Serialize, Validate)] pub struct BatchBsoBody { + #[validate(custom = "validate_body_bso_id")] pub id: String, + #[validate(custom = "validate_body_bso_sortindex")] pub sortindex: Option, pub payload: Option, + #[validate(custom = "validate_body_bso_ttl")] pub ttl: Option, } +impl BatchBsoBody { + /// Function to convert valid raw JSON BSO body to a BatchBsoBody + fn from_raw_bso(val: &Value) -> Result { + let map = val.as_object().ok_or("invalid json")?; + // Verify all the keys are valid + let valid_keys = ["id", "sortindex", "payload", "ttl"]; + for key_name in map.keys() { + if !valid_keys.contains(&key_name.as_str()) { + return Err(format!("unknown field {}", key_name)); + } + } + serde_json::from_value(val.clone()) + .map_err(|_| "invalid json".to_string()) + .and_then(|v: BatchBsoBody| match v.validate() { + Ok(()) => Ok(v), + Err(e) => Err(format!("invalid bso: {}", e)), + }) + } +} + #[derive(Default, Deserialize)] pub struct BsoBodies { pub valid: Vec, - pub invalid: Vec, + pub invalid: HashMap, } impl FromRequest for BsoBodies { @@ -92,9 +111,12 @@ impl FromRequest for BsoBodies { } } + // Get the max request size we'll parse + let max_request_bytes = req.state().limits.max_request_bytes; + // Load the entire request into a String let mut config = PayloadConfig::default(); - config.limit(BATCH_MAX_BYTES); + config.limit(max_request_bytes as usize); let fut = if let Ok(result) = ::from_request(req, &config) { result } else { @@ -120,27 +142,33 @@ impl FromRequest for BsoBodies { // content_type header let newlines: bool = content_type == b"application/newlines"; + // Grab the max sizes + let max_payload_size = req.state().limits.max_record_payload_bytes as usize; + let max_post_bytes = req.state().limits.max_post_bytes as usize; + let fut = fut.and_then(move |body| { - // Parse out the body per the content type - let bsos: Vec = if newlines { + // Get all the raw JSON values + let bsos: Vec = if newlines { let mut bsos = Vec::new(); for item in body.lines() { // Skip any blanks if item == "" { continue; } - if let Ok(item) = serde_json::from_str(&item) { - bsos.push(item); + // Check that its a valid JSON map like we expect + if let Ok(raw_json) = serde_json::from_str::(&item) { + bsos.push(raw_json); } else { - // Per Python version, all BSO's must parse or we error out + // Per Python version, BSO's must json deserialize return future::err(make_error()); } } bsos } else { - if let Ok(bsos) = serde_json::from_str(&body) { - bsos + if let Ok(json_vals) = serde_json::from_str::>(&body) { + json_vals } else { + // Per Python version, BSO's must json deserialize return future::err(make_error()); } }; @@ -148,27 +176,61 @@ impl FromRequest for BsoBodies { // Validate all the BSO's, move invalid to our other list. Assume they'll all make // it with our pre-allocation let mut valid: Vec = Vec::with_capacity(bsos.len()); - let mut invalid: Vec = Vec::new(); - for mut bso in bsos { - if bso.validate().is_ok() { - // They're only valid if they include an id. - // XXX: Verify the id is optional, this may be a legacy thing - match bso { - BsoBody { - id: Some(id), - sortindex, - payload, - ttl, - } => valid.push(BatchBsoBody { - id, - sortindex, - payload, - ttl, - }), - bso => invalid.push(bso), + + // Invalid BSO's are any BSO that can deserialize despite how wrong the contents are + // per the way the Python version works. + let mut invalid: HashMap = HashMap::new(); + + // Keep track of our total payload size + let mut total_payload_size = 0; + + // Temporarily track the bso id's for dupe detection + let mut bso_ids: Vec = Vec::with_capacity(bsos.len()); + + for bso in bsos { + // Error out if its not a JSON mapping type + if !bso.is_object() { + return future::err(make_error()); + } + // Save all id's we get, check for missing id, or duplicate. + let bso_id = if let Some(id) = bso.get("id").and_then(|s| s.as_str()) { + let id = id.to_string(); + if bso_ids.contains(&id) { + return future::err( + ValidationErrorKind::FromDetails( + "Input BSO has duplicate ID".to_owned(), + RequestErrorLocation::Body, + Some("bsos".to_owned()), + ).into(), + ); + } else { + bso_ids.push(id.clone()); + id } } else { - invalid.push(bso); + return future::err( + ValidationErrorKind::FromDetails( + "Input BSO has no ID".to_owned(), + RequestErrorLocation::Body, + Some("bsos".to_owned()), + ).into(), + ); + }; + match BatchBsoBody::from_raw_bso(&bso) { + Ok(b) => { + // Is this record too large? Deny if it is. + let payload_size = b.payload.as_ref().map(|v| v.len()).unwrap_or_default(); + total_payload_size += payload_size; + if payload_size <= max_payload_size && total_payload_size <= max_post_bytes + { + valid.push(b); + } else { + invalid.insert(b.id, "retry bytes".to_string()); + } + } + Err(e) => { + invalid.insert(bso_id, e); + } } } future::ok(BsoBodies { valid, invalid }) @@ -184,7 +246,6 @@ pub struct BsoBody { pub id: Option, #[validate(custom = "validate_body_bso_sortindex")] pub sortindex: Option, - #[validate(custom = "validate_body_bso_payload")] pub payload: Option, #[validate(custom = "validate_body_bso_ttl")] pub ttl: Option, @@ -211,7 +272,10 @@ impl FromRequest for BsoBody { } } let mut config = JsonConfig::default(); - config.limit(BSO_MAX_PAYLOAD_SIZE); + let max_request_size = req.state().limits.max_request_bytes as usize; + config.limit(max_request_size); + + let max_payload_size = req.state().limits.max_record_payload_bytes as usize; let fut = >::from_request(req, &config) .map_err(|e| { let err: ApiError = ValidationErrorKind::FromDetails( @@ -220,7 +284,16 @@ impl FromRequest for BsoBody { Some("bso".to_owned()), ).into(); err.into() - }).and_then(|bso: Json| { + }).and_then(move |bso: Json| { + // Check the max payload size manually with our desired limit + if bso.payload.as_ref().map(|s| s.len()).unwrap_or_default() > max_payload_size { + let err: ApiError = ValidationErrorKind::FromDetails( + "payload too large".to_owned(), + RequestErrorLocation::Body, + Some("bso".to_owned()), + ).into(); + return future::err(err.into()); + } if let Err(e) = bso.validate() { let err: ApiError = ValidationErrorKind::FromValidationErrors(e, RequestErrorLocation::Body) @@ -369,13 +442,14 @@ impl FromRequest for CollectionPostRequest { /// - If the collection is 'crypto', known bad payloads are checked for /// - Any valid BSO's beyond `BATCH_MAX_RECORDS` are moved to invalid fn from_request(req: &HttpRequest, _: &Self::Config) -> Self::Result { + let max_post_records = req.state().limits.max_post_records as i64; let fut = <( HawkIdentifier, Box, CollectionParam, BsoQueryParams, BsoBodies, - )>::extract(req).and_then(|(user_id, db, collection, query, mut bsos)| { + )>::extract(req).and_then(move |(user_id, db, collection, query, mut bsos)| { let collection = collection.collection.clone(); if collection == "crypto" { // Verify the client didn't mess up the crypto if we have a payload @@ -395,16 +469,11 @@ impl FromRequest for CollectionPostRequest { } // Trim the excess BSO's to be under the batch size - let overage: i64 = (bsos.valid.len() as i64) - (BATCH_MAX_RECORDS as i64); + let overage: i64 = (bsos.valid.len() as i64) - max_post_records; if overage > 0 { for _ in 1..=overage { if let Some(last) = bsos.valid.pop() { - bsos.invalid.push(BsoBody { - id: Some(last.id), - sortindex: last.sortindex, - payload: last.payload, - ttl: last.ttl, - }); + bsos.invalid.insert(last.id, "retry bso".to_string()); } } } @@ -763,14 +832,6 @@ fn validate_body_bso_sortindex(sort: i32) -> Result<(), ValidationError> { } } -/// Verifies the BSO payload size is valid -fn validate_body_bso_payload(payload: &str) -> Result<(), ValidationError> { - if payload.len() > BSO_MAX_PAYLOAD_SIZE { - return Err(request_error("invalid size", RequestErrorLocation::Body)); - } - Ok(()) -} - /// Verifies the BSO id string is valid fn validate_body_bso_id(id: &String) -> Result<(), ValidationError> { if !VALID_ID_REGEX.is_match(id) { @@ -933,7 +994,9 @@ mod tests { let response: HttpResponse = result.err().unwrap().into(); assert_eq!(response.status(), 400); let body = extract_body_as_str(&response); + assert_eq!(body, "0"); + /* New tests for when we can use descriptive errors let err: serde_json::Value = serde_json::from_str(&body).unwrap(); assert_eq!(err["status"], 400); assert_eq!(err["reason"], "Bad Request"); @@ -945,6 +1008,7 @@ mod tests { }; assert_eq!(sort_error["location"], "querystring"); + */ } #[test] @@ -1012,7 +1076,9 @@ mod tests { let response: HttpResponse = result.err().unwrap().into(); assert_eq!(response.status(), 400); let body = extract_body_as_str(&response); + assert_eq!(body, "0"); + /* New tests for when we can use descriptive errors let err: serde_json::Value = serde_json::from_str(&body).unwrap(); assert_eq!(err["status"], 400); @@ -1020,6 +1086,7 @@ mod tests { assert_eq!(err["errors"][0]["location"], "path"); assert_eq!(err["errors"][0]["name"], "bso"); assert_eq!(err["errors"][0]["value"], INVALID_BSO_NAME); + */ } #[test] @@ -1085,12 +1152,15 @@ mod tests { let response: HttpResponse = result.err().unwrap().into(); assert_eq!(response.status(), 400); let body = extract_body_as_str(&response); + assert_eq!(body, "8") + /* New tests for when we can use descriptive errors let err: serde_json::Value = serde_json::from_str(&body).unwrap(); assert_eq!(err["status"], 400); assert_eq!(err["errors"][0]["location"], "body"); assert_eq!(&err["errors"][0]["name"], "bso"); + */ } #[test] @@ -1143,6 +1213,9 @@ mod tests { let response: HttpResponse = result.err().unwrap().into(); assert_eq!(response.status(), 400); let body = extract_body_as_str(&response); + assert_eq!(body, "0"); + + /* New tests for when we can use descriptive errors let err: serde_json::Value = serde_json::from_str(&body).unwrap(); assert_eq!(err["status"], 400); @@ -1151,6 +1224,7 @@ mod tests { assert_eq!(err["errors"][0]["location"], "path"); assert_eq!(err["errors"][0]["name"], "collection"); assert_eq!(err["errors"][0]["value"], INVALID_COLLECTION_NAME); + */ } #[test] @@ -1198,10 +1272,10 @@ mod tests { "localhost", 5000, ); - // Leave off id's, these will be invalid + // Add extra fields, these will be invalid let bso_body = json!([ - {"payload": "xxx", "sortindex": 23}, - {"payload": "xxx", "sortindex": -99} + {"id": "1", "sortindex": 23, "jump": 1}, + {"id": "2", "sortindex": -99, "hop": "low"} ]); let req = TestRequest::with_state(state) .header("authorization", header) @@ -1223,8 +1297,8 @@ mod tests { fn test_invalid_precondition_headers() { fn assert_invalid_header( req: &HttpRequest, - error_header: &str, - error_message: &str, + _error_header: &str, + _error_message: &str, ) { let result = as FromRequest>::extract(&req); assert!(result.is_err()); @@ -1232,12 +1306,16 @@ mod tests { assert_eq!(response.status(), 400); let body = extract_body_as_str(&response); + assert_eq!(body, "0"); + + /* New tests for when we can use descriptive errors let err: serde_json::Value = serde_json::from_str(&body).unwrap(); assert_eq!(err["status"], 400); assert_eq!(err["errors"][0]["description"], error_message); assert_eq!(err["errors"][0]["location"], "header"); assert_eq!(err["errors"][0]["name"], error_header); + */ } let req = TestRequest::with_state(make_state()) .header("X-If-Modified-Since", "32124.32") @@ -1327,6 +1405,9 @@ mod tests { let response: HttpResponse = result.err().unwrap().into(); assert_eq!(response.status(), 400); let body = extract_body_as_str(&response); + assert_eq!(body, "0"); + + /* New tests for when we can use descriptive errors let err: serde_json::Value = serde_json::from_str(&body).unwrap(); assert_eq!(err["status"], 400); @@ -1334,5 +1415,6 @@ mod tests { assert_eq!(err["errors"][0]["description"], "conflicts with payload"); assert_eq!(err["errors"][0]["location"], "path"); assert_eq!(err["errors"][0]["name"], "uid"); + */ } } diff --git a/src/web/handlers.rs b/src/web/handlers.rs index 37cb12f0..4e20be0e 100644 --- a/src/web/handlers.rs +++ b/src/web/handlers.rs @@ -161,6 +161,7 @@ pub fn post_collection(coll: CollectionPostRequest) -> FutureResponse for PreConditionCheck { match as FromRequest>::from_request(&req, &()) { Ok(Some(precondition)) => precondition, - Ok(None) | Err(_) => return Ok(Started::Done), + Ok(None) => return Ok(Started::Done), + Err(e) => return Ok(Started::Response(e.into())), }; let user_id = HawkIdentifier::from_request(&req, &())?; let db = >::from_request(&req, &())?;