From 772e0d32a286c2bb75c12e6bf892f578cd778c03 Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 09:02:09 +0000 Subject: [PATCH 1/8] init parse.rs with function sigs --- spansy/src/http/parse.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 spansy/src/http/parse.rs diff --git a/spansy/src/http/parse.rs b/spansy/src/http/parse.rs new file mode 100644 index 0000000..65f7eba --- /dev/null +++ b/spansy/src/http/parse.rs @@ -0,0 +1,10 @@ +// Imports + +// Parsing functions for Transfer-Encoding header types +fn parse_chunked_body() {} + +fn parse_gzip_body() {} + +fn parse_deflate_body() {} + +fn parse_idenity_body() {} \ No newline at end of file From ba1d44b8051b66a3d8e7d62cc28f474187c662cb Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 09:47:20 +0000 Subject: [PATCH 2/8] chunky body --- spansy/src/http/mod.rs | 1 + spansy/src/http/parse.rs | 47 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/spansy/src/http/mod.rs b/spansy/src/http/mod.rs index 9723712..c083434 100644 --- a/spansy/src/http/mod.rs +++ b/spansy/src/http/mod.rs @@ -2,6 +2,7 @@ mod span; mod types; +mod parse; use bytes::Bytes; diff --git a/spansy/src/http/parse.rs b/spansy/src/http/parse.rs index 65f7eba..7e3a3bc 100644 --- a/spansy/src/http/parse.rs +++ b/spansy/src/http/parse.rs @@ -1,7 +1,50 @@ -// Imports +use crate::ParseError; +use bytes::{Bytes, BytesMut}; // Parsing functions for Transfer-Encoding header types -fn parse_chunked_body() {} +// Parse Transfer-Encoding: chunked body +fn parse_chunked_body(src: &Bytes, offset: usize) -> Result<(Bytes, usize), ParseError> { + let mut body = BytesMut::new(); + let mut pos = offset; + + loop { + // Find the end of the chunk size line + let chunk_size_end = src[pos..] + .windows(2) + .position(|w| w == b"\r\n") + .ok_or_else(|| ParseError("Invalid chunked encoding: missing chunk size CRLF".to_string()))? 
+ + pos; + + // Parse the chunk size + let chunk_size_str = std::str::from_utf8(&src[pos..chunk_size_end]) + .map_err(|_| ParseError("Invalid chunk size encoding".to_string()))?; + let chunk_size = usize::from_str_radix(chunk_size_str.trim(), 16) + .map_err(|_| ParseError("Invalid chunk size value".to_string()))?; + + // Move past the chunk size line + pos = chunk_size_end + 2; + + // If chunk size is zero, this is the last chunk + if chunk_size == 0 { + break; + } + + // Extract the chunk data + let chunk_data_end = pos + chunk_size; + if chunk_data_end > src.len() { + return Err(ParseError("Chunk data exceeds source length".to_string())); + } + body.extend_from_slice(&src[pos..chunk_data_end]); + + // Move past the chunk data and the trailing CRLF + pos = chunk_data_end + 2; + } + + // Move past the final CRLF after the last chunk + pos += 2; + + Ok((body.freeze(), pos)) +} fn parse_gzip_body() {} From b9fffbfd21e50e1e5bca9356a1e6062de83246b5 Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 09:55:00 +0000 Subject: [PATCH 3/8] gzip body --- spansy/Cargo.toml | 1 + spansy/src/http/parse.rs | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/spansy/Cargo.toml b/spansy/Cargo.toml index f774a09..732f6f9 100644 --- a/spansy/Cargo.toml +++ b/spansy/Cargo.toml @@ -20,3 +20,4 @@ thiserror.workspace = true httparse = "1.8" pest = { version = "2.7" } pest_derive = { version = "2.7" } +flate2 = "1.0.30" diff --git a/spansy/src/http/parse.rs b/spansy/src/http/parse.rs index 7e3a3bc..ad383fd 100644 --- a/spansy/src/http/parse.rs +++ b/spansy/src/http/parse.rs @@ -1,5 +1,7 @@ use crate::ParseError; use bytes::{Bytes, BytesMut}; +use flate2::read::{GzDecoder, DeflateDecoder}; +use std::io::Read; // Parsing functions for Transfer-Encoding header types // Parse Transfer-Encoding: chunked body @@ -46,7 +48,12 @@ fn parse_chunked_body(src: &Bytes, offset: usize) -> Result<(Bytes, usize), Pars Ok((body.freeze(), pos)) } -fn parse_gzip_body() {} 
+fn parse_gzip_body(src: &Bytes) -> Result { + let mut decoder = GzDecoder::new(&src[..]); + let mut decompressed = Vec::new(); + decoder.read_to_end(&mut decompressed).map_err(|e| ParseError(format!("Failed to decompress gzip body: {}", e)))?; + Ok(Bytes::from(decompressed)) +} fn parse_deflate_body() {} From 8b3756319af4d29f66b358a3fdf08850347897a7 Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 12:42:03 +0000 Subject: [PATCH 4/8] deflate body --- spansy/src/http/parse.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/spansy/src/http/parse.rs b/spansy/src/http/parse.rs index ad383fd..5ea0bd0 100644 --- a/spansy/src/http/parse.rs +++ b/spansy/src/http/parse.rs @@ -55,6 +55,11 @@ fn parse_gzip_body(src: &Bytes) -> Result { Ok(Bytes::from(decompressed)) } -fn parse_deflate_body() {} +fn parse_deflate_body(src: &Bytes) -> Result { + let mut decoder = DeflateDecoder::new(&src[..]); + let mut decompressed = Vec::new(); + decoder.read_to_end(&mut decompressed).map_err(|e| ParseError(format!("Failed to decompress deflate body: {}", e)))?; + Ok(Bytes::from(decompressed)) +} fn parse_idenity_body() {} \ No newline at end of file From cafcfaab5a450e5245c152aeff4094c4782a8cd4 Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 12:42:29 +0000 Subject: [PATCH 5/8] identity body --- spansy/src/http/parse.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spansy/src/http/parse.rs b/spansy/src/http/parse.rs index 5ea0bd0..7e0da9f 100644 --- a/spansy/src/http/parse.rs +++ b/spansy/src/http/parse.rs @@ -62,4 +62,6 @@ fn parse_deflate_body(src: &Bytes) -> Result { Ok(Bytes::from(decompressed)) } -fn parse_idenity_body() {} \ No newline at end of file +fn parse_identity_body(src: &Bytes) -> Result { + Ok(src.clone()) +} \ No newline at end of file From 213c9b109d50ebd5c425e57b24a0b24449f9fac3 Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 12:48:18 +0000 Subject: [PATCH 6/8] exporting functions 
to span --- spansy/src/http/mod.rs | 1 + spansy/src/http/parse.rs | 13 ++++++++----- spansy/src/http/span.rs | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/spansy/src/http/mod.rs b/spansy/src/http/mod.rs index c083434..a8ff63b 100644 --- a/spansy/src/http/mod.rs +++ b/spansy/src/http/mod.rs @@ -11,6 +11,7 @@ pub use types::{ Body, BodyContent, Code, Header, HeaderName, HeaderValue, Method, Reason, Request, RequestLine, Response, Status, Target, }; +pub use parse::{parse_chunked_body, parse_deflate_body, parse_gzip_body, parse_identity_body}; use crate::ParseError; diff --git a/spansy/src/http/parse.rs b/spansy/src/http/parse.rs index 7e0da9f..b820cbe 100644 --- a/spansy/src/http/parse.rs +++ b/spansy/src/http/parse.rs @@ -4,8 +4,8 @@ use flate2::read::{GzDecoder, DeflateDecoder}; use std::io::Read; // Parsing functions for Transfer-Encoding header types -// Parse Transfer-Encoding: chunked body -fn parse_chunked_body(src: &Bytes, offset: usize) -> Result<(Bytes, usize), ParseError> { +/// Parse Transfer-Encoding: chunked body +pub fn parse_chunked_body(src: &Bytes, offset: usize) -> Result<(Bytes, usize), ParseError> { let mut body = BytesMut::new(); let mut pos = offset; @@ -48,20 +48,23 @@ fn parse_chunked_body(src: &Bytes, offset: usize) -> Result<(Bytes, usize), Pars Ok((body.freeze(), pos)) } -fn parse_gzip_body(src: &Bytes) -> Result { +/// Parse Transfer-Encoding: gzip body +pub fn parse_gzip_body(src: &Bytes) -> Result { let mut decoder = GzDecoder::new(&src[..]); let mut decompressed = Vec::new(); decoder.read_to_end(&mut decompressed).map_err(|e| ParseError(format!("Failed to decompress gzip body: {}", e)))?; Ok(Bytes::from(decompressed)) } -fn parse_deflate_body(src: &Bytes) -> Result { +/// Parse Transfer-Encoding: deflate body +pub fn parse_deflate_body(src: &Bytes) -> Result { let mut decoder = DeflateDecoder::new(&src[..]); let mut decompressed = Vec::new(); decoder.read_to_end(&mut decompressed).map_err(|e| 
ParseError(format!("Failed to decompress deflate body: {}", e)))?; Ok(Bytes::from(decompressed)) } -fn parse_identity_body(src: &Bytes) -> Result { +/// Parse Transfer-Encoding: identity body +pub fn parse_identity_body(src: &Bytes) -> Result { Ok(src.clone()) } \ No newline at end of file diff --git a/spansy/src/http/span.rs b/spansy/src/http/span.rs index a93b40e..80d57fb 100644 --- a/spansy/src/http/span.rs +++ b/spansy/src/http/span.rs @@ -6,7 +6,7 @@ use crate::{ helpers::get_span_range, http::{ Body, BodyContent, Code, Header, HeaderName, HeaderValue, Method, Reason, Request, - RequestLine, Response, Status, Target, + RequestLine, Response, Status, Target, parse_chunked_body, parse_deflate_body, parse_gzip_body, parse_identity_body }, json, ParseError, Span, }; From c57955d9012481cd2907e4b6259c43283f0ad72c Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 13:00:06 +0000 Subject: [PATCH 7/8] support chunked, gzip, deflate, identity in response_body_len --- spansy/src/http/span.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/spansy/src/http/span.rs b/spansy/src/http/span.rs index 80d57fb..efa5ba1 100644 --- a/spansy/src/http/span.rs +++ b/spansy/src/http/span.rs @@ -256,14 +256,12 @@ fn response_body_len(response: &Response) -> Result { _ => {} } - if response - .headers_with_name("Transfer-Encoding") - .next() - .is_some() - { - Err(ParseError( - "Transfer-Encoding not supported yet".to_string(), - )) + if let Some(transfer_encoding) = response.headers_with_name("Transfer-Encoding").next() { + match transfer_encoding.value.as_bytes() { + b"chunked" => Ok(usize::MAX), + b"gzip" | b"deflate" | b"identity" => Ok(usize::MAX), + _ => Err(ParseError("Unsupported Transfer-Encoding".to_string())), + } } else if let Some(h) = response.headers_with_name("Content-Length").next() { // If a valid Content-Length header field is present without Transfer-Encoding, its decimal value // defines the expected message body length 
in octets. From e1b6ecabda9bff2ba0c89f4dcf74335311cdd5bd Mon Sep 17 00:00:00 2001 From: lancenonce Date: Tue, 2 Jul 2024 13:33:16 +0000 Subject: [PATCH 8/8] new parse_response_from_bytes; typing fixes --- spansy/src/http/parse.rs | 7 ------- spansy/src/http/span.rs | 18 +++++++++++++++++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/spansy/src/http/parse.rs b/spansy/src/http/parse.rs index b820cbe..a7a7b67 100644 --- a/spansy/src/http/parse.rs +++ b/spansy/src/http/parse.rs @@ -10,39 +10,32 @@ pub fn parse_chunked_body(src: &Bytes, offset: usize) -> Result<(Bytes, usize), let mut pos = offset; loop { - // Find the end of the chunk size line let chunk_size_end = src[pos..] .windows(2) .position(|w| w == b"\r\n") .ok_or_else(|| ParseError("Invalid chunked encoding: missing chunk size CRLF".to_string()))? + pos; - // Parse the chunk size let chunk_size_str = std::str::from_utf8(&src[pos..chunk_size_end]) .map_err(|_| ParseError("Invalid chunk size encoding".to_string()))?; let chunk_size = usize::from_str_radix(chunk_size_str.trim(), 16) .map_err(|_| ParseError("Invalid chunk size value".to_string()))?; - // Move past the chunk size line pos = chunk_size_end + 2; - // If chunk size is zero, this is the last chunk if chunk_size == 0 { break; } - // Extract the chunk data let chunk_data_end = pos + chunk_size; if chunk_data_end > src.len() { return Err(ParseError("Chunk data exceeds source length".to_string())); } body.extend_from_slice(&src[pos..chunk_data_end]); - // Move past the chunk data and the trailing CRLF pos = chunk_data_end + 2; } - // Move past the final CRLF after the last chunk pos += 2; Ok((body.freeze(), pos)) diff --git a/spansy/src/http/span.rs b/spansy/src/http/span.rs index efa5ba1..d4c825e 100644 --- a/spansy/src/http/span.rs +++ b/spansy/src/http/span.rs @@ -168,7 +168,23 @@ pub(crate) fn parse_response_from_bytes( let body_len = response_body_len(&response)?; - if body_len > 0 { + if body_len == usize::MAX { + // Handle
different transfer encodings + let transfer_encoding = response.headers_with_name("Transfer-Encoding").next().unwrap().value.as_bytes(); + let (body_bytes, end_pos) = match transfer_encoding { + b"chunked" => parse_chunked_body(src, head_end)?, + b"gzip" => (parse_gzip_body(&src.slice(head_end..))?, src.len()), + b"deflate" => (parse_deflate_body(&src.slice(head_end..))?, src.len()), + b"identity" => (parse_identity_body(&src.slice(head_end..))?, src.len()), + _ => return Err(ParseError("Unsupported Transfer-Encoding".to_string())), + }; + let body_span = Span::new_bytes(body_bytes.clone(), 0..body_bytes.len()); + response.body = Some(Body { + span: Span::new_bytes(src.clone(), head_end..end_pos), + content: BodyContent::Unknown(body_span), + }); + response.span = Span::new_bytes(src.clone(), offset..end_pos); + } else if body_len > 0 { let range = head_end..head_end + body_len; if range.end > src.len() {