Skip to content

Commit

Permalink
allow multipart body without preceding CRLF on first boundary
Browse files Browse the repository at this point in the history
RFC 2046[1] is somewhat ambiguous on whether or not it's valid to omit the
preceding CRLF for the first boundary. The prose on page 19 suggests
that it is not:

> The boundary delimiter MUST occur at the beginning of a line, i.e.,
> following a CRLF, and the initial CRLF is considered to be attached
> to the boundary delimiter line rather than part of the preceding
> part. The boundary may be followed by zero or more characters of
> linear whitespace. It is then terminated by either another CRLF and
> the header fields for the next part, or by two CRLFs, in which case
> there are no header fields for the next part. If no Content-Type
> field is present it is assumed to be "message/rfc822" in a
> "multipart/digest" and "text/plain" otherwise.
>
> NOTE: The CRLF preceding the boundary delimiter line is conceptually
> attached to the boundary so that it is possible to have a part that
> does not end with a CRLF (line break). Body parts that must be
> considered to end with line breaks, therefore, must have two CRLFs
> preceding the boundary delimiter line, the first of which is part of
> the preceding body part, and the second of which is part of the
> encapsulation boundary.

But the BNF on page 22 suggests that it is, as long as there is no
preamble:

> dash-boundary := "--" boundary
>                  ; boundary taken from the value of
>                  ; boundary parameter of the
>                  ; Content-Type field.
>
> multipart-body := [preamble CRLF]
>                   dash-boundary transport-padding CRLF
>                   body-part *encapsulation
>                   close-delimiter transport-padding
>                   [CRLF epilogue]

Dendrite currently generates multipart responses without a preceding CRLF
for the first boundary[2], which were rejected by the previous ruma
parsing logic.

[1]: https://datatracker.ietf.org/doc/html/rfc2046
[2]: matrix-org/dendrite#3414
  • Loading branch information
olivia-fl committed Sep 17, 2024
1 parent 7cfa3be commit 13a9ce7
Showing 1 changed file with 50 additions and 3 deletions.
53 changes: 50 additions & 3 deletions crates/ruma-federation-api/src/authenticated_media.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,20 @@ fn try_from_multipart_mixed_response<T: AsRef<[u8]>>(
let mut full_boundary = Vec::with_capacity(boundary.len() + 4);
full_boundary.extend_from_slice(b"\r\n--");
full_boundary.extend_from_slice(boundary);
let full_boundary_no_crlf = full_boundary.strip_prefix(b"\r\n").unwrap();

let mut boundaries = memchr::memmem::find_iter(body, &full_boundary);

let metadata_start = boundaries.next().ok_or_else(|| {
MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 0 }
})? + full_boundary.len();
let metadata_start = if body.starts_with(full_boundary_no_crlf) {
// If there is no preamble before the first boundary, it may omit the
// preceding CRLF.
full_boundary_no_crlf.len()
} else {
boundaries.next().ok_or_else(|| MultipartMixedDeserializationError::MissingBodyParts {
expected: 2,
found: 0,
})? + full_boundary.len()
};
let metadata_end = boundaries.next().ok_or_else(|| {
MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 0 }
})?;
Expand Down Expand Up @@ -411,6 +419,15 @@ mod tests {
.unwrap();

try_from_multipart_mixed_response(response).unwrap_err();

// Boundary without CRLF with preamble.
let body = "foo--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
let response = http::Response::builder()
.header(http::header::CONTENT_TYPE, "multipart/mixed; boundary=abcdef")
.body(body)
.unwrap();

try_from_multipart_mixed_response(response).unwrap_err();
}

#[test]
Expand Down Expand Up @@ -477,6 +494,36 @@ mod tests {
assert_eq!(file_content.content_type.unwrap(), "text/plain");
assert_eq!(file_content.content_disposition, None);

// No leading CRLF (and no preamble)
let body = "--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
let response = http::Response::builder()
.header(http::header::CONTENT_TYPE, "multipart/mixed; boundary=abcdef")
.body(body)
.unwrap();

let (_metadata, content) = try_from_multipart_mixed_response(response).unwrap();

assert_matches!(content, FileOrLocation::File(file_content));
assert_eq!(file_content.file, b"some plain text");
assert_eq!(file_content.content_type, None);
assert_eq!(file_content.content_disposition, None);

// Boundary text in preamble, but no leading CRLF, so it should be
// ignored.
let body =
"foo--abcdef\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
let response = http::Response::builder()
.header(http::header::CONTENT_TYPE, "multipart/mixed; boundary=abcdef")
.body(body)
.unwrap();

let (_metadata, content) = try_from_multipart_mixed_response(response).unwrap();

assert_matches!(content, FileOrLocation::File(file_content));
assert_eq!(file_content.file, b"some plain text");
assert_eq!(file_content.content_type, None);
assert_eq!(file_content.content_disposition, None);

// No body part headers.
let body = "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
let response = http::Response::builder()
Expand Down

0 comments on commit 13a9ce7

Please sign in to comment.