Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTTPCLIENT-2159: Invalid handling of charset content type parameter #483

Merged
merged 1 commit into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 152 additions & 40 deletions httpcore5/src/main/java/org/apache/hc/core5/http/ContentType.java
michael-o marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

import org.apache.hc.core5.annotation.Contract;
import org.apache.hc.core5.annotation.ThreadingBehavior;
import org.apache.hc.core5.http.message.BasicHeaderValueFormatter;
import org.apache.hc.core5.http.message.BasicNameValuePair;
import org.apache.hc.core5.http.message.MessageSupport;
import org.apache.hc.core5.http.message.ParserCursor;
Expand All @@ -67,105 +68,125 @@ public final class ContentType implements Serializable {
*/
private static final String CHARSET = "charset";

/**
* Flag indicating whether the charset is implicit.
* <p>
* When {@code implicitCharset} is {@code true}, the charset will not be explicitly
* included in the string representation of this {@link ContentType} (i.e., in the {@code toString} method),
* unless it is required for the given MIME type.
* If {@code implicitCharset} is {@code false}, the charset will always be included in the string representation,
* unless the MIME type explicitly disallows charset parameters (e.g., certain binary or multipart types).
* </p>
* <p>
* This flag is essential for proper handling of content types where the charset is either implied by the specification
* (e.g., JSON is always UTF-8) or where including a charset is not meaningful (e.g., binary types like
* {@code application/octet-stream}).
* </p>
*
* @since 5.5
*/
private final boolean implicitCharset;


// constants
public static final ContentType APPLICATION_ATOM_XML = create(
"application/atom+xml", StandardCharsets.UTF_8);
"application/atom+xml", StandardCharsets.UTF_8, false);
public static final ContentType APPLICATION_FORM_URLENCODED = create(
"application/x-www-form-urlencoded", StandardCharsets.ISO_8859_1);
"application/x-www-form-urlencoded", StandardCharsets.ISO_8859_1, true);
public static final ContentType APPLICATION_JSON = create(
"application/json", StandardCharsets.UTF_8);
"application/json", StandardCharsets.UTF_8, true);

/**
* Public constant media type for {@code application/x-ndjson}.
* @since 5.1
*/
public static final ContentType APPLICATION_NDJSON = create(
"application/x-ndjson", StandardCharsets.UTF_8);
"application/x-ndjson", StandardCharsets.UTF_8, true);

public static final ContentType APPLICATION_OCTET_STREAM = create(
"application/octet-stream", (Charset) null);
"application/octet-stream", (Charset) null, true);
/**
* Public constant media type for {@code application/pdf}.
* @since 5.1
*/
public static final ContentType APPLICATION_PDF = create(
"application/pdf", StandardCharsets.UTF_8);
"application/pdf", (Charset) null, true);

public static final ContentType APPLICATION_SOAP_XML = create(
"application/soap+xml", StandardCharsets.UTF_8);
"application/soap+xml", StandardCharsets.UTF_8, false);
public static final ContentType APPLICATION_SVG_XML = create(
"application/svg+xml", StandardCharsets.UTF_8);
"application/svg+xml", StandardCharsets.UTF_8, false);
public static final ContentType APPLICATION_XHTML_XML = create(
"application/xhtml+xml", StandardCharsets.UTF_8);
"application/xhtml+xml", StandardCharsets.UTF_8, false);
public static final ContentType APPLICATION_XML = create(
"application/xml", StandardCharsets.UTF_8);
"application/xml", StandardCharsets.UTF_8, false);
/**
* Public constant media type for {@code application/problem+json}.
* @see <a href="https://tools.ietf.org/html/rfc7807#section-6.1">Problem Details for HTTP APIs, 6.1. application/problem+json</a>
* @since 5.1
*/
public static final ContentType APPLICATION_PROBLEM_JSON = create(
"application/problem+json", StandardCharsets.UTF_8);
"application/problem+json", StandardCharsets.UTF_8, true);
/**
* Public constant media type for {@code application/problem+xml}.
* @see <a href="https://tools.ietf.org/html/rfc7807#section-6.2">Problem Details for HTTP APIs, 6.2. application/problem+xml</a>
* @since 5.1
*/
public static final ContentType APPLICATION_PROBLEM_XML = create(
"application/problem+xml", StandardCharsets.UTF_8);
"application/problem+xml", StandardCharsets.UTF_8, false);

/**
* Public constant media type for {@code application/rss+xml}.
* @since 5.1
*/
public static final ContentType APPLICATION_RSS_XML = create(
"application/rss+xml", StandardCharsets.UTF_8);
"application/rss+xml", StandardCharsets.UTF_8, false);

public static final ContentType IMAGE_BMP = create(
"image/bmp");
"image/bmp", (Charset) null, true);
public static final ContentType IMAGE_GIF = create(
"image/gif");
"image/gif", (Charset) null, true);
public static final ContentType IMAGE_JPEG = create(
"image/jpeg");
"image/jpeg", (Charset) null, true);
public static final ContentType IMAGE_PNG = create(
"image/png");
"image/png", (Charset) null, true);
public static final ContentType IMAGE_SVG = create(
"image/svg+xml");
"image/svg+xml", (Charset) null, false);
michael-o marked this conversation as resolved.
Show resolved Hide resolved
public static final ContentType IMAGE_TIFF = create(
"image/tiff");
"image/tiff", (Charset) null, true);
public static final ContentType IMAGE_WEBP = create(
"image/webp");
"image/webp", (Charset) null, true);
public static final ContentType MULTIPART_FORM_DATA = create(
"multipart/form-data", StandardCharsets.ISO_8859_1);
"multipart/form-data", StandardCharsets.ISO_8859_1, true);

/**
* Public constant media type for {@code multipart/mixed}.
* @since 5.1
*/
public static final ContentType MULTIPART_MIXED = create(
"multipart/mixed", StandardCharsets.ISO_8859_1);
"multipart/mixed", StandardCharsets.ISO_8859_1, true);

/**
* Public constant media type for {@code multipart/related}.
* @since 5.1
*/
public static final ContentType MULTIPART_RELATED = create(
"multipart/related", StandardCharsets.ISO_8859_1);
"multipart/related", StandardCharsets.ISO_8859_1, true);

public static final ContentType TEXT_HTML = create(
"text/html", StandardCharsets.UTF_8);
"text/html", StandardCharsets.UTF_8, true);

/**
* Public constant media type for {@code text/markdown}.
* @since 5.1
*/
public static final ContentType TEXT_MARKDOWN = create(
"text/markdown", StandardCharsets.UTF_8);
"text/markdown", StandardCharsets.UTF_8, false);

public static final ContentType TEXT_PLAIN = create(
"text/plain", StandardCharsets.UTF_8);
"text/plain", StandardCharsets.UTF_8, false);
public static final ContentType TEXT_XML = create(
"text/xml", StandardCharsets.UTF_8);
"text/xml", StandardCharsets.UTF_8, false);
/**
* Public constant media type for {@code text/event-stream}.
* @see <a href="https://www.w3.org/TR/eventsource/">Server-Sent Events W3C recommendation</a>
Expand All @@ -175,7 +196,7 @@ public final class ContentType implements Serializable {
"text/event-stream", StandardCharsets.UTF_8);

public static final ContentType WILDCARD = create(
"*/*", (Charset) null);
"*/*", (Charset) null, true);

/**
* An empty immutable {@code NameValuePair} array.
Expand Down Expand Up @@ -225,18 +246,42 @@ public final class ContentType implements Serializable {
ContentType(
final String mimeType,
final Charset charset) {
this.mimeType = mimeType;
this.charset = charset;
this.params = null;
this (mimeType,charset,null, false);
}

ContentType(
final String mimeType,
final Charset charset,
final NameValuePair[] params) {

this (mimeType,charset,params, false);
}

/**
* Constructs a new instance of {@link ContentType} with the given MIME type, charset, parameters,
* and an implicit charset flag.
* <p>
* If {@code implicitCharset} is set to {@code true}, the charset will not be explicitly
* included in the string representation of this content type (i.e., the {@code toString} method)
* unless it is required for the given MIME type.
* If {@code implicitCharset} is {@code false}, the charset will always be included in the
* string representation unless the MIME type is one of those that should not include a charset.
* </p>
*
* @param mimeType the MIME type. It must not be {@code null} or empty and must not contain
* reserved characters such as {@code <">, <;>, <,>}.
* @param charset the character set for this content type. This can be {@code null}.
* @param params optional parameters for this content type. If {@code null}, no additional
* parameters will be included.
* @param implicitCharset whether the charset is implicit. If {@code true}, the charset is not
* included in the {@code toString} output unless required.
* @since 5.5
*/
ContentType(final String mimeType, final Charset charset, final NameValuePair[] params, final boolean implicitCharset) {
this.mimeType = mimeType;
this.charset = charset;
this.params = params;
this.implicitCharset = implicitCharset;
this.params = params != null ? params.clone() : null;
}

public String getMimeType() {
Expand Down Expand Up @@ -288,8 +333,21 @@ public String toString() {
buf.append(this.mimeType);
if (this.params != null) {
buf.append("; ");
MessageSupport.formatParameters(buf, this.params);
} else if (this.charset != null) {
boolean first = true;
for (int i = 0; i < params.length; i++) {
final NameValuePair param = params[i];
if (!first) {
buf.append("; ");
}
if (param.getName().equalsIgnoreCase("charset") && implicitCharset) {
continue;
}
BasicHeaderValueFormatter.INSTANCE.formatNameValuePair(buf, param, false);
first = false;
}

} else if (this.charset != null && !implicitCharset) {
// Append charset only if it's not one of the types that shouldn't have charset
buf.append("; charset=");
buf.append(this.charset.name());
}
Expand All @@ -306,6 +364,58 @@ private static boolean valid(final String s) {
return true;
}


/**
* Creates a new instance of {@link ContentType} with the given MIME type, charset,
* and an implicit charset flag.
* <p>
* This method allows specifying whether the charset should be implicit or explicit.
* If {@code implicitCharset} is set to {@code true}, the charset will not be explicitly
* included in the string representation of this content type (i.e., the {@code toString} method),
* unless it is required for the given MIME type. If {@code implicitCharset} is {@code false},
* the charset will always be included unless the MIME type does not allow a charset.
* </p>
*
* @param mimeType the MIME type. It must not be {@code null} or empty and must not contain
* reserved characters such as {@code <">, <;>, <,>}.
* @param charset the character set for this content type. This can be {@code null}.
* @param implicitCharset whether the charset is implicit. If {@code true}, the charset is
* not included in the {@code toString} output unless required.
* @return a new instance of {@link ContentType}.
* @throws IllegalArgumentException if the MIME type is invalid or contains reserved characters.
* @since 5.5
*/
public static ContentType create(final String mimeType, final Charset charset, final boolean implicitCharset) {
final String normalizedMimeType = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type"));
Args.check(valid(normalizedMimeType), "MIME type may not contain reserved characters");
return new ContentType(normalizedMimeType, charset, null, implicitCharset);
}

/**
* Creates a new instance of {@link ContentType} with the given MIME type, parameters,
* and an implicit charset flag.
* <p>
* This method allows specifying additional parameters for the content type and whether
* the charset should be implicit or explicit. If {@code implicitCharset} is {@code true},
* the charset will not be included in the string representation unless required.
* </p>
*
* @param mimeType the MIME type. It must not be {@code null} or empty and must not contain
* reserved characters such as {@code <">, <;>, <,>}.
* @param implicitCharset whether the charset is implicit. If {@code true}, the charset is
* not included in the {@code toString} output unless required.
* @param params optional parameters for the content type. Can be {@code null}.
* @return a new instance of {@link ContentType}.
* @throws IllegalArgumentException if the MIME type is invalid or contains reserved characters.
* @throws UnsupportedCharsetException if the charset provided in the parameters is not supported.
* @since 5.5
*/
public static ContentType create(final String mimeType, final boolean implicitCharset, final NameValuePair... params) throws UnsupportedCharsetException {
final String type = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type"));
Args.check(valid(type), "MIME type may not contain reserved characters");
return create(mimeType, params != null ? params.clone() : null, implicitCharset);
}

/**
* Creates a new instance of {@link ContentType}.
*
Expand All @@ -315,9 +425,7 @@ private static boolean valid(final String s) {
* @return content type
*/
public static ContentType create(final String mimeType, final Charset charset) {
final String normalizedMimeType = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type"));
Args.check(valid(normalizedMimeType), "MIME type may not contain reserved characters");
return new ContentType(normalizedMimeType, charset);
return create(mimeType, charset, false);
}

/**
Expand Down Expand Up @@ -356,6 +464,10 @@ private static ContentType create(final HeaderElement helem, final boolean stric
}

private static ContentType create(final String mimeType, final NameValuePair[] params, final boolean strict) {
return create(mimeType, params != null ? params.clone() : null, strict, false);
}

private static ContentType create(final String mimeType, final NameValuePair[] params, final boolean strict, final boolean implicitCharset) {
Charset charset = null;
if (params != null) {
for (final NameValuePair param : params) {
Expand All @@ -374,7 +486,7 @@ private static ContentType create(final String mimeType, final NameValuePair[] p
}
}
}
return new ContentType(mimeType, charset, params != null && params.length > 0 ? params : null);
return new ContentType(mimeType, charset, params != null && params.length > 0 ? params : null, implicitCharset);
}

/**
Expand Down Expand Up @@ -517,11 +629,11 @@ public ContentType withParameters(
for (final Map.Entry<String, String> entry: paramMap.entrySet()) {
newParams.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
}
return create(this.getMimeType(), newParams.toArray(EMPTY_NAME_VALUE_PAIR_ARRAY), true);
return create(this.getMimeType(), newParams.toArray(EMPTY_NAME_VALUE_PAIR_ARRAY), true, this.implicitCharset);
}

public boolean isSameMimeType(final ContentType contentType) {
return contentType != null && mimeType.equalsIgnoreCase(contentType.getMimeType());
}

}
}
Loading