diff --git a/httpcore5/src/main/java/org/apache/hc/core5/http/ContentType.java b/httpcore5/src/main/java/org/apache/hc/core5/http/ContentType.java index b8e049961..9ce5121bb 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/http/ContentType.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/http/ContentType.java @@ -41,6 +41,7 @@ import org.apache.hc.core5.annotation.Contract; import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.http.message.BasicHeaderValueFormatter; import org.apache.hc.core5.http.message.BasicNameValuePair; import org.apache.hc.core5.http.message.MessageSupport; import org.apache.hc.core5.http.message.ParserCursor; @@ -67,105 +68,125 @@ public final class ContentType implements Serializable { */ private static final String CHARSET = "charset"; + /** + * Flag indicating whether the charset is implicit. + *
+ * When {@code implicitCharset} is {@code true}, the charset will not be explicitly + * included in the string representation of this {@link ContentType} (i.e., in the {@code toString} method), + * unless it is required for the given MIME type. + * If {@code implicitCharset} is {@code false}, the charset will always be included in the string representation, + * unless the MIME type explicitly disallows charset parameters (e.g., certain binary or multipart types). + *
+ *+ * This flag is essential for proper handling of content types where the charset is either implied by the specification + * (e.g., JSON is always UTF-8) or where including a charset is not meaningful (e.g., binary types like + * {@code application/octet-stream}). + *
+ * + * @since 5.5 + */ + private final boolean implicitCharset; + + // constants public static final ContentType APPLICATION_ATOM_XML = create( - "application/atom+xml", StandardCharsets.UTF_8); + "application/atom+xml", StandardCharsets.UTF_8, false); public static final ContentType APPLICATION_FORM_URLENCODED = create( - "application/x-www-form-urlencoded", StandardCharsets.ISO_8859_1); + "application/x-www-form-urlencoded", StandardCharsets.ISO_8859_1, true); public static final ContentType APPLICATION_JSON = create( - "application/json", StandardCharsets.UTF_8); + "application/json", StandardCharsets.UTF_8, true); /** * Public constant media type for {@code application/x-ndjson}. * @since 5.1 */ public static final ContentType APPLICATION_NDJSON = create( - "application/x-ndjson", StandardCharsets.UTF_8); + "application/x-ndjson", StandardCharsets.UTF_8, true); public static final ContentType APPLICATION_OCTET_STREAM = create( - "application/octet-stream", (Charset) null); + "application/octet-stream", (Charset) null, true); /** * Public constant media type for {@code application/pdf}. * @since 5.1 */ public static final ContentType APPLICATION_PDF = create( - "application/pdf", StandardCharsets.UTF_8); + "application/pdf", (Charset) null, true); public static final ContentType APPLICATION_SOAP_XML = create( - "application/soap+xml", StandardCharsets.UTF_8); + "application/soap+xml", StandardCharsets.UTF_8, false); public static final ContentType APPLICATION_SVG_XML = create( - "application/svg+xml", StandardCharsets.UTF_8); + "application/svg+xml", StandardCharsets.UTF_8, false); public static final ContentType APPLICATION_XHTML_XML = create( - "application/xhtml+xml", StandardCharsets.UTF_8); + "application/xhtml+xml", StandardCharsets.UTF_8, false); public static final ContentType APPLICATION_XML = create( - "application/xml", StandardCharsets.UTF_8); + "application/xml", StandardCharsets.UTF_8, false); /** * Public constant media type for {@code application/problem+json}. * @see Problem Details for HTTP APIs, 6.1. application/problem+json * @since 5.1 */ public static final ContentType APPLICATION_PROBLEM_JSON = create( - "application/problem+json", StandardCharsets.UTF_8); + "application/problem+json", StandardCharsets.UTF_8, true); /** * Public constant media type for {@code application/problem+xml}. * @see Problem Details for HTTP APIs, 6.2. application/problem+xml * @since 5.1 */ public static final ContentType APPLICATION_PROBLEM_XML = create( - "application/problem+xml", StandardCharsets.UTF_8); + "application/problem+xml", StandardCharsets.UTF_8, false); /** * Public constant media type for {@code application/rss+xml}. * @since 5.1 */ public static final ContentType APPLICATION_RSS_XML = create( - "application/rss+xml", StandardCharsets.UTF_8); + "application/rss+xml", StandardCharsets.UTF_8, false); public static final ContentType IMAGE_BMP = create( - "image/bmp"); + "image/bmp", (Charset) null, true); public static final ContentType IMAGE_GIF = create( - "image/gif"); + "image/gif", (Charset) null, true); public static final ContentType IMAGE_JPEG = create( - "image/jpeg"); + "image/jpeg", (Charset) null, true); public static final ContentType IMAGE_PNG = create( - "image/png"); + "image/png", (Charset) null, true); public static final ContentType IMAGE_SVG = create( - "image/svg+xml"); + "image/svg+xml", (Charset) null, false); public static final ContentType IMAGE_TIFF = create( - "image/tiff"); + "image/tiff", (Charset) null, true); public static final ContentType IMAGE_WEBP = create( - "image/webp"); + "image/webp", (Charset) null, true); public static final ContentType MULTIPART_FORM_DATA = create( - "multipart/form-data", StandardCharsets.ISO_8859_1); + "multipart/form-data", StandardCharsets.ISO_8859_1, true); /** * Public constant media type for {@code multipart/mixed}. * @since 5.1 */ public static final ContentType MULTIPART_MIXED = create( - "multipart/mixed", StandardCharsets.ISO_8859_1); + "multipart/mixed", StandardCharsets.ISO_8859_1, true); /** * Public constant media type for {@code multipart/related}. * @since 5.1 */ public static final ContentType MULTIPART_RELATED = create( - "multipart/related", StandardCharsets.ISO_8859_1); + "multipart/related", StandardCharsets.ISO_8859_1, true); public static final ContentType TEXT_HTML = create( - "text/html", StandardCharsets.UTF_8); + "text/html", StandardCharsets.UTF_8, true); /** * Public constant media type for {@code text/markdown}. * @since 5.1 */ public static final ContentType TEXT_MARKDOWN = create( - "text/markdown", StandardCharsets.UTF_8); + "text/markdown", StandardCharsets.UTF_8, false); public static final ContentType TEXT_PLAIN = create( - "text/plain", StandardCharsets.UTF_8); + "text/plain", StandardCharsets.UTF_8, true); public static final ContentType TEXT_XML = create( - "text/xml", StandardCharsets.UTF_8); + "text/xml", StandardCharsets.UTF_8, false); /** * Public constant media type for {@code text/event-stream}. * @see Server-Sent Events W3C recommendation @@ -175,7 +196,7 @@ public final class ContentType implements Serializable { "text/event-stream", StandardCharsets.UTF_8); public static final ContentType WILDCARD = create( - "*/*", (Charset) null); + "*/*", (Charset) null, true); /** * An empty immutable {@code NameValuePair} array. @@ -225,18 +246,42 @@ public final class ContentType implements Serializable { ContentType( final String mimeType, final Charset charset) { - this.mimeType = mimeType; - this.charset = charset; - this.params = null; + this (mimeType,charset,null, false); } ContentType( final String mimeType, final Charset charset, final NameValuePair[] params) { + + this (mimeType,charset,params, false); + } + + /** + * Constructs a new instance of {@link ContentType} with the given MIME type, charset, parameters, + * and an implicit charset flag. + *+ * If {@code implicitCharset} is set to {@code true}, the charset will not be explicitly + * included in the string representation of this content type (i.e., the {@code toString} method) + * unless it is required for the given MIME type. + * If {@code implicitCharset} is {@code false}, the charset will always be included in the + * string representation unless the MIME type is one of those that should not include a charset. + *
+ * + * @param mimeType the MIME type. It must not be {@code null} or empty and must not contain + * reserved characters such as {@code <">, <;>, <,>}. + * @param charset the character set for this content type. This can be {@code null}. + * @param params optional parameters for this content type. If {@code null}, no additional + * parameters will be included. + * @param implicitCharset whether the charset is implicit. If {@code true}, the charset is not + * included in the {@code toString} output unless required. + * @since 5.5 + */ + ContentType(final String mimeType, final Charset charset, final NameValuePair[] params, final boolean implicitCharset) { this.mimeType = mimeType; this.charset = charset; - this.params = params; + this.implicitCharset = implicitCharset; + this.params = params != null ? params.clone() : null; } public String getMimeType() { @@ -288,8 +333,19 @@ public String toString() { buf.append(this.mimeType); if (this.params != null) { buf.append("; "); - MessageSupport.formatParameters(buf, this.params); - } else if (this.charset != null) { + boolean first = true; + for (final NameValuePair param : params) { + if (!first) { + buf.append("; "); + } + if (param.getName().equalsIgnoreCase("charset") && implicitCharset) { + continue; + } + BasicHeaderValueFormatter.INSTANCE.formatNameValuePair(buf, param, false); + first = false; + } + } else if (this.charset != null && !implicitCharset) { + // Append charset only if it's not one of the types that shouldn't have charset buf.append("; charset="); buf.append(this.charset.name()); } @@ -306,6 +362,58 @@ private static boolean valid(final String s) { return true; } + + /** + * Creates a new instance of {@link ContentType} with the given MIME type, charset, + * and an implicit charset flag. + *+ * This method allows specifying whether the charset should be implicit or explicit. + * If {@code implicitCharset} is set to {@code true}, the charset will not be explicitly + * included in the string representation of this content type (i.e., the {@code toString} method), + * unless it is required for the given MIME type. If {@code implicitCharset} is {@code false}, + * the charset will always be included unless the MIME type does not allow a charset. + *
+ * + * @param mimeType the MIME type. It must not be {@code null} or empty and must not contain + * reserved characters such as {@code <">, <;>, <,>}. + * @param charset the character set for this content type. This can be {@code null}. + * @param implicitCharset whether the charset is implicit. If {@code true}, the charset is + * not included in the {@code toString} output unless required. + * @return a new instance of {@link ContentType}. + * @throws IllegalArgumentException if the MIME type is invalid or contains reserved characters. + * @since 5.5 + */ + public static ContentType create(final String mimeType, final Charset charset, final boolean implicitCharset) { + final String normalizedMimeType = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type")); + Args.check(valid(normalizedMimeType), "MIME type may not contain reserved characters"); + return new ContentType(normalizedMimeType, charset, null, implicitCharset); + } + + /** + * Creates a new instance of {@link ContentType} with the given MIME type, parameters, + * and an implicit charset flag. + *+ * This method allows specifying additional parameters for the content type and whether + * the charset should be implicit or explicit. If {@code implicitCharset} is {@code true}, + * the charset will not be included in the string representation unless required. + *
+ * + * @param mimeType the MIME type. It must not be {@code null} or empty and must not contain + * reserved characters such as {@code <">, <;>, <,>}. + * @param implicitCharset whether the charset is implicit. If {@code true}, the charset is + * not included in the {@code toString} output unless required. + * @param params optional parameters for the content type. Can be {@code null}. + * @return a new instance of {@link ContentType}. + * @throws IllegalArgumentException if the MIME type is invalid or contains reserved characters. + * @throws UnsupportedCharsetException if the charset provided in the parameters is not supported. + * @since 5.5 + */ + public static ContentType create(final String mimeType, final boolean implicitCharset, final NameValuePair... params) throws UnsupportedCharsetException { + final String type = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type")); + Args.check(valid(type), "MIME type may not contain reserved characters"); + return create(mimeType, params != null ? params.clone() : null, implicitCharset); + } + /** * Creates a new instance of {@link ContentType}. * @@ -315,9 +423,7 @@ private static boolean valid(final String s) { * @return content type */ public static ContentType create(final String mimeType, final Charset charset) { - final String normalizedMimeType = TextUtils.toLowerCase(Args.notBlank(mimeType, "MIME type")); - Args.check(valid(normalizedMimeType), "MIME type may not contain reserved characters"); - return new ContentType(normalizedMimeType, charset); + return create(mimeType, charset, false); } /** @@ -326,7 +432,9 @@ public static ContentType create(final String mimeType, final Charset charset) { * @param mimeType MIME type. It may not be {@code null} or empty. It may not contain * characters {@code <">, <;>, <,>} reserved by the HTTP specification. * @return content type + * @deprecated Use {@link #create(String, Charset, boolean)} with an explicit charset or null, and specify if the charset is implicit. */ + @Deprecated public static ContentType create(final String mimeType) { return create(mimeType, (Charset) null); } @@ -356,6 +464,10 @@ private static ContentType create(final HeaderElement helem, final boolean stric } private static ContentType create(final String mimeType, final NameValuePair[] params, final boolean strict) { + return create(mimeType, params != null ? params.clone() : null, strict, false); + } + + private static ContentType create(final String mimeType, final NameValuePair[] params, final boolean strict, final boolean implicitCharset) { Charset charset = null; if (params != null) { for (final NameValuePair param : params) { @@ -374,7 +486,7 @@ private static ContentType create(final String mimeType, final NameValuePair[] p } } } - return new ContentType(mimeType, charset, params != null && params.length > 0 ? params : null); + return new ContentType(mimeType, charset, params != null && params.length > 0 ? params : null, implicitCharset); } /** @@ -517,11 +629,11 @@ public ContentType withParameters( for (final Map.Entry