HeaderUtil.java
/*
* The contents of this file are subject to the terms of the Common Development and
* Distribution License (the License). You may not use this file except in compliance with the
* License.
*
* You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
* specific language governing permission and limitations under the License.
*
* When distributing Covered Software, include this CDDL Header Notice in each file and include
* the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
* Header, with the fields enclosed by brackets [] replaced by your own identifying
* information: "Portions Copyright [year] [name of copyright owner]".
*
* Copyright 2010–2011 ApexIdentity Inc.
* Portions Copyright 2011-2016 ForgeRock AS.
*/
package org.forgerock.http.header;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.regex.Pattern;
import org.forgerock.http.protocol.Message;
import org.forgerock.http.util.CaseInsensitiveMap;
/**
* Utility class for processing values in HTTP header fields.
*/
public final class HeaderUtil {
/**
* RFC 1123 {@code HTTP-date} format from <a href="https://tools.ietf.org/html/rfc2616#page-20">RFC 2616</a>
* section 3.3, which is the preferred standard format.
*/
private static final String HTTP_DATE_RFC_1123_DATE_FORMAT = "EEE, dd MMM yyyy HH:mm:ss zzz";
/**
* Legacy RFC 850 date format from <a href="https://tools.ietf.org/html/rfc2616#page-20">RFC 2616</a> section 3.3,
* which should be supported for parsing only.
*/
private static final String LEGACY_RFC_850_DATE_FORMAT = "E, dd-MMM-yy HH:mm:ss zzz";
/**
* Legacy ANSI C {@code asctime()} date format from
* <a href="https://tools.ietf.org/html/rfc2616#page-20">RFC 2616</a>
* section 3.3, which should be supported for parsing only.
*/
private static final String LEGACY_ANSI_C_DATE_FORMAT = "EEE MMM d HH:mm:ss yyyy";
/**
* Regex that matches escaped backslash and double-quote characters, into two matching groups where group 1
* contains the escape character (which we will discard) and group 2 contains the character we want to retain.
*/
private static final Pattern UNQUOTE_PATTERN = Pattern.compile("(\\\\)(\\\\|[\"])");
/** Static methods only. */
private HeaderUtil() {
// No implementation required.
}
/**
* Parses an HTTP header value, splitting it into multiple values around the
* specified separator. Quoted strings are not split into multiple values if
* they contain separator characters. All leading and trailing white space
* in values is trimmed. All quotations remain intact.
* <p>
* Note: This method is liberal in its interpretation of malformed header
* values; namely the incorrect use of string and character quoting
* mechanisms and unquoted white space. If a {@code null} or empty string is
* supplied as a value, this method yields an empty list.
*
* @param value
* the header value to be split.
* @param separator
* the separator character to split headers around.
* @return A list of string representing the split values of the header.
*/
public static List<String> split(final String value, final char separator) {
if (separator == '"' || separator == '\\') {
throw new IllegalArgumentException("invalid separator: " + separator);
}
final ArrayList<String> values = new ArrayList<>();
if (value != null) {
int length = value.length();
final StringBuilder sb = new StringBuilder();
boolean escaped = false;
boolean quoted = false;
for (int n = 0, cp; n < length; n += Character.charCount(cp)) {
cp = value.codePointAt(n);
if (escaped) {
// single-character quoting mechanism per RFC 2616 §2.2
sb.appendCodePoint(cp);
escaped = false;
} else if (cp == '\\') {
sb.appendCodePoint(cp);
if (quoted) {
// single-character quoting mechanism per RFC 2616 §2.2
escaped = true;
}
} else if (cp == '"') {
// quotation marks remain intact here
sb.appendCodePoint(cp);
quoted = !quoted;
} else if (cp == separator && !quoted) {
// only separator if not in quoted string
String s = sb.toString().trim();
if (s.length() > 0) {
values.add(s);
}
// reset for next token
sb.setLength(0);
} else {
sb.appendCodePoint(cp);
}
}
final String s = sb.toString().trim();
if (s.length() > 0) {
values.add(s);
}
}
return values;
}
/**
* Joins a collection of header values into a single header value, with a
* specified specified separator. A {@code null} or empty collection of
* header values yeilds a {@code null} return value.
*
* @param values
* the values to be joined.
* @param separator
* the separator to separate values within the returned value.
* @return a single header value, with values separated by the separator.
*/
public static String join(final Collection<String> values, final char separator) {
if (separator == '"' || separator == '\\') {
throw new IllegalArgumentException("invalid separator: " + separator);
}
final StringBuilder sb = new StringBuilder();
if (values != null) {
for (final String s : values) {
if (s != null) {
if (sb.length() > 0) {
sb.append(separator).append(' ');
}
sb.append(s);
}
}
}
return sb.length() > 0 ? sb.toString() : null;
}
/**
* Splits a single HTTP header parameter name and value from an input string
* value. The input string value is presumed to have been extracted from a
* collection provided by the {@link #split(String, char)} method.
* <p>
* This method returns the parameter name-value pair split into an array of
* {@code String}s. Element {@code [0]} contains the parameter name; element
* {@code [1]} contains contains the parameter value or {@code null} if
* there is no value.
* <p>
* A value that is contained within a quoted-string is processed such that
* the surrounding '"' (quotation mark) characters are removed and
* single-character quotations hold the character being quoted without the
* escape '\' (backslash) character. All white space outside of the
* quoted-string is removed. White space within the quoted-string is
* retained.
* <p>
* Note: This method is liberal in its interpretation of a malformed header
* value; namely the incorrect use of string and character quoting
* mechanisms and unquoted white space.
*
* @param value
* the string to parse the name-value parameter from.
* @return the name-value pair split into a {@code String} array.
*/
public static String[] parseParameter(final String value) {
String[] ss = new String[2];
boolean inValue = false;
boolean quoted = false;
boolean escaped = false;
int length = value.length();
final StringBuilder sb = new StringBuilder();
for (int n = 0, cp; n < length; n += Character.charCount(cp)) {
cp = value.codePointAt(n);
if (escaped) {
// single-character quoting mechanism per RFC 2616 §2.2
sb.appendCodePoint(cp);
escaped = false;
} else if (cp == '\\') {
if (quoted) {
// next character is literal
escaped = true;
} else {
// not quoted, push the backslash literal (header probably malformed)
sb.appendCodePoint(cp);
}
} else if (cp == '"') {
// toggle quoted status
quoted = !quoted;
} else if (!quoted && !inValue && cp == '=') {
// only separator if in key and not in quoted-string
ss[0] = sb.toString().trim();
// reset for next token
sb.setLength(0);
inValue = true;
} else if (!quoted && Character.isWhitespace(cp)) {
// drop unquoted white space (header probably malformed if not at beginning or end)
} else {
sb.appendCodePoint(cp);
}
}
if (!inValue) {
ss[0] = sb.toString().trim();
} else {
ss[1] = sb.toString();
}
return ss;
}
/**
* Parses a set of HTTP header parameters from a collection of values. The
* input collection of values is presumed to have been provided from the
* {@link #split(String, char)} method.
* <p>
* A well-formed parameter contains an attribute and optional value,
* separated by an '=' (equals sign) character. If the parameter contains no
* value, it is represented by a {@code null} value in the returned map.
* <p>
* Values that are contained in quoted-strings are processed such that the
* surrounding '"' (quotation mark) characters are removed and
* single-character quotations hold the character being quoted without the
* escape '\' (backslash) character. All white space outside of
* quoted-strings is removed. White space within quoted-strings is retained.
* <p>
* Note: This method is liberal in its interpretation of malformed header
* values; namely the incorrect use of string and character quoting
* mechanisms and unquoted white space.
*
* @param values
* the HTTP header parameters.
* @return a map of parameter name-value pairs.
*/
public static Map<String, String> parseParameters(final Collection<String> values) {
final CaseInsensitiveMap<String> map = new CaseInsensitiveMap<>(new HashMap<String, String>());
if (values != null) {
for (final String value : values) {
final String[] param = parseParameter(value);
if (param[0] != null && param[0].length() > 0 && !map.containsKey(param[0])) {
map.put(param[0], param[1]);
}
}
}
return map;
}
/**
* Encloses a string in quotation marks. Quotation marks and backslash
* characters are escaped with the single-character quoting mechanism. For
* more information, see <a href="http://www.ietf.org/rfc/rfc2616.txt">RFC
* 2616</a> §2.2.
*
* @param value
* the value to be enclosed in quotation marks.
* @return the value enclosed in quotation marks.
*/
public static String quote(final String value) {
if (value == null) {
return null;
}
final StringBuilder sb = new StringBuilder("\"");
int length = value.length();
for (int n = 0, cp; n < length; n += Character.charCount(cp)) {
cp = value.codePointAt(n);
if (cp == '\\' || cp == '"') {
sb.append('\\');
}
sb.appendCodePoint(cp);
}
sb.append('"');
return sb.toString();
}
/**
* Unquotes a string following the logic of {@link #quote(String)}.
*
* @param value Value to unquote
* @return Unquoted value
*/
public static String unquote(final String value) {
if (value == null) {
return null;
}
final int n = value.length();
if (n < 2 || value.charAt(0) != '"' || value.charAt(n - 1) != '"') {
throw new IllegalArgumentException("value is not quoted");
}
return UNQUOTE_PATTERN.matcher(value.substring(1, n - 1)).replaceAll("$2");
}
/**
* Parses the named header from the message as a multi-valued comma
* separated value. If there are multiple headers present then they are
* first merged and then {@link #split(String, char) split}.
*
* @param message
* The HTTP request or response.
* @param name
* The name of the header.
* @return A list of strings representing the split values of the header,
* which may be empty if the header was not present in the message.
*/
public static List<String> parseMultiValuedHeader(Message message, String name) {
final Collection<String> values = message != null && message.getHeaders().containsKey(name)
? message.getHeaders().get(name).getValues() : null;
return parseMultiValuedHeader(join(values, ','));
}
/**
* Parses the header content as a multi-valued comma separated value.
*
* @param header
* The HTTP header content.
* @return A list of strings representing the split values of the header,
* which may be empty if the header was {@code null} or empty.
*/
public static List<String> parseMultiValuedHeader(final String header) {
return split(header, ',');
}
/**
* Parses the named single-valued header from the message. If there are
* multiple headers present then only the first is used.
*
* @param message
* The HTTP request or response.
* @param name
* The name of the header.
* @return The header value, or {@code null} if the header was not present
* in the message.
*/
public static String parseSingleValuedHeader(Message message, String name) {
if (message == null || !message.getHeaders().containsKey(name)) {
return null;
}
final Iterator<String> iterator = message.getHeaders().get(name).getValues().iterator();
final String header = iterator.hasNext()
? iterator.next() : null;
return header != null ? header : null;
}
/**
* Formats a {@code HTTP-date} using RFC 1123 format as specified in
* <a href="https://tools.ietf.org/html/rfc2616#page-20">RFC 2616</a>.
*
* @param date {@link Date} to format
* @return Formatted {@code HTTP-date}
*/
public static String formatDate(final Date date) {
return getDateFormatter(HTTP_DATE_RFC_1123_DATE_FORMAT).format(date);
}
/**
* Parses the supported {@code HTTP-date} formats as specified in
* <a href="https://tools.ietf.org/html/rfc2616#page-20">RFC 2616</a>.
*
* @param s Date {@link String}
* @return {@link Date} instance, or {@code null} if unable to parse the date or {@code s} is {@code null}
*/
public static Date parseDate(final String s) {
if (s == null) {
return null;
}
Date date = parseDate(s, HTTP_DATE_RFC_1123_DATE_FORMAT);
if (date != null) {
return date;
}
date = parseDate(s, LEGACY_RFC_850_DATE_FORMAT);
if (date != null) {
return date;
}
return parseDate(s, LEGACY_ANSI_C_DATE_FORMAT);
}
/**
* Parses a date {@link String} using the provided date format.
*
* @param s Date {@link String}
* @param format Date format
* @return {@link Date} instance, or {@code null} if unable to parse the date
*/
private static Date parseDate(final String s, final String format) {
try {
return getDateFormatter(format).parse(s);
} catch (ParseException eee) {
return null;
}
}
/**
* Builds a date formatter, configured to use GMT time zone, which is not thread-safe.
*
* @param format Date format
* @return Date formatter using GMT time zone
*/
private static SimpleDateFormat getDateFormatter(final String format) {
final SimpleDateFormat formatter = new SimpleDateFormat(format, Locale.ROOT);
formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
return formatter;
}
}