Uris.java

/*
 * The contents of this file are subject to the terms of the Common Development and
 * Distribution License (the License). You may not use this file except in compliance with the
 * License.
 *
 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
 * specific language governing permission and limitations under the License.
 *
 * When distributing Covered Software, include this CDDL Header Notice in each file and include
 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
 * Header, with the fields enclosed by brackets [] replaced by your own identifying
 * information: "Portions Copyright [year] [name of copyright owner]".
 *
 * Copyright 2010–2011 ApexIdentity Inc.
 * Portions Copyright 2011-2016 ForgeRock AS.
 */

package org.forgerock.http.util;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import java.util.Locale;

import org.forgerock.http.protocol.Form;

/**
 * Utility class for performing operations on universal resource identifiers: assembling URIs from
 * raw (already percent-encoded) components, swapping parts of an existing URI, and encoding or
 * decoding individual URI components.
 */
public final class Uris {

    /** Non-safe characters are escaped as UTF-8 octets using "%" HEXDIG HEXDIG production. */
    private static final char URL_ESCAPE_CHAR = '%';

    /** Look up table for characters which do not need URL encoding in path elements according to RFC 3986. */
    private static final BitSet SAFE_URL_PCHAR_CHARS = new BitSet(128);

    /** Look up table for characters which do not need URL encoding in query string parameters according to RFC 3986. */
    private static final BitSet SAFE_URL_QUERY_CHARS = new BitSet(128);

    /** Look up table for characters which do not need URL encoding in fragments according to RFC 3986. */
    private static final BitSet SAFE_URL_FRAGMENT_CHARS = new BitSet(128);

    /** Look up table for characters which do not need URL encoding in userInfo according to RFC 3986. */
    private static final BitSet SAFE_URL_USERINFO_CHARS = new BitSet(128);

    static {
        /*
         * pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
         *
         * pct-encoded = "%" HEXDIG HEXDIG
         * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
         * sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
         */
        for (char c : "-._~!$&'()*+,;=:@".toCharArray()) {
            SAFE_URL_PCHAR_CHARS.set(c);
        }
        SAFE_URL_PCHAR_CHARS.set('0', '9' + 1);
        SAFE_URL_PCHAR_CHARS.set('a', 'z' + 1);
        SAFE_URL_PCHAR_CHARS.set('A', 'Z' + 1);

        // query = *( pchar / "/" / "?" ) - but '&' and '=' are always encoded because they separate
        //                                  parameters and names from values, and so is '+' because
        //                                  it is used for encoding white space (see w3c).
        //                                  Note that '/' and '?' are left unencoded.
        SAFE_URL_QUERY_CHARS.or(SAFE_URL_PCHAR_CHARS);
        SAFE_URL_QUERY_CHARS.set('/');
        SAFE_URL_QUERY_CHARS.set('?');
        SAFE_URL_QUERY_CHARS.clear('&');
        SAFE_URL_QUERY_CHARS.clear('=');
        SAFE_URL_QUERY_CHARS.clear('+');

        // fragment = *( pchar / "/" / "?" )
        SAFE_URL_FRAGMENT_CHARS.or(SAFE_URL_PCHAR_CHARS);
        SAFE_URL_FRAGMENT_CHARS.set('/');
        SAFE_URL_FRAGMENT_CHARS.set('?');

        // userInfo = *( unreserved / pct-encoded / sub-delims / ":" )
        SAFE_URL_USERINFO_CHARS.or(SAFE_URL_PCHAR_CHARS);
        SAFE_URL_USERINFO_CHARS.clear('@');
    }

    /**
     * Fast lookup for encoding octets as hex: entry {@code i} is the two-digit upper-case
     * hexadecimal representation of the octet value {@code i}.
     */
    private static final String[] BYTE_TO_HEX = new String[256];
    static {
        for (int i = 0; i < BYTE_TO_HEX.length; i++) {
            BYTE_TO_HEX[i] = String.format(Locale.ROOT, "%02X", i);
        }
    }

    /** Static methods only. */
    private Uris() {
    }

    /**
     * Returns a hierarchical URI constructed from the given components. Differs from the URI
     * constructor by accepting raw versions of userInfo, path, query and fragment components.
     * <p>
     * Unlike {@link #createNonStrict}, this method does not tolerate invalid characters, such
     * as double-quotes, in the query string.
     * <p>
     * The components are concatenated verbatim and the result is parsed with
     * {@link URI#URI(String)}. The {@code "//"} authority prefix and the port are only emitted
     * when {@code host} is not {@code null}; a non-{@code null} {@code rawUserInfo} is always
     * emitted, followed by {@code '@'}.
     *
     * @param scheme the scheme component of the URI or {@code null} if none.
     * @param rawUserInfo the raw user-information component of the URI or {@code null} if none.
     * @param host the host component of the URI or {@code null} if none.
     * @param port the port number of the URI or {@code -1} if none.
     * @param rawPath the raw path component of the URI or {@code null} if none.
     * @param rawQuery the raw query component of the URI or {@code null} if none. The raw query must not contain
     *                 characters that should have been percent encoded.
     * @param rawFragment the raw fragment component of the URI or {@code null} if none.
     * @return the URI constructed from the given components.
     * @throws URISyntaxException if the resulting URI would be malformed per RFC 2396.
     */
    public static URI create(String scheme, String rawUserInfo, String host, int port,
            String rawPath, String rawQuery, String rawFragment) throws URISyntaxException {
        final StringBuilder sb = new StringBuilder();
        if (scheme != null) {
            sb.append(scheme).append(':');
        }
        if (host != null) {
            sb.append("//");
        }
        if (rawUserInfo != null) {
            sb.append(rawUserInfo).append('@');
        }
        if (host != null) {
            sb.append(host);
            if (port != -1) {
                sb.append(':').append(port);
            }
        }
        if (rawPath != null) {
            sb.append(rawPath);
        }
        if (rawQuery != null) {
            sb.append('?').append(rawQuery);
        }
        if (rawFragment != null) {
            sb.append('#').append(rawFragment);
        }
        return new URI(sb.toString());
    }

    /**
     * Returns a hierarchical URI constructed from the given components. Differs from the URI
     * constructor by accepting raw versions of userInfo, path, query and fragment components.
     * <p>
     * Unlike {@link #create}, this method tolerates invalid characters, such as double-quotes,
     * in the query string: every parameter name and value of the query is decoded and then
     * re-encoded before the URI is assembled.
     *
     * @param scheme the scheme component of the URI or {@code null} if none.
     * @param rawUserInfo the raw user-information component of the URI or {@code null} if none.
     * @param host the host component of the URI or {@code null} if none.
     * @param port the port number of the URI or {@code -1} if none.
     * @param rawPath the raw path component of the URI or {@code null} if none.
     * @param rawQuery the raw query component of the URI or {@code null} if none. The raw query may contain
     *                 characters that should have been percent encoded.
     * @param rawFragment the raw fragment component of the URI or {@code null} if none.
     * @return the URI constructed from the given components.
     * @throws URISyntaxException if the resulting URI would be malformed per RFC 2396, or if the
     *         query contains an incomplete or invalid percent encoding.
     */
    public static URI createNonStrict(String scheme, String rawUserInfo, String host, int port,
                             String rawPath, String rawQuery, String rawFragment) throws URISyntaxException {
        return create(scheme, rawUserInfo, host, port, rawPath, asSafeQuery(rawQuery), rawFragment);
    }

    /**
     * Normalizes a raw query string by decoding and re-encoding each parameter name and value.
     * Parameters are split on {@code '&'} and then on the first {@code '='}; parameters whose
     * name is empty are dropped.
     *
     * @param rawQuery the raw query string, which may be {@code null}.
     * @return the normalized query string, or {@code null} if {@code rawQuery} was {@code null}.
     * @throws URISyntaxException if a name or value contains a broken percent encoding.
     */
    private static String asSafeQuery(final String rawQuery) throws URISyntaxException {
        if (rawQuery == null) {
            return null;
        }
        // Allocate a bit of extra padding in case a couple of characters need % encoding.
        final StringBuilder builder = new StringBuilder(rawQuery.length() + 8);
        for (final String param : rawQuery.split("&")) {
            final String[] nameAndValue = param.split("=", 2);
            if (nameAndValue[0].isEmpty()) {
                continue;
            }
            if (builder.length() > 0) {
                builder.append('&');
            }
            try {
                final String name = urlDecodeQueryParameterNameOrValue(nameAndValue[0]);
                builder.append(urlEncodeQueryParameterNameOrValue(name));
                if (nameAndValue.length == 2) {
                    final String value = urlDecodeQueryParameterNameOrValue(nameAndValue[1]);
                    builder.append('=').append(urlEncodeQueryParameterNameOrValue(value));
                }
            } catch (final IllegalArgumentException e) {
                // URISyntaxException has no cause-accepting constructor, so attach the decoding
                // failure explicitly rather than losing it.
                final URISyntaxException syntaxException =
                        new URISyntaxException(rawQuery, "The URL query string could not be decoded");
                syntaxException.initCause(e);
                throw syntaxException;
            }
        }
        return builder.toString();
    }

    /**
     * Changes the base scheme, host and port of a request to that specified in a base URI,
     * or leaves them unchanged if the base URI is {@code null}. This implementation only
     * uses scheme, host and port. The remaining components of the URI remain intact.
     * <p>
     * The URI is also returned unchanged when the base URI has no scheme or no host.
     *
     * @param uri the URI whose base is to be changed.
     * @param base the URI to base the other URI on.
     * @return the URI with the new established base.
     * @throws IllegalStateException if the rebased URI is not syntactically valid.
     */
    public static URI rebase(URI uri, URI base)  {
        if (base == null) {
            return uri;
        }
        final String scheme = base.getScheme();
        final String host = base.getHost();
        if (scheme == null || host == null) {
            return uri;
        }
        try {
            return create(scheme, uri.getRawUserInfo(), host, base.getPort(), uri.getRawPath(),
                    uri.getRawQuery(), uri.getRawFragment());
        } catch (URISyntaxException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Returns a new URI having the provided query parameters. The scheme,
     * authority, path, and fragment remain unchanged.
     *
     * @param uri
     *            the URI whose query is to be changed.
     * @param query
     *            the form containing the query parameters.
     * @return a new URI having the provided query parameters. The scheme,
     *         authority, path, and fragment remain unchanged.
     * @throws IllegalStateException
     *             if the resulting URI is not syntactically valid.
     */
    public static URI withQuery(final URI uri, final Form query) {
        try {
            return create(uri.getScheme(), uri.getRawUserInfo(), uri.getHost(), uri.getPort(),
                    uri.getRawPath(), query.toQueryString(), uri.getRawFragment());
        } catch (final URISyntaxException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Returns a new URI having the same scheme, authority and path, but no
     * query nor fragment.
     * <p>
     * The result is assembled from the <em>raw</em> authority and path so that percent-encoded
     * characters (for example an encoded slash {@code %2F} inside a path segment) are carried
     * over exactly as they appear in {@code uri} instead of being decoded and re-quoted.
     *
     * @param uri
     *            the URI whose query and fragments are to be removed.
     * @return a new URI having the same scheme, authority and path, but no
     *         query nor fragment.
     * @throws IllegalStateException
     *             if the remaining components do not form a valid URI.
     */
    public static URI withoutQueryAndFragment(final URI uri) {
        final StringBuilder sb = new StringBuilder();
        final String scheme = uri.getScheme();
        if (scheme != null) {
            sb.append(scheme).append(':');
        }
        final String rawAuthority = uri.getRawAuthority();
        if (rawAuthority != null) {
            sb.append("//").append(rawAuthority);
        }
        final String rawPath = uri.getRawPath();
        if (rawPath != null) {
            sb.append(rawPath);
        }
        try {
            return new URI(sb.toString());
        } catch (final URISyntaxException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Decodes the provided form encoded parameter name or value as per application/x-www-form-urlencoded.
     *
     * @param nameOrValue
     *         the form encoded parameter name or value, which may be {@code null}.
     * @return the decoded form parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
     */
    public static String formDecodeParameterNameOrValue(String nameOrValue) {
        if (nameOrValue == null) {
            return null;
        }
        try {
            return URLDecoder.decode(nameOrValue, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Not expected: every Java platform is required to support UTF-8.
            return nameOrValue;
        }
    }

    /**
     * Form encodes the provided parameter name or value as per application/x-www-form-urlencoded.
     *
     * @param nameOrValue
     *         the parameter name or value, which may be {@code null}.
     * @return the form encoded parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
     */
    public static String formEncodeParameterNameOrValue(String nameOrValue) {
        if (nameOrValue == null) {
            return null;
        }
        try {
            return URLEncoder.encode(nameOrValue, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Not expected: every Java platform is required to support UTF-8.
            return nameOrValue;
        }
    }

    /**
     * Decodes the provided URL encoded path element as per RFC 3986.
     *
     * @param pathElement
     *            the URL encoded path element, which may be {@code null}.
     * @return the decoded path element, or {@code null} if {@code pathElement} was {@code null}.
     * @throws IllegalArgumentException
     *             if {@code pathElement} contains an incomplete or invalid percent encoding.
     */
    public static String urlDecodePathElement(String pathElement) {
        return urlDecode(pathElement, false);
    }

    /**
     * URL encodes the provided path element as per RFC 3986.
     *
     * @param pathElement
     *         the path element, which may be {@code null}.
     * @return the URL encoded path element, or {@code null} if {@code pathElement} was {@code null}.
     */
    public static String urlEncodePathElement(String pathElement) {
        return urlEncode(pathElement, SAFE_URL_PCHAR_CHARS);
    }

    /**
     * Decodes the provided URL encoded query parameter name or value as per RFC 3986. In addition
     * to percent encodings, each {@code '+'} is decoded to a space.
     *
     * @param nameOrValue
     *            the URL encoded query parameter name or value, which may be {@code null}.
     * @return the decoded query parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
     * @throws IllegalArgumentException
     *             if {@code nameOrValue} contains an incomplete or invalid percent encoding.
     */
    public static String urlDecodeQueryParameterNameOrValue(String nameOrValue) {
        return urlDecode(nameOrValue, true);
    }

    /**
     * URL encodes the provided query parameter name or value as per RFC 3986. Note that this method does not
     * adhere to the "query" production in RFC 3986, because it is intended for encoding query parameter names or
     * values. Therefore, this method will encode '&amp;', '=' and '+' characters, while '/' and '?' are left
     * as they are.
     *
     * @param nameOrValue
     *         the query parameter name or value, which may be {@code null}.
     * @return the URL encoded query parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
     */
    public static String urlEncodeQueryParameterNameOrValue(String nameOrValue) {
        return urlEncode(nameOrValue, SAFE_URL_QUERY_CHARS);
    }

    /**
     * Decodes the provided URL encoded fragment as per RFC 3986.
     *
     * @param fragment
     *            the URL encoded fragment, which may be {@code null}.
     * @return the decoded fragment, or {@code null} if {@code fragment} was {@code null}.
     * @throws IllegalArgumentException
     *             if {@code fragment} contains an incomplete or invalid percent encoding.
     */
    public static String urlDecodeFragment(String fragment) {
        return urlDecode(fragment, false);
    }

    /**
     * URL encodes the provided fragment as per RFC 3986.
     *
     * @param fragment
     *         the fragment, which may be {@code null}.
     * @return the URL encoded fragment, or {@code null} if {@code fragment} was {@code null}.
     */
    public static String urlEncodeFragment(String fragment) {
        return urlEncode(fragment, SAFE_URL_FRAGMENT_CHARS);
    }

    /**
     * Decodes the provided URL encoded userInfo as per RFC 3986.
     *
     * @param userInfo
     *            the URL encoded userInfo, which may be {@code null}.
     * @return the decoded userInfo, or {@code null} if {@code userInfo} was {@code null}.
     * @throws IllegalArgumentException
     *             if {@code userInfo} contains an incomplete or invalid percent encoding.
     */
    public static String urlDecodeUserInfo(String userInfo) {
        return urlDecode(userInfo, false);
    }

    /**
     * URL encodes the provided userInfo as per RFC 3986.
     *
     * @param userInfo
     *         the userInfo, which may be {@code null}.
     * @return the URL encoded userInfo, or {@code null} if {@code userInfo} was {@code null}.
     */
    public static String urlEncodeUserInfo(String userInfo) {
        return urlEncode(userInfo, SAFE_URL_USERINFO_CHARS);
    }

    /**
     * Decodes percent encodings in {@code s}, returning {@code s} itself when it contains nothing
     * that needs decoding.
     *
     * @param s the encoded string, which may be {@code null}.
     * @param decodePlusToSpace whether {@code '+'} must be decoded to a space.
     * @return the decoded string, or {@code null} if {@code s} was {@code null}.
     */
    private static String urlDecode(final String s, final boolean decodePlusToSpace) {
        if (s == null) {
            return null;
        }
        // Fast path: without any '%' (and '+', when relevant) the input is already decoded.
        final boolean needsDecoding = s.indexOf(URL_ESCAPE_CHAR) >= 0
                || (decodePlusToSpace && s.indexOf('+') >= 0);
        return needsDecoding ? urlDecode0(s, decodePlusToSpace) : s;
    }

    /**
     * Slow path of {@link #urlDecode}: copies unescaped characters as they are and converts each
     * run of consecutive percent encoded octets into characters by interpreting the octets as UTF-8.
     *
     * @param s the encoded string, never {@code null}.
     * @param decodePlusToSpace whether {@code '+'} must be decoded to a space.
     * @return the decoded string.
     * @throws IllegalArgumentException if a {@code '%'} is not followed by exactly two ASCII
     *         hexadecimal digits.
     */
    private static String urlDecode0(final String s, final boolean decodePlusToSpace) {
        final int size = s.length();
        final StringBuilder builder = new StringBuilder(size);
        // Every encoded octet occupies three characters, so size / 3 bounds the longest run.
        final byte[] buffer = new byte[size / 3];
        int i = 0;
        while (i < size) {
            final char c = s.charAt(i);
            if (decodePlusToSpace && c == '+') {
                builder.append(' ');
                i++;
            } else if (!isUrlEscapeChar(c)) {
                builder.append(c);
                i++;
            } else {
                // Gather the whole run of adjacent escapes before decoding, because a single
                // character may be spread over several UTF-8 octets.
                int bufferPos = 0;
                while (i < size && isUrlEscapeChar(s.charAt(i))) {
                    if ((i + 2) >= size) {
                        throw new IllegalArgumentException(
                                "Path contains an incomplete percent encoding");
                    }
                    // Validate the digits by hand: Integer.parseInt would also accept a leading
                    // sign and non-ASCII digits, none of which are HEXDIG characters.
                    final int high = hexDigitValue(s.charAt(i + 1));
                    final int low = hexDigitValue(s.charAt(i + 2));
                    if (high < 0 || low < 0) {
                        throw new IllegalArgumentException(
                                "Path contains an invalid percent encoding '" + s.substring(i + 1, i + 3) + "'");
                    }
                    buffer[bufferPos++] = (byte) ((high << 4) | low);
                    i += 3;
                }
                builder.append(new String(buffer, 0, bufferPos, StandardCharsets.UTF_8));
            }
        }
        return builder.toString();
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit.
     *
     * @param c the character to convert.
     * @return the value between 0 and 15, or {@code -1} if {@code c} is not one of
     *         {@code 0-9}, {@code a-f} or {@code A-F}.
     */
    private static int hexDigitValue(final char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Percent encodes every character of {@code s} which is not flagged in {@code safeChars},
     * returning {@code s} itself when all of its characters are safe.
     *
     * @param s the string to encode, which may be {@code null}.
     * @param safeChars look up table of the characters which must not be encoded.
     * @return the encoded string, or {@code null} if {@code s} was {@code null}.
     */
    private static String urlEncode(final String s, final BitSet safeChars) {
        if (s == null) {
            return null;
        }
        // First try fast-path encode of simple ASCII.
        final int size = s.length();
        for (int i = 0; i < size; i++) {
            if (!safeChars.get(s.charAt(i))) {
                // Slow path.
                return urlEncode0(s, safeChars);
            }
        }
        return s;
    }

    /**
     * Slow path of {@link #urlEncode}: converts {@code s} to UTF-8 and writes each octet either
     * as the matching character, when it is safe, or as {@code '%'} followed by two upper-case
     * hexadecimal digits.
     *
     * @param s the string to encode, never {@code null}.
     * @param safeChars look up table of the characters which must not be encoded.
     * @return the encoded string.
     */
    private static String urlEncode0(String s, final BitSet safeChars) {
        final byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
        final StringBuilder builder = new StringBuilder(utf8.length + 16);
        for (final byte b : utf8) {
            // Mask to get the unsigned octet value used to index both look up tables.
            final int octet = b & 0xff;
            if (safeChars.get(octet)) {
                builder.append((char) octet);
            } else {
                builder.append(URL_ESCAPE_CHAR).append(BYTE_TO_HEX[octet]);
            }
        }
        return builder.toString();
    }

    /**
     * Indicates whether the provided character introduces a percent encoded octet.
     *
     * @param c the character to test.
     * @return {@code true} if {@code c} is {@code '%'}.
     */
    private static boolean isUrlEscapeChar(final char c) {
        return c == URL_ESCAPE_CHAR;
    }
}