ElasticsearchUtil.java

/*
 * The contents of this file are subject to the terms of the Common Development and
 * Distribution License (the License). You may not use this file except in compliance with the
 * License.
 *
 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
 * specific language governing permission and limitations under the License.
 *
 * When distributing Covered Software, include this CDDL Header Notice in each file and include
 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
 * Header, with the fields enclosed by brackets [] replaced by your own identifying
 * information: "Portions copyright [year] [name of copyright owner]".
 *
 * Copyright 2016 ForgeRock AS.
 */

package org.forgerock.audit.util;

import static org.forgerock.http.util.Json.readJson;
import static org.forgerock.json.JsonValue.json;

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.forgerock.json.JsonPointer;
import org.forgerock.json.JsonValue;
import org.forgerock.util.annotations.VisibleForTesting;

import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Utilities for working with Elasticsearch.
 */
public final class ElasticsearchUtil {

    /**
     * Jackson {@link ObjectMapper} for working with JSON.
     */
    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * JSON field-name of metadata to assist in de-normalization.
     */
    @VisibleForTesting
    protected static final String NORMALIZED_FIELD = "_normalized";

    /**
     * JSON field-name of normalized field-names.
     */
    @VisibleForTesting
    protected static final String FIELD_NAMES_FIELD = "fieldNames";

    /**
     * JSON text that introduces the {@link #NORMALIZED_FIELD} entry when it is appended to a non-empty JSON object.
     */
    private static final String NORMALIZED_FIELD_JSON_PREFIX = ",\"" + NORMALIZED_FIELD + "\":";

    /**
     * Number of normalization metadata fields that this class might add to the {@link #NORMALIZED_FIELD} JSON object.
     */
    private static final int MAX_FIELD_COUNT = 1;

    /**
     * Regex pattern that matches JSON keys that contain at least one period.  This is useful, because Elasticsearch
     * does not allow period characters in JSON keys
     * [<a href="https://discuss.elastic.co/t/field-name-cannot-contain/33251/29">ref</a>].
     * <p>
     * The following regex matches anything after an open-curly-brace, or comma, that is within double-quotes and
     * followed by a colon. Furthermore, positive-lookahead requires at least one period-character within the
     * double-quotes. Capture group 1 is the key, without the surrounding double-quotes. The pattern is applied to
     * serialized JSON text and does not track nesting, so keys at any depth are matched.
     * <pre>
     *     [{,]\s*"((?=[^"]*[.][^"]*)[^"]+)"\s*:
     * </pre>
     */
    private static final Pattern JSON_KEY_WITH_PERIOD_CHAR_PATTERN =
            Pattern.compile("[{,]\\s*\"((?=[^\"]*[.][^\"]*)[^\"]+)\"\\s*:");

    /**
     * Regex pattern identical to {@link #JSON_KEY_WITH_PERIOD_CHAR_PATTERN}, except it searches for JSON keys that
     * contain underscore characters. This is used to facilitate reversing normalization.
     */
    private static final Pattern JSON_KEY_WITH_UNDERSCORE_CHAR_PATTERN =
            Pattern.compile("[{,]\\s*\"((?=[^\"]*[_][^\"]*)[^\"]+)\"\\s*:");

    /**
     * Regex pattern to find period characters.
     */
    private static final Pattern PERIOD_CHAR_PATTERN = Pattern.compile("[.]");

    private ElasticsearchUtil() {
        // hidden
    }

    /**
     * Normalizes JSON to conform to Elasticsearch data-format restrictions. The following normalizations are performed,
     * <ul>
     * <li>Periods in JSON fields (keys) are converted to underscore characters</li>
     * </ul>
     * The following metadata, for example, is added to the Normalized JSON, to facilitate de-normalization,
     * <pre>
     *     "_normalized" : {
     *         "fieldNames" : {
     *             "key_1" : "key.1",
     *             "key_2" : "key.2"
     *         }
     *     }
     * </pre>
     *
     * @param value JSON value, which may be {@code null}
     * @return Resulting JSON, with {@code _normalized} field if any normalization was necessary, or {@code null}
     *         if {@code value} was {@code null}
     * @throws IOException If unable to serialize the json.
     * @throws IllegalStateException If {@code value} already contains a non-null {@code _normalized} field.
     */
    public static String normalizeJson(final JsonValue value) throws IOException {
        if (value == null) {
            return null;
        }
        // the metadata field-name is reserved, so refuse input that already uses it
        if (value.get(NORMALIZED_FIELD).isNotNull()) {
            throw new IllegalStateException(NORMALIZED_FIELD + " is a reserved JsonValue field");
        }
        final String json = OBJECT_MAPPER.writeValueAsString(value.getObject());
        return replaceKeyPeriodsWithUnderscores(json);
    }

    /**
     * De-normalizes JSON that was previously normalized by  {@link #normalizeJson(JsonValue)}.
     * <p>
     * When the {@code _normalized} metadata field is present, it is removed from {@code value} itself, so the
     * argument is modified by this call.
     *
     * @param value JSON value, which may be {@code null}
     * @return Original, de-normalized JSON, or {@code null} if {@code value} was {@code null}
     * @throws IOException If unable to parse the json.
     */
    public static JsonValue denormalizeJson(final JsonValue value) throws IOException {
        if (value == null) {
            return null;
        }
        final JsonValue normalized = value.get(NORMALIZED_FIELD);
        if (normalized.isNull()) {
            // nothing needs to be de-normalized, because there is no de-normalization metadata
            return value;
        }
        value.remove(NORMALIZED_FIELD);
        return restoreKeyPeriods(value, normalized);
    }

    /**
     * Replaces all period-characters in JSON keys with underscore-characters
     * [<a href="https://discuss.elastic.co/t/field-name-cannot-contain/33251/29">ref</a>]. If normalization is
     * required, the {@code fieldNames} field will be added to the {@code _normalized} metadata, which is appended
     * as the last field of the JSON text.
     * <p>
     * The metadata is appended by dropping the final character of the input and writing the metadata followed by
     * a closing curly-brace, so whenever a key is rewritten the input is expected to be a JSON object whose last
     * character is {@code '}'}.
     *
     * @param json JSON {@code String} input
     * @return Resulting JSON {@code String}, or the unmodified input if no key contains a period
     * @throws IOException If unable to serialize the metadata.
     */
    @VisibleForTesting
    protected static String replaceKeyPeriodsWithUnderscores(final String json)
            throws IOException {
        final Matcher m = JSON_KEY_WITH_PERIOD_CHAR_PATTERN.matcher(json);
        if (!m.find()) {
            // no normalization required
            return json;
        }

        // fieldNames contains metadata for de-normalization
        final Map<String, Object> normalized = new LinkedHashMap<>(MAX_FIELD_COUNT);
        final Map<String, Object> fieldNames = new LinkedHashMap<>(2);
        normalized.put(FIELD_NAMES_FIELD, fieldNames);

        final int n = json.length();

        // extra capacity leaves room for the appended metadata
        final StringBuilder builder = new StringBuilder(n + NORMALIZED_FIELD_JSON_PREFIX.length() + 128);
        builder.append(json);

        // Replacements are made in-place and never change the length, so the offsets that the matcher reports
        // for the original text stay valid for the builder.
        do {
            final String originalFieldName = m.group(1);
            final String normalizedFieldName = replaceAllPeriodsWithUnderscores(builder, m.start(1), m.end(1));
            fieldNames.put(normalizedFieldName, originalFieldName);
        } while (m.find());

        // remove last curly-brace, so that we can append
        builder.setLength(n - 1);

        // add JSON metadata to end
        builder.append(NORMALIZED_FIELD_JSON_PREFIX)
                .append(OBJECT_MAPPER.writeValueAsString(normalized))
                .append('}');

        return builder.toString();
    }

    /**
     * Reverses the normalization steps performed by {@link #replaceKeyPeriodsWithUnderscores(String)}.
     * <p>
     * Every JSON key that contains an underscore is looked up in the {@code fieldNames} metadata, and is replaced
     * by the mapped original name when a mapping exists. Keys without a mapping are left unchanged.
     *
     * @param value JSON input
     * @param normalized De-normalization metadata, which is only read by this method
     * @return Resulting JSON, which is {@code value} itself when there are no field-name mappings or no key
     *         contains an underscore, and a newly parsed {@link JsonValue} otherwise
     * @throws IOException If unable to serialize or parse the json.
     */
    @VisibleForTesting
    protected static JsonValue restoreKeyPeriods(final JsonValue value, final JsonValue normalized) throws IOException {
        final JsonValue fieldNames = normalized.get(FIELD_NAMES_FIELD);
        if (fieldNames.isNull() || fieldNames.asMap().isEmpty()) {
            // no normalization required
            return value;
        }

        final String s = OBJECT_MAPPER.writeValueAsString(value.getObject());
        final Matcher m = JSON_KEY_WITH_UNDERSCORE_CHAR_PATTERN.matcher(s);
        if (!m.find()) {
            // no key could have been normalized
            return value;
        }

        final StringBuilder builder = new StringBuilder(s.length());
        int index = 0;
        do {
            // copy the text between the previous key (or the start) and this key, then the restored key
            builder.append(s, index, m.start(1));
            builder.append(replace(m.group(1), fieldNames));
            index = m.end(1);
        } while (m.find());

        // copy whatever follows the last matched key
        builder.append(s, index, s.length());

        return json(readJson(builder.toString()));
    }

    /**
     * Replaces all period-characters with underscore-characters, in-place, within the given range.
     *
     * @param s Input, which is modified
     * @param start Start index (inclusive)
     * @param end End index (exclusive)
     * @return Content of the range after replacement
     */
    private static String replaceAllPeriodsWithUnderscores(final StringBuilder s, final int start, final int end) {
        for (int i = start; i < end; ++i) {
            if (s.charAt(i) == '.') {
                s.setCharAt(i, '_');
            }
        }
        return s.substring(start, end);
    }

    /**
     * Finds replacement for a given key ({@literal fieldName}) within {@literal fieldNames} map. If a key-value mapping
     * is not found, then the original input is returned.
     *
     * @param fieldName Key
     * @param fieldNames Map of key-values ({@literal String})
     * @return Result
     */
    private static String replace(final String fieldName, final JsonValue fieldNames) {
        final JsonValue value = fieldNames.get(fieldName);
        return value.isNull() ? fieldName : value.asString();
    }

    /**
     * Replaces periods in {@link JsonPointer} keys with underscore.
     *
     * @param ptr The {@link JsonPointer} to normalize, which may be {@code null}.
     * @return A normalized {@link JsonPointer}, or {@code ptr} itself when it is {@code null} or contains no period.
     */
    public static JsonPointer normalizeJsonPointer(final JsonPointer ptr) {
        if (ptr != null) {
            final String jsonPointer = ptr.toString();
            final Matcher matcher = PERIOD_CHAR_PATTERN.matcher(jsonPointer);
            if (matcher.find()) {
                return new JsonPointer(matcher.replaceAll("_"));
            }
        }
        return ptr;
    }

    /**
     * Renames a field within the given {@link JsonValue}.
     * <p>
     * If {@code newKey} already holds a non-null value, the rename is refused and both the {@code oldKey} and
     * {@code newKey} values are left in place.
     *
     * @param jsonValue {@link JsonValue} to have a top-level field renamed
     * @param oldKey Old field name
     * @param newKey New field name (field must <b>not</b> already exist)
     * @return {@code true} if field was found and renamed, and {@code false} otherwise
     * @throws IllegalStateException If {@code newKey} already exists with a non-null value.
     */
    public static boolean renameField(final JsonValue jsonValue, final String oldKey, final String newKey) {
        if (!jsonValue.isMap()) {
            return false;
        }
        final Map<String, Object> map = jsonValue.asMap();
        final Object value = map.remove(oldKey);
        if (value == null) {
            return false;
        }
        // Check for a conflicting entry BEFORE writing, so that the existing newKey value is never overwritten.
        // Removing oldKey first means that renaming a key to itself is not reported as a conflict.
        if (map.get(newKey) != null) {
            // newKey already exists, so put the removed entry back and throw Exception
            map.put(oldKey, value);
            throw new IllegalStateException("Cannot overwrite existing field: " + newKey);
        }
        map.put(newKey, value);
        return true;
    }
}