Skip to content

Commit

Permalink
[Enhancement] Add schema validation and placeholders to index mappings (
Browse files Browse the repository at this point in the history
#3240)

* feat(index mappings): fetch mappings and version from json file instead of string constants

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: changing exception being thrown

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* chore: remove unused file

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* chore: fix typo in comment

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* chore: adding new line at the end of files

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* feat: add test cases

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: remove test code

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix(test): in main the versions were not updated appropriately

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: move mapping templates under common module

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: ensure that conversationindexconstants reference mlindex enums rather than use their own mappings

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: update comment

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* feat: add enhancements to validate index schema and allow using placeholders

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: modifying comment

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* test: adding testcase for MLIndex to catch failures before runtime

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: rename dir from mappings to index-mappings

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: add null checks

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: modify mappin paths for placeholders

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: adding dependencies for testing

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix(test): compare json object rather than strings to avoid eol character issue

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: combine if statements into single check

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactoring: null handling + clean code

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* spotless apply

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* tests: adding more UT

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fix: dependencies to handle jarhell

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* spotless apply

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: add header and use single instance of mapper

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* fixed: doc syntax

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: renamed files, efficient loading of resources, better exception handling

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

* refactor: cleaner comment

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>

---------

Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com>
  • Loading branch information
pyek-bot authored Dec 31, 2024
1 parent b631d89 commit 374cfd5
Show file tree
Hide file tree
Showing 23 changed files with 319 additions and 129 deletions.
3 changes: 3 additions & 0 deletions common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ dependencies {
exclude group: 'com.google.guava', module: 'listenablefuture'
}
compileOnly 'com.jayway.jsonpath:json-path:2.9.0'
compileOnly("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
compileOnly("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}")
compileOnly group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0'
}

lombok {
Expand Down
18 changes: 9 additions & 9 deletions common/src/main/java/org/opensearch/ml/common/CommonValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ public class CommonValue {
public static final Set<String> stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words");

// Index mapping paths
public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json";
public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json";
public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json";
public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json";
public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json";
public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml-controller.json";
public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json";
public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json";
public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json";
public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml_model_group.json";
public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml_model.json";
public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml_task.json";
public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml_connector.json";
public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml_config.json";
public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml_controller.json";
public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml_agent.json";
public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml_memory_meta.json";
public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml_memory_message.json";

// Calculate Versions independently of OpenSearch core version
public static final Version VERSION_2_11_0 = Version.fromString("2.11.0");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@

package org.opensearch.ml.common.utils;

import static org.opensearch.ml.common.utils.StringUtils.validateSchema;

import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import com.google.common.base.Charsets;
Expand Down Expand Up @@ -40,20 +43,99 @@ public class IndexUtils {
public static final Map<String, Object> UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1");
public static final Map<String, Object> UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all");

// Schema that validates system index mappings
public static final String MAPPING_SCHEMA_PATH = "index-mappings/schema.json";

// Placeholders to use within the json mapping files
private static final String USER_PLACEHOLDER = "USER_MAPPING_PLACEHOLDER";
private static final String CONNECTOR_PLACEHOLDER = "CONNECTOR_MAPPING_PLACEHOLDER";
public static final Map<String, String> MAPPING_PLACEHOLDERS = Map
.of(USER_PLACEHOLDER, "index-mappings/placeholders/user.json", CONNECTOR_PLACEHOLDER, "index-mappings/placeholders/connector.json");

public static String getMappingFromFile(String path) throws IOException {
URL url = IndexUtils.class.getClassLoader().getResource(path);
if (url == null) {
throw new IOException("Resource not found: " + path);
}

String mapping = Resources.toString(url, Charsets.UTF_8).trim();
if (mapping.isEmpty() || !StringUtils.isJson(mapping)) {
throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path);
if (mapping.isEmpty()) {
throw new IllegalArgumentException("Empty mapping found at: " + path);
}

mapping = replacePlaceholders(mapping);
validateMapping(mapping);

return mapping;
}

public static String replacePlaceholders(String mapping) throws IOException {
if (mapping == null || mapping.isBlank()) {
throw new IllegalArgumentException("Mapping cannot be null or empty");
}

// Preload resources into memory to avoid redundant I/O
Map<String, String> loadedPlaceholders = new HashMap<>();
for (Map.Entry<String, String> placeholder : MAPPING_PLACEHOLDERS.entrySet()) {
URL url = IndexUtils.class.getClassLoader().getResource(placeholder.getValue());
if (url == null) {
throw new IOException("Resource not found: " + placeholder.getValue());
}

loadedPlaceholders.put(placeholder.getKey(), Resources.toString(url, Charsets.UTF_8));
}

StringBuilder result = new StringBuilder(mapping);
for (Map.Entry<String, String> entry : loadedPlaceholders.entrySet()) {
String placeholder = entry.getKey();
String replacement = entry.getValue();

// Replace all occurrences of the placeholder
int index;
while ((index = result.indexOf(placeholder)) != -1) {
result.replace(index, index + placeholder.length(), replacement);
}
}

return result.toString();
}

/**
* Checks if the provided mapping is a valid JSON and validates it against a schema.
*
* <p>The schema is located at <code>mappings/schema.json</code> and enforces the following validations:</p>
*
* <ul>
* <li>Mandatory fields:
* <ul>
* <li><code>_meta</code></li>
* <li><code>_meta.schema_version</code></li>
* <li><code>properties</code></li>
* </ul>
* </li>
* <li>No additional fields are allowed at the root level.</li>
* <li>No additional fields are allowed in the <code>_meta</code> object.</li>
* <li><code>properties</code> must be an object type.</li>
* <li><code>_meta</code> must be an object type.</li>
* <li><code>_meta.schema_version</code> must be an integer.</li>
* </ul>
*
* <p><strong>Note:</strong> Validation can be made stricter if a specific schema is defined for each index.</p>
*/
public static void validateMapping(String mapping) throws IOException {
if (mapping.isBlank() || !StringUtils.isJson(mapping)) {
throw new IllegalArgumentException("Invalid or non-JSON mapping found: " + mapping);
}

URL url = IndexUtils.class.getClassLoader().getResource(MAPPING_SCHEMA_PATH);
if (url == null) {
throw new IOException("Resource not found: " + MAPPING_SCHEMA_PATH);
}

String schema = Resources.toString(url, Charsets.UTF_8);
validateSchema(schema, mapping);
}

public static Integer getVersionFromMapping(String mapping) {
if (mapping == null || mapping.isBlank()) {
throw new IllegalArgumentException("Mapping cannot be null or empty");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,27 @@
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.lang3.BooleanUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.opensearch.OpenSearchParseException;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import com.jayway.jsonpath.JsonPath;
import com.networknt.schema.JsonSchema;
import com.networknt.schema.JsonSchemaFactory;
import com.networknt.schema.SpecVersion;
import com.networknt.schema.ValidationMessage;

import lombok.extern.log4j.Log4j2;

Expand All @@ -54,6 +63,8 @@ public class StringUtils {
}
public static final String TO_STRING_FUNCTION_NAME = ".toString()";

private static final ObjectMapper MAPPER = new ObjectMapper();

public static boolean isValidJsonString(String json) {
if (json == null || json.isBlank()) {
return false;
Expand Down Expand Up @@ -336,4 +347,28 @@ public static JsonObject getJsonObjectFromString(String jsonString) {
return JsonParser.parseString(jsonString).getAsJsonObject();
}

public static void validateSchema(String schemaString, String instanceString) {
try {
// parse the schema JSON as string
JsonNode schemaNode = MAPPER.readTree(schemaString);
JsonSchema schema = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012).getSchema(schemaNode);

// JSON data to validate
JsonNode jsonNode = MAPPER.readTree(instanceString);

// Validate JSON node against the schema
Set<ValidationMessage> errors = schema.validate(jsonNode);
if (!errors.isEmpty()) {
String errorMessage = errors.stream().map(ValidationMessage::getMessage).collect(Collectors.joining(", "));

throw new OpenSearchParseException(
"Validation failed: " + errorMessage + " for instance: " + instanceString + " with schema: " + schemaString
);
}
} catch (JsonProcessingException e) {
throw new IllegalArgumentException("Invalid JSON format: " + e.getMessage(), e);
} catch (Exception e) {
throw new OpenSearchParseException("Schema validation failed: " + e.getMessage(), e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -167,77 +167,7 @@
}
}
},
"connector": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"version": {
"type": "keyword"
},
"description": {
"type": "text"
},
"protocol": {
"type": "keyword"
},
"parameters": {
"type": "flat_object"
},
"credential": {
"type": "flat_object"
},
"client_config": {
"type": "flat_object"
},
"actions": {
"type": "flat_object"
}
}
},
"user": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"backend_roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"custom_attribute_names": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
"connector": CONNECTOR_MAPPING_PLACEHOLDER,
"user": USER_MAPPING_PLACEHOLDER
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,43 +44,6 @@
"remote_job": {
"type": "flat_object"
},
"user": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"backend_roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"roles": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"custom_attribute_names": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
"user": USER_MAPPING_PLACEHOLDER
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"version": {
"type": "keyword"
},
"description": {
"type": "text"
},
"protocol": {
"type": "keyword"
},
"parameters": {
"type": "flat_object"
},
"credential": {
"type": "flat_object"
},
"client_config": {
"type": "flat_object"
},
"actions": {
"type": "flat_object"
}
}
}
Loading

0 comments on commit 374cfd5

Please sign in to comment.