Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Fix bug where ingestion failed for input document containing list of nested objects #1053

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041))
### Bug Fixes
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
- Fix bug where ingestion failed for input document containing list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
### Infrastructure
### Documentation
### Maintenance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -434,23 +434,26 @@ private void putNLPResultToSourceMapForMapType(
if (sourceValue instanceof Map) {
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
if (sourceAndMetadataMap.get(processorKey) instanceof List) {
// build nlp output for list of nested objects
Iterator<Object> inputNestedMapValueIt = ((List<Object>) inputNestedMapEntry.getValue()).iterator();
for (Map<String, Object> nestedElement : (List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey)) {
// Only fill in when value is not null
if (inputNestedMapValueIt.hasNext() && inputNestedMapValueIt.next() != null) {
nestedElement.put(inputNestedMapEntry.getKey(), results.get(indexWrapper.index++));
}
if (inputNestedMapEntry.getValue() instanceof List) {
processMapEntryValue(
results,
indexWrapper,
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
inputNestedMapEntry.getKey(),
(List<Object>) inputNestedMapEntry.getValue()
);
} else if (inputNestedMapEntry.getValue() instanceof Map) {
processMapEntryValue(
results,
indexWrapper,
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
inputNestedMapEntry.getKey(),
inputNestedMapEntry.getValue()
);
}
} else {
Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
Map<String, Object> sourceMap;
if (sourceAndMetadataMap.get(processorKey) == null) {
sourceMap = new HashMap<>();
sourceAndMetadataMap.put(processorKey, sourceMap);
} else {
sourceMap = (Map<String, Object>) sourceAndMetadataMap.get(processorKey);
}
Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
putNLPResultToSourceMapForMapType(
processedNestedKey.getKey(),
processedNestedKey.getValue(),
Expand All @@ -471,6 +474,97 @@ private void putNLPResultToSourceMapForMapType(
}
}

/**
 * Builds nlp output for a source value of list type.
 * <p>
 * Walks the target elements and the input values side by side. For each position that has a
 * non-null input value, the next unused entry of {@code results} is stored in the target element
 * under {@code inputNestedMapEntryKey}. Positions with a null input value, or positions past the
 * end of the input values, are left untouched and do not consume a result.
 *
 * @param results inference results, consumed in order
 * @param indexWrapper holder of the position of the next unused entry in {@code results}
 * @param sourceAndMetadataMapValueInList target elements that receive the results
 * @param inputNestedMapEntryKey key under which a result is stored in a target element
 * @param inputNestedMapEntryValue input values, matched to target elements by position
 */
private void processMapEntryValue(
    List<?> results,
    IndexWrapper indexWrapper,
    List<Map<String, Object>> sourceAndMetadataMapValueInList,
    String inputNestedMapEntryKey,
    List<Object> inputNestedMapEntryValue
) {
    final Iterator<Object> inputCursor = inputNestedMapEntryValue.iterator();
    for (final Map<String, Object> target : sourceAndMetadataMapValueInList) {
        if (!inputCursor.hasNext()) {
            // no input value at this position, nothing to fill in
            continue;
        }
        final Object inputValue = inputCursor.next();
        if (inputValue == null) {
            // null input values do not consume a result
            continue;
        }
        final Object inferenceResult = results.get(indexWrapper.index++);
        target.put(inputNestedMapEntryKey, inferenceResult);
    }
}

/**
 * Builds nlp output for a source value of map type.
 * <p>
 * Each target element is handed, together with its position in the list, to
 * {@code putNLPResultToSingleSourceMapInList}, which places the matching result in that element.
 *
 * @param results inference results, consumed in order
 * @param indexWrapper holder of the position of the next unused entry in {@code results}
 * @param sourceAndMetadataMapValueInList target elements that receive the results
 * @param inputNestedMapEntryKey key passed on as the processor key for every element
 * @param inputNestedMapEntryValue input value passed on unchanged for every element
 */
private void processMapEntryValue(
    List<?> results,
    IndexWrapper indexWrapper,
    List<Map<String, Object>> sourceAndMetadataMapValueInList,
    String inputNestedMapEntryKey,
    Object inputNestedMapEntryValue
) {
    int position = 0;
    for (final Map<String, Object> target : sourceAndMetadataMapValueInList) {
        putNLPResultToSingleSourceMapInList(
            inputNestedMapEntryKey,
            inputNestedMapEntryValue,
            results,
            indexWrapper,
            target,
            position
        );
        position++;
    }
}

/**
 * Put nlp result to a single source element that belongs to a list field of the source document.
 * Such a source element is of map type.
 * <p>
 * When {@code sourceValue} is a map, every entry is passed through {@code processNestedKey} and then
 * handled recursively against the child map stored under {@code processorKey}, which is created when
 * absent. When {@code sourceValue} is a list, the entry at {@code nestedElementIndex} decides whether
 * a result is consumed: a non-null entry merges the next unused element of {@code results} under
 * {@code processorKey}; a null entry, or an index outside the list, consumes nothing. Values of any
 * other type are ignored. The method returns immediately when {@code processorKey},
 * {@code sourceValue} or {@code sourceAndMetadataMap} is null.
 *
 * @param processorKey key in {@code sourceAndMetadataMap} that receives the result or the child map
 * @param sourceValue input value for this key; a map is descended into, a list is read by index
 * @param results inference results, consumed in order
 * @param indexWrapper holder of the position of the next unused entry in {@code results}
 * @param sourceAndMetadataMap map of the single element being filled in
 * @param nestedElementIndex index of the element in the list field of source document
 */
@SuppressWarnings("unchecked")
private void putNLPResultToSingleSourceMapInList(
    String processorKey,
    Object sourceValue,
    List<?> results,
    IndexWrapper indexWrapper,
    Map<String, Object> sourceAndMetadataMap,
    int nestedElementIndex
) {
    if (processorKey == null || sourceAndMetadataMap == null || sourceValue == null) {
        return;
    }
    if (sourceValue instanceof Map) {
        for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
            Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
            Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
            putNLPResultToSingleSourceMapInList(
                processedNestedKey.getKey(),
                processedNestedKey.getValue(),
                results,
                indexWrapper,
                sourceMap,
                nestedElementIndex
            );
        }
        return;
    }
    if (sourceValue instanceof List) {
        List<Object> inputValues = (List<Object>) sourceValue;
        // An input list shorter than the target list has no value (and therefore no result) for this
        // element; skip it instead of failing with IndexOutOfBoundsException.
        boolean hasInputValue = nestedElementIndex >= 0 && nestedElementIndex < inputValues.size();
        if (hasInputValue && inputValues.get(nestedElementIndex) != null) {
            // NOTE(review): REMAPPING_FUNCTION is declared elsewhere; it decides how an existing
            // value under processorKey is combined with the new result - confirm its semantics.
            sourceAndMetadataMap.merge(processorKey, results.get(indexWrapper.index++), REMAPPING_FUNCTION);
        }
    }
}

/**
 * Returns the map stored under {@code processorKey}, creating and registering an empty one when the
 * key is absent or mapped to null.
 *
 * @param processorKey key to look up in {@code sourceAndMetadataMap}
 * @param sourceAndMetadataMap map that holds, or will hold, the child map
 * @return the existing child map, or the newly inserted empty map
 */
@SuppressWarnings("unchecked")
private Map<String, Object> getSourceMapBySourceAndMetadataMap(String processorKey, Map<String, Object> sourceAndMetadataMap) {
    Object existing = sourceAndMetadataMap.get(processorKey);
    if (existing == null) {
        // allocate only when there is nothing to reuse
        Map<String, Object> created = new HashMap<>();
        sourceAndMetadataMap.put(processorKey, created);
        return created;
    }
    return (Map<String, Object>) existing;
}

private List<Map<String, Object>> buildNLPResultForListType(List<String> sourceValue, List<?> results, IndexWrapper indexWrapper) {
List<Map<String, Object>> keyToResult = new ArrayList<>();
IntStream.range(0, sourceValue.size())
Expand Down
Loading
Loading