Skip to content

Commit 90df6c9

Browse files
Fix bug where ingestion failed for input document containing list of nested objects (#1040)
* Fix bug where ingestion failed for input document containing list of nested objects Signed-off-by: Yizhe Liu <yizheliu@amazon.com> * Address comments to use better method name/implementation Signed-off-by: Yizhe Liu <yizheliu@amazon.com> * Address comments: modify the test case to have doc with various fields Signed-off-by: Yizhe Liu <yizheliu@amazon.com> --------- Signed-off-by: Yizhe Liu <yizheliu@amazon.com>
1 parent ee24b1c commit 90df6c9

File tree

3 files changed

+286
-46
lines changed

3 files changed

+286
-46
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2323
- Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041))
2424
### Bug Fixes
2525
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
26+
- Fix bug where ingestion failed for input document containing list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
2627
### Infrastructure
2728
### Documentation
2829
### Maintenance

src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java

+108-14
Original file line numberDiff line numberDiff line change
@@ -434,23 +434,26 @@ private void putNLPResultToSourceMapForMapType(
434434
if (sourceValue instanceof Map) {
435435
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
436436
if (sourceAndMetadataMap.get(processorKey) instanceof List) {
437-
// build nlp output for list of nested objects
438-
Iterator<Object> inputNestedMapValueIt = ((List<Object>) inputNestedMapEntry.getValue()).iterator();
439-
for (Map<String, Object> nestedElement : (List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey)) {
440-
// Only fill in when value is not null
441-
if (inputNestedMapValueIt.hasNext() && inputNestedMapValueIt.next() != null) {
442-
nestedElement.put(inputNestedMapEntry.getKey(), results.get(indexWrapper.index++));
443-
}
437+
if (inputNestedMapEntry.getValue() instanceof List) {
438+
processMapEntryValue(
439+
results,
440+
indexWrapper,
441+
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
442+
inputNestedMapEntry.getKey(),
443+
(List<Object>) inputNestedMapEntry.getValue()
444+
);
445+
} else if (inputNestedMapEntry.getValue() instanceof Map) {
446+
processMapEntryValue(
447+
results,
448+
indexWrapper,
449+
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
450+
inputNestedMapEntry.getKey(),
451+
inputNestedMapEntry.getValue()
452+
);
444453
}
445454
} else {
446455
Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
447-
Map<String, Object> sourceMap;
448-
if (sourceAndMetadataMap.get(processorKey) == null) {
449-
sourceMap = new HashMap<>();
450-
sourceAndMetadataMap.put(processorKey, sourceMap);
451-
} else {
452-
sourceMap = (Map<String, Object>) sourceAndMetadataMap.get(processorKey);
453-
}
456+
Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
454457
putNLPResultToSourceMapForMapType(
455458
processedNestedKey.getKey(),
456459
processedNestedKey.getValue(),
@@ -471,6 +474,97 @@ private void putNLPResultToSourceMapForMapType(
471474
}
472475
}
473476

477+
private void processMapEntryValue(
478+
List<?> results,
479+
IndexWrapper indexWrapper,
480+
List<Map<String, Object>> sourceAndMetadataMapValueInList,
481+
String inputNestedMapEntryKey,
482+
List<Object> inputNestedMapEntryValue
483+
) {
484+
// build nlp output for object in sourceValue which is list type
485+
Iterator<Object> inputNestedMapValueIt = inputNestedMapEntryValue.iterator();
486+
for (Map<String, Object> nestedElement : sourceAndMetadataMapValueInList) {
487+
// Only fill in when value is not null
488+
if (inputNestedMapValueIt.hasNext() && inputNestedMapValueIt.next() != null) {
489+
nestedElement.put(inputNestedMapEntryKey, results.get(indexWrapper.index++));
490+
}
491+
}
492+
}
493+
494+
private void processMapEntryValue(
495+
List<?> results,
496+
IndexWrapper indexWrapper,
497+
List<Map<String, Object>> sourceAndMetadataMapValueInList,
498+
String inputNestedMapEntryKey,
499+
Object inputNestedMapEntryValue
500+
) {
501+
// build nlp output for object in sourceValue which is map type
502+
Iterator<Map<String, Object>> iterator = sourceAndMetadataMapValueInList.iterator();
503+
IntStream.range(0, sourceAndMetadataMapValueInList.size()).forEach(index -> {
504+
Map<String, Object> nestedElement = iterator.next();
505+
putNLPResultToSingleSourceMapInList(
506+
inputNestedMapEntryKey,
507+
inputNestedMapEntryValue,
508+
results,
509+
indexWrapper,
510+
nestedElement,
511+
index
512+
);
513+
});
514+
}
515+
516+
/**
517+
* Put nlp result to single source element, which is in a list field of source document
518+
* Such source element is in map type
519+
*
520+
* @param processorKey
521+
* @param sourceValue
522+
* @param results
523+
* @param indexWrapper
524+
* @param sourceAndMetadataMap
525+
* @param nestedElementIndex index of the element in the list field of source document
526+
*/
527+
@SuppressWarnings("unchecked")
528+
private void putNLPResultToSingleSourceMapInList(
529+
String processorKey,
530+
Object sourceValue,
531+
List<?> results,
532+
IndexWrapper indexWrapper,
533+
Map<String, Object> sourceAndMetadataMap,
534+
int nestedElementIndex
535+
) {
536+
if (processorKey == null || sourceAndMetadataMap == null || sourceValue == null) return;
537+
if (sourceValue instanceof Map) {
538+
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
539+
Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
540+
Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
541+
putNLPResultToSingleSourceMapInList(
542+
processedNestedKey.getKey(),
543+
processedNestedKey.getValue(),
544+
results,
545+
indexWrapper,
546+
sourceMap,
547+
nestedElementIndex
548+
);
549+
}
550+
} else {
551+
if (sourceValue instanceof List && ((List<Object>) sourceValue).get(nestedElementIndex) != null) {
552+
sourceAndMetadataMap.merge(processorKey, results.get(indexWrapper.index++), REMAPPING_FUNCTION);
553+
}
554+
}
555+
}
556+
557+
@SuppressWarnings("unchecked")
558+
private Map<String, Object> getSourceMapBySourceAndMetadataMap(String processorKey, Map<String, Object> sourceAndMetadataMap) {
559+
Map<String, Object> sourceMap = new HashMap<>();
560+
if (sourceAndMetadataMap.get(processorKey) == null) {
561+
sourceAndMetadataMap.put(processorKey, sourceMap);
562+
} else {
563+
sourceMap = (Map<String, Object>) sourceAndMetadataMap.get(processorKey);
564+
}
565+
return sourceMap;
566+
}
567+
474568
private List<Map<String, Object>> buildNLPResultForListType(List<String> sourceValue, List<?> results, IndexWrapper indexWrapper) {
475569
List<Map<String, Object>> keyToResult = new ArrayList<>();
476570
IntStream.range(0, sourceValue.size())

0 commit comments

Comments
 (0)