|
28 | 28 | import java.util.function.Consumer;
|
29 | 29 | import java.util.function.Supplier;
|
30 | 30 |
|
| 31 | +import org.apache.commons.lang3.StringUtils; |
31 | 32 | import org.apache.commons.lang3.tuple.Pair;
|
32 | 33 | import org.junit.Before;
|
33 | 34 | import org.mockito.ArgumentCaptor;
|
@@ -240,31 +241,6 @@ public void testExecute_withListTypeInput_successful() {
|
240 | 241 | verify(handler).accept(any(IngestDocument.class), isNull());
|
241 | 242 | }
|
242 | 243 |
|
243 |
| - public void testExecute_SimpleTypeWithEmptyStringValue_throwIllegalArgumentException() { |
244 |
| - Map<String, Object> sourceAndMetadata = new HashMap<>(); |
245 |
| - sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index"); |
246 |
| - sourceAndMetadata.put("key1", " "); |
247 |
| - IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>()); |
248 |
| - TextEmbeddingProcessor processor = createInstanceWithLevel1MapConfig(); |
249 |
| - |
250 |
| - BiConsumer handler = mock(BiConsumer.class); |
251 |
| - processor.execute(ingestDocument, handler); |
252 |
| - verify(handler).accept(isNull(), any(IllegalArgumentException.class)); |
253 |
| - } |
254 |
| - |
255 |
| - public void testExecute_listHasEmptyStringValue_throwIllegalArgumentException() { |
256 |
| - List<String> list1 = ImmutableList.of("", "test2", "test3"); |
257 |
| - Map<String, Object> sourceAndMetadata = new HashMap<>(); |
258 |
| - sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index"); |
259 |
| - sourceAndMetadata.put("key1", list1); |
260 |
| - IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>()); |
261 |
| - TextEmbeddingProcessor processor = createInstanceWithLevel1MapConfig(); |
262 |
| - |
263 |
| - BiConsumer handler = mock(BiConsumer.class); |
264 |
| - processor.execute(ingestDocument, handler); |
265 |
| - verify(handler).accept(isNull(), any(IllegalArgumentException.class)); |
266 |
| - } |
267 |
| - |
268 | 244 | public void testExecute_listHasNonStringValue_throwIllegalArgumentException() {
|
269 | 245 | List<Integer> list2 = ImmutableList.of(1, 2, 3);
|
270 | 246 | Map<String, Object> sourceAndMetadata = new HashMap<>();
|
@@ -549,20 +525,6 @@ public void testExecute_mapHasNonStringValue_throwIllegalArgumentException() {
|
549 | 525 | verify(handler).accept(isNull(), any(IllegalArgumentException.class));
|
550 | 526 | }
|
551 | 527 |
|
552 |
| - public void testExecute_mapHasEmptyStringValue_throwIllegalArgumentException() { |
553 |
| - Map<String, String> map1 = ImmutableMap.of("test1", "test2"); |
554 |
| - Map<String, String> map2 = ImmutableMap.of("test3", " "); |
555 |
| - Map<String, Object> sourceAndMetadata = new HashMap<>(); |
556 |
| - sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index"); |
557 |
| - sourceAndMetadata.put("key1", map1); |
558 |
| - sourceAndMetadata.put("key2", map2); |
559 |
| - IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>()); |
560 |
| - TextEmbeddingProcessor processor = createInstanceWithLevel2MapConfig(); |
561 |
| - BiConsumer handler = mock(BiConsumer.class); |
562 |
| - processor.execute(ingestDocument, handler); |
563 |
| - verify(handler).accept(isNull(), any(IllegalArgumentException.class)); |
564 |
| - } |
565 |
| - |
566 | 528 | public void testExecute_mapDepthReachLimit_throwIllegalArgumentException() {
|
567 | 529 | Map<String, Object> ret = createMaxDepthLimitExceedMap(() -> 1);
|
568 | 530 | Map<String, Object> sourceAndMetadata = new HashMap<>();
|
@@ -785,6 +747,79 @@ public void testBuildVectorOutput_withNestedListHasNotForEmbeddingField_Level2_s
|
785 | 747 | assertNotNull(nestedObj.get(1).get("vectorField"));
|
786 | 748 | }
|
787 | 749 |
|
| 750 | + @SuppressWarnings("unchecked") |
| 751 | + public void testBuildVectorOutput_withPlainString_EmptyString_skipped() { |
| 752 | + Map<String, Object> config = createPlainStringConfiguration(); |
| 753 | + IngestDocument ingestDocument = createPlainIngestDocument(); |
| 754 | + Map<String, Object> sourceAndMetadata = ingestDocument.getSourceAndMetadata(); |
| 755 | + sourceAndMetadata.put("oriKey1", StringUtils.EMPTY); |
| 756 | + |
| 757 | + TextEmbeddingProcessor processor = createInstanceWithNestedMapConfiguration(config); |
| 758 | + Map<String, Object> knnMap = processor.buildMapWithTargetKeys(ingestDocument); |
| 759 | + List<List<Float>> modelTensorList = createRandomOneDimensionalMockVector(6, 100, 0.0f, 1.0f); |
| 760 | + processor.setVectorFieldsToDocument(ingestDocument, knnMap, modelTensorList); |
| 761 | + |
| 762 | + /** IngestDocument |
| 763 | + * "oriKey1": "", |
| 764 | + * "oriKey2": "oriValue2", |
| 765 | + * "oriKey3": "oriValue3", |
| 766 | + * "oriKey4": "oriValue4", |
| 767 | + * "oriKey5": "oriValue5", |
| 768 | + * "oriKey6": [ |
| 769 | + * "oriValue6", |
| 770 | + * "oriValue7" |
| 771 | + * ] |
| 772 | + * |
| 773 | + */ |
| 774 | + assertEquals(11, sourceAndMetadata.size()); |
| 775 | + assertFalse(sourceAndMetadata.containsKey("oriKey1_knn")); |
| 776 | + } |
| 777 | + |
| 778 | + @SuppressWarnings("unchecked") |
| 779 | + public void testBuildVectorOutput_withNestedField_EmptyString_skipped() { |
| 780 | + Map<String, Object> config = createNestedMapConfiguration(); |
| 781 | + IngestDocument ingestDocument = createNestedMapIngestDocument(); |
| 782 | + Map<String, Object> favorites = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("favorites"); |
| 783 | + Map<String, Object> favorite = (Map<String, Object>) favorites.get("favorite"); |
| 784 | + favorite.put("movie", StringUtils.EMPTY); |
| 785 | + |
| 786 | + TextEmbeddingProcessor processor = createInstanceWithNestedMapConfiguration(config); |
| 787 | + Map<String, Object> knnMap = processor.buildMapWithTargetKeys(ingestDocument); |
| 788 | + List<List<Float>> modelTensorList = createRandomOneDimensionalMockVector(1, 100, 0.0f, 1.0f); |
| 789 | + processor.buildNLPResult(knnMap, modelTensorList, ingestDocument.getSourceAndMetadata()); |
| 790 | + |
| 791 | + /** |
| 792 | + * "favorites": { |
| 793 | + * "favorite": { |
| 794 | + * "movie": "", |
| 795 | + * "actor": "Charlie Chaplin", |
| 796 | + * "games" : { |
| 797 | + * "adventure": { |
| 798 | + * "action": "overwatch", |
| 799 | + * "rpg": "elden ring" |
| 800 | + * } |
| 801 | + * } |
| 802 | + * } |
| 803 | + * } |
| 804 | + */ |
| 805 | + Map<String, Object> favoritesMap = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("favorites"); |
| 806 | + assertNotNull(favoritesMap); |
| 807 | + Map<String, Object> favoriteMap = (Map<String, Object>) favoritesMap.get("favorite"); |
| 808 | + assertNotNull(favoriteMap); |
| 809 | + |
| 810 | + Map<String, Object> favoriteGames = (Map<String, Object>) favoriteMap.get("games"); |
| 811 | + assertNotNull(favoriteGames); |
| 812 | + Map<String, Object> adventure = (Map<String, Object>) favoriteGames.get("adventure"); |
| 813 | + List<Float> adventureKnnVector = (List<Float>) adventure.get("with_action_knn"); |
| 814 | + assertNotNull(adventureKnnVector); |
| 815 | + assertEquals(100, adventureKnnVector.size()); |
| 816 | + for (float vector : adventureKnnVector) { |
| 817 | + assertTrue(vector >= 0.0f && vector <= 1.0f); |
| 818 | + } |
| 819 | + |
| 820 | + assertFalse(favoriteMap.containsKey("favorite_movie_knn")); |
| 821 | + } |
| 822 | + |
788 | 823 | public void test_updateDocument_appendVectorFieldsToDocument_successful() {
|
789 | 824 | Map<String, Object> config = createPlainStringConfiguration();
|
790 | 825 | IngestDocument ingestDocument = createPlainIngestDocument();
|
|
0 commit comments