Skip to content

Commit

Permalink
Allow missing semantic text field in bulk updates (elastic#116478) (elastic#116500)
Browse files Browse the repository at this point in the history

This update enables bulk update operations to succeed even when a semantic text field is absent from the partial update.
In the simple case, where the field is not the target of a copy_to from another field, inference can be safely bypassed, allowing the update to proceed without errors.
  • Loading branch information
jimczi authored Nov 8, 2024
1 parent ef4489a commit 1a6e1ba
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/116478.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116478
summary: Semantic text simple partial update
area: Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ public Set<NodeFeature> getFeatures() {

/**
 * Returns an immutable set (built with {@code Set.of}) of {@code NodeFeature} constants
 * declared on {@code SemanticTextFieldMapper}.
 */
@Override
public Set<NodeFeature> getTestFeatures() {
// NOTE(review): two return statements appear back to back below. This looks like a diff view
// showing the pre-change line next to its replacement -- confirm that only the two-feature
// Set.of(...) form exists in the actual source file.
return Set.of(SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX);
return Set.of(
SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX,
SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,8 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
String field = entry.getName();
String inferenceId = entry.getInferenceId();
var originalFieldValue = XContentMapValues.extractValue(field, docMap);
if (originalFieldValue instanceof Map) {
if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) {
// Inference has already been computed, or there is no inference required.
continue;
}
int order = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2");
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");

public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");

public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,3 +610,59 @@ setup:
- exists: _source.dense_field.inference.chunks.0.embeddings
- match: { _source.dense_field.inference.chunks.0.text: "another updated inference test" }
- match: { _source.non_inference_field: "updated non inference test" }

---
# Checks that a bulk partial update which omits sparse_field and dense_field succeeds and
# leaves their stored text and inference chunks untouched, and that explicitly setting both
# fields to null clears them.
# NOTE(review): sparse_field / dense_field are presumably semantic_text fields created in the
# setup section, which is not visible here -- confirm.
"Bypass inference on bulk update operation":
# Only run against clusters that advertise the fix this test covers.
- requires:
cluster_features: semantic_text.single_field_update_fix
reason: Standalone semantic text fields are now optional in a bulk update operation

# Update as upsert
# doc_1 does not exist yet, so this first request is expected to create it with all three fields.
- do:
bulk:
body:
- '{"update": {"_index": "test-index", "_id": "doc_1"}}'
- '{"doc": { "sparse_field": "inference test", "dense_field": "another inference test", "non_inference_field": "non inference test" }, "doc_as_upsert": true}'

- match: { errors: false }
- match: { items.0.update.result: "created" }

# Partial update that touches only non_inference_field; both inference fields are omitted.
- do:
bulk:
body:
- '{"update": {"_index": "test-index", "_id": "doc_1"}}'
- '{"doc": { "non_inference_field": "another value" }, "doc_as_upsert": true}'

- match: { errors: false }
- match: { items.0.update.result: "updated" }

# Fetch the document to inspect what the partial update left in _source.
- do:
get:
index: test-index
id: doc_1

# The omitted fields must still carry their original text and inference chunks,
# while non_inference_field reflects the new value.
- match: { _source.sparse_field.text: "inference test" }
- exists: _source.sparse_field.inference.chunks.0.embeddings
- match: { _source.sparse_field.inference.chunks.0.text: "inference test" }
- match: { _source.dense_field.text: "another inference test" }
- exists: _source.dense_field.inference.chunks.0.embeddings
- match: { _source.dense_field.inference.chunks.0.text: "another inference test" }
- match: { _source.non_inference_field: "another value" }

# Partial update that explicitly sets both inference fields to null.
- do:
bulk:
body:
- '{"update": {"_index": "test-index", "_id": "doc_1"}}'
- '{"doc": { "sparse_field": null, "dense_field": null, "non_inference_field": "updated value" }, "doc_as_upsert": true}'

- match: { errors: false }
- match: { items.0.update.result: "updated" }

# Fetch again to confirm the result of the null update.
- do:
get:
index: test-index
id: doc_1

# Both inference fields are now null in _source; the plain field holds the latest value.
- match: { _source.sparse_field: null }
- match: { _source.dense_field: null }
- match: { _source.non_inference_field: "updated value" }

0 comments on commit 1a6e1ba

Please sign in to comment.