-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Bulk Load CDK: Identity AirbyteValue Nulling/Capturing Mapper (#47197)
- Loading branch information
1 parent
017d7cc
commit 15ff3ee
Showing
10 changed files
with
308 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
116 changes: 116 additions & 0 deletions
116
...cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/data/AirbyteValueIdentityMapper.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* | ||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.load.data | ||
|
||
import io.airbyte.cdk.load.message.DestinationRecord | ||
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Change | ||
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason | ||
|
||
/**
 * Walks an [AirbyteValue] alongside its [AirbyteType] schema and returns each value unchanged.
 *
 * [map] dispatches on the schema type to one `mapXxx` hook per type; every hook is `open` so a
 * subclass can override only the types it needs to transform. If mapping a value throws (for
 * example because the value's runtime class does not match the schema type), the value is
 * replaced with the result of [mapNull] and a `NULLED` change is appended to [meta].
 *
 * @property meta record metadata whose `changes` list receives one entry per nulled value.
 */
open class AirbyteValueIdentityMapper(
    open val meta: DestinationRecord.Meta,
) {
    /**
     * Records that the value at [path] was nulled. Path segments are joined with `"."`, so an
     * array element (segment `"[i]"`, see [mapArray]) is reported as e.g. `items.[0]`.
     */
    private fun collectFailure(
        path: List<String>,
        reason: Reason = Reason.DESTINATION_SERIALIZATION_ERROR
    ) {
        meta.changes.add(DestinationRecord.Change(path.joinToString("."), Change.NULLED, reason))
    }

    /**
     * Maps [value] according to [schema].
     *
     * A [NullValue] input is routed straight to [mapNull] regardless of [schema]: it cannot be
     * cast to a concrete value class, and reporting an already-null value as `NULLED` would be
     * misleading. This matters because [mapObject] substitutes [NullValue] for every property
     * that is absent from the input object.
     *
     * @param value the value to map.
     * @param schema the declared type of [value].
     * @param path the location of [value] within the record, used when reporting failures.
     * @return the mapped value, or the result of [mapNull] if mapping threw.
     */
    fun map(
        value: AirbyteValue,
        schema: AirbyteType,
        path: List<String> = emptyList()
    ): AirbyteValue =
        try {
            if (value is NullValue) {
                mapNull(path)
            } else {
                when (schema) {
                    is ObjectType -> mapObject(value as ObjectValue, schema, path)
                    is ObjectTypeWithoutSchema ->
                        mapObjectWithoutSchema(value as ObjectValue, schema, path)
                    is ObjectTypeWithEmptySchema ->
                        mapObjectWithEmptySchema(value as ObjectValue, schema, path)
                    is ArrayType -> mapArray(value as ArrayValue, schema, path)
                    is ArrayTypeWithoutSchema ->
                        mapArrayWithoutSchema(value as ArrayValue, schema, path)
                    is UnionType -> mapUnion(value, schema, path)
                    is BooleanType -> mapBoolean(value as BooleanValue, path)
                    is NumberType -> mapNumber(value as NumberValue, path)
                    is StringType -> mapString(value as StringValue, path)
                    is IntegerType -> mapInteger(value as IntegerValue, path)
                    is DateType -> mapDate(value as DateValue, path)
                    is TimeTypeWithTimezone -> mapTimeWithTimezone(value as TimeValue, path)
                    is TimeTypeWithoutTimezone -> mapTimeWithoutTimezone(value as TimeValue, path)
                    is TimestampTypeWithTimezone ->
                        mapTimestampWithTimezone(value as TimestampValue, path)
                    is TimestampTypeWithoutTimezone ->
                        mapTimestampWithoutTimezone(value as TimestampValue, path)
                    is NullType -> mapNull(path)
                    is UnknownType -> mapUnknown(value as UnknownValue, path)
                }
            }
        } catch (e: Exception) {
            // Deliberate best-effort: a value that cannot be mapped is nulled and reported
            // through meta.changes instead of failing the whole record.
            collectFailure(path)
            mapNull(path)
        }

    /**
     * Maps each property declared in [schema], in schema order. Properties missing from [value]
     * are mapped as [NullValue]; properties present in [value] but not declared in [schema] are
     * not copied to the result.
     */
    open fun mapObject(value: ObjectValue, schema: ObjectType, path: List<String>): AirbyteValue {
        val values = LinkedHashMap<String, AirbyteValue>()
        schema.properties.forEach { (name, field) ->
            values[name] = map(value.values[name] ?: NullValue, field.type, path + name)
        }
        return ObjectValue(values)
    }

    /** Returns [value] unchanged; there is no schema to recurse with. */
    open fun mapObjectWithoutSchema(
        value: ObjectValue,
        schema: ObjectTypeWithoutSchema,
        path: List<String>
    ): AirbyteValue = value

    /** Returns [value] unchanged; the schema declares no properties to recurse into. */
    open fun mapObjectWithEmptySchema(
        value: ObjectValue,
        schema: ObjectTypeWithEmptySchema,
        path: List<String>
    ): AirbyteValue = value

    /** Maps every element against the array's item type, appending `"[index]"` to [path]. */
    open fun mapArray(value: ArrayValue, schema: ArrayType, path: List<String>): AirbyteValue {
        return ArrayValue(
            value.values.mapIndexed { index, element ->
                map(element, schema.items.type, path + "[$index]")
            }
        )
    }

    /** Returns [value] unchanged; there is no item schema to recurse with. */
    open fun mapArrayWithoutSchema(
        value: ArrayValue,
        schema: ArrayTypeWithoutSchema,
        path: List<String>
    ): AirbyteValue = value

    /** Returns [value] unchanged; the union's options are not inspected. */
    open fun mapUnion(value: AirbyteValue, schema: UnionType, path: List<String>): AirbyteValue =
        value

    /** Identity hook for boolean values. */
    open fun mapBoolean(value: BooleanValue, path: List<String>): AirbyteValue = value

    /** Identity hook for number values. */
    open fun mapNumber(value: NumberValue, path: List<String>): AirbyteValue = value

    /** Identity hook for string values. */
    open fun mapString(value: StringValue, path: List<String>): AirbyteValue = value

    /** Identity hook for integer values. */
    open fun mapInteger(value: IntegerValue, path: List<String>): AirbyteValue = value

    /** Identity hook for date values. */
    open fun mapDate(value: DateValue, path: List<String>): AirbyteValue = value

    /** Identity hook for time values whose schema type carries a timezone. */
    open fun mapTimeWithTimezone(value: TimeValue, path: List<String>): AirbyteValue = value

    /** Identity hook for time values whose schema type carries no timezone. */
    open fun mapTimeWithoutTimezone(value: TimeValue, path: List<String>): AirbyteValue = value

    /** Identity hook for timestamp values whose schema type carries a timezone. */
    open fun mapTimestampWithTimezone(value: TimestampValue, path: List<String>): AirbyteValue =
        value

    /** Identity hook for timestamp values whose schema type carries no timezone. */
    open fun mapTimestampWithoutTimezone(value: TimestampValue, path: List<String>): AirbyteValue =
        value

    /**
     * Produces the null replacement. Called for [NullType] schemas, for [NullValue] inputs, and
     * as the fallback after a mapping failure.
     */
    open fun mapNull(path: List<String>): AirbyteValue = NullValue

    /** Identity hook for values of unknown type. */
    open fun mapUnknown(value: UnknownValue, path: List<String>): AirbyteValue = value
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
...bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/data/AirbyteValueIdentityMapperTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.load.data | ||
|
||
import io.airbyte.cdk.load.message.DestinationRecord | ||
import io.airbyte.cdk.load.test.util.ValueTestBuilder | ||
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange | ||
import org.junit.jupiter.api.Assertions | ||
import org.junit.jupiter.api.Test | ||
|
||
/** Unit tests for [AirbyteValueIdentityMapper]. */
class AirbyteValueIdentityMapperTest {
    /** Values that match their schema come back unchanged and no changes are recorded. */
    @Test
    fun testIdentityMapping() {
        val (inputValues, inputSchema, expectedValues) =
            ValueTestBuilder()
                .with(StringValue("a"), StringType)
                .with(IntegerValue(1), IntegerType)
                .with(BooleanValue(true), BooleanType)
                .with(TimestampValue("2021-01-01T12:00:00Z"), TimestampTypeWithTimezone)
                .with(TimestampValue("2021-01-01T12:00:00"), TimestampTypeWithoutTimezone)
                .with(TimeValue("12:00:00Z"), TimeTypeWithTimezone)
                .with(TimeValue("12:00:00"), TimeTypeWithoutTimezone)
                .with(DateValue("2021-01-01"), DateType)
                .withRecord()
                .with(
                    ArrayValue(listOf("a", "b", "c").map(::StringValue)),
                    ArrayType(FieldType(StringType, false))
                )
                .with(
                    ArrayValue(listOf(IntegerValue(1), BooleanValue(true))),
                    ArrayTypeWithoutSchema
                )
                .withRecord()
                .with(NullValue, NullType)
                .endRecord()
                .endRecord()
                .build()

        val meta = DestinationRecord.Meta()
        val values = AirbyteValueIdentityMapper(meta).map(inputValues, inputSchema)
        Assertions.assertEquals(expectedValues, values)
        Assertions.assertTrue(meta.changes.isEmpty())
    }

    /**
     * A value whose runtime class does not match its schema type is nulled, and the change is
     * reported against that field.
     */
    @Test
    fun testIdentityMappingWithBadSchema() {
        val (inputValues, inputSchema, _) =
            ValueTestBuilder()
                .with(StringValue("a"), StringType)
                .with(
                    // Deliberate mismatch: a timestamp value declared as a time type.
                    TimestampValue("2021-01-01T12:00:00Z"),
                    TimeTypeWithTimezone,
                    nameOverride = "bad"
                )
                .build()
        val meta = DestinationRecord.Meta()
        val values = AirbyteValueIdentityMapper(meta).map(inputValues, inputSchema) as ObjectValue
        Assertions.assertTrue(meta.changes.isNotEmpty())
        Assertions.assertTrue(values.values["bad"] is NullValue)

        // assertEquals (rather than assertTrue on ==) reports expected vs. actual on failure.
        val firstChange = meta.changes[0]
        Assertions.assertEquals("bad", firstChange.field)
        Assertions.assertEquals(AirbyteRecordMessageMetaChange.Change.NULLED, firstChange.change)
        Assertions.assertEquals(
            AirbyteRecordMessageMetaChange.Reason.DESTINATION_SERIALIZATION_ERROR,
            firstChange.reason
        )
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/test/util/ValueTestBuilder.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/* | ||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.load.test.util | ||
|
||
import io.airbyte.cdk.load.data.AirbyteType | ||
import io.airbyte.cdk.load.data.AirbyteValue | ||
import io.airbyte.cdk.load.data.ObjectType | ||
import io.airbyte.cdk.load.data.ObjectValue | ||
import java.util.UUID | ||
|
||
/**
 * Test helper that builds, in lockstep, an input [ObjectValue], the [ObjectValue] a mapper is
 * expected to produce from it, and the matching [ObjectType] schema.
 *
 * Nested records are opened with [withRecord] and closed with [endRecord]; [build] may only be
 * called on the outermost builder.
 */
data class ValueTestBuilder(
    private val inputValues: ObjectValue = ObjectValue(linkedMapOf()),
    private val expectedValues: ObjectValue = ObjectValue(linkedMapOf()),
    private val schemaTestBuilder: SchemaTestBuilder = SchemaTestBuilder(),
    private val parent: ValueTestBuilder? = null
) {
    /**
     * Adds one field to the current record.
     *
     * @param inputValue value placed in the input record.
     * @param inputSchema schema type registered for the field.
     * @param expectedValue value placed in the expected record; defaults to [inputValue].
     * @param nameOverride field name; a random UUID string is used when null.
     * @return this builder, for chaining.
     */
    fun with(
        inputValue: AirbyteValue,
        inputSchema: AirbyteType,
        expectedValue: AirbyteValue = inputValue,
        nameOverride: String? = null
    ): ValueTestBuilder = apply {
        val fieldName = nameOverride ?: UUID.randomUUID().toString()
        inputValues.values[fieldName] = inputValue
        expectedValues.values[fieldName] = expectedValue
        schemaTestBuilder.with(inputSchema, nameOverride = fieldName)
    }

    /**
     * Opens a nested record under a random UUID name and returns a child builder scoped to it.
     * The child remembers this builder as its parent so [endRecord] can return to it.
     */
    fun withRecord(): ValueTestBuilder {
        val recordName = UUID.randomUUID().toString()
        val nestedInput = ObjectValue(linkedMapOf())
        val nestedExpected = ObjectValue(linkedMapOf())
        inputValues.values[recordName] = nestedInput
        expectedValues.values[recordName] = nestedExpected
        val nestedSchema = schemaTestBuilder.withRecord(nameOverride = recordName)
        return ValueTestBuilder(
            inputValues = nestedInput,
            expectedValues = nestedExpected,
            schemaTestBuilder = nestedSchema,
            parent = this
        )
    }

    /**
     * Closes the current nested record and returns a copy of the parent builder that carries
     * the closed schema builder.
     *
     * @throws IllegalStateException if this builder has no parent.
     */
    fun endRecord(): ValueTestBuilder {
        val outer = checkNotNull(parent) { "Cannot end record without parent" }
        return outer.copy(schemaTestBuilder = schemaTestBuilder.endRecord())
    }

    /**
     * Returns the accumulated (input values, schema, expected values) triple.
     *
     * @throws IllegalStateException if called while a nested record is still open.
     */
    fun build(): Triple<ObjectValue, ObjectType, ObjectValue> {
        check(parent == null) { "Cannot build nested schema" }
        val schema = schemaTestBuilder.build().first
        return Triple(inputValues, schema, expectedValues)
    }
}
Oops, something went wrong.