Skip to content

Commit

Permalink
S3 1.0 Breaking Change Documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
johnny-schmidt committed Aug 14, 2024
1 parent 5dd71eb commit 151e458
Show file tree
Hide file tree
Showing 27 changed files with 793 additions and 422 deletions.
575 changes: 288 additions & 287 deletions airbyte-cdk/java/airbyte-cdk/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ constructor(
* This probably doesn't belong here, but it's the easiest place where [BufferEnqueue] and
* [io.airbyte.cdk.integrations.destination.async.AsyncStreamConsumer] can both get to it.
*/
public val defaultNamespace: String?,
val defaultNamespace: String?,
maxMemory: Long = (Runtime.getRuntime().maxMemory() * MEMORY_LIMIT_RATIO).toLong(),
) {
@get:VisibleForTesting val buffers: ConcurrentMap<StreamDescriptor, StreamAwareQueue>
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version=0.44.9
version=0.44.11
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":"string1","union_type":10,"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[25,null,"[\"goodbye\",\"cruel world\"]"],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "empty_object": "{}", "object_with_null_properties": "{}"}
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":20,"union_type":"string2","schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}"}
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting":null, "empty_object": null, "object_with_null_properties": null }
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":"string1","union_type":10,"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[25,null,"[\"goodbye\",\"cruel world\"]"],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "empty_object": "{}", "object_with_null_properties": "{}", "combined_with_null": "foobar", "union_with_null": "barfoo", "combined_nulls": null}
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":20,"union_type":"string2","schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}", "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null}
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting":null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null }
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,22 @@
},
"object_with_null_properties": {
"type": "string"
},
"combined_with_null": {
"type": ["string", "null"]
},
"union_with_null": {
"oneOf": [
{
"type": "string"
},
{
"type": "null"
}
]
},
"combined_nulls": {
"type": "null"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,22 @@
"object_with_null_properties": {
"type": "object",
"properties": null
},
"combined_with_null": {
"type": ["string", "null"]
},
"union_with_null": {
"oneOf": [
{
"type": "string"
},
{
"type": "null"
}
]
},
"combined_nulls": {
"type": ["null", "null"]
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":{"type":"string","string":"string1","integer":null},"union_type":{"type":"integer","string":null,"integer":10},"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[{"type":"integer","integer":25,"string":null},null,{"type":"string","integer":null,"string":"[\"goodbye\",\"cruel world\"]"}],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": {"type":"integer","integer":1,"string":null}, "name": "Jenny" }, "empty_object": "{}", "object_with_null_properties": "{}"}}
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":{"type":"integer","string":null,"integer":20},"union_type":{"type":"string","string":"string2","integer":null},"schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": {"type":"string","integer":null,"string":"seal-one-hippity"}, "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}"}
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting": null, "empty_object": null, "object_with_null_properties": null }
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":{"type":"string","string":"string1","integer":null},"union_type":{"type":"integer","string":null,"integer":10},"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[{"type":"integer","integer":25,"string":null},null,{"type":"string","integer":null,"string":"[\"goodbye\",\"cruel world\"]"}],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": {"type":"integer","integer":1,"string":null}, "name": "Jenny" }, "empty_object": "{}","object_with_null_properties": "{}", "combined_with_null": "foobar", "union_with_null":"barfoo", "combined_nulls": null }}
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":{"type":"integer","string":null,"integer":20},"union_type":{"type":"string","string":"string2","integer":null},"schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": {"type":"string","integer":null,"string":"seal-one-hippity"}, "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}", "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null}
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting": null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null }
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,22 @@
},
"object_with_null_properties": {
"type": "string"
},
"combined_with_null": {
"type": ["string", "null"]
},
"union_with_null": {
"oneOf": [
{
"type": "string"
},
{
"type": "null"
}
]
},
"combined_nulls": {
"type": "null"
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589100, "data": { "schemaless_object": { "uuid": "38F52396-736D-4B23-B5B4-F504D8894B97", "probability": 1.5 }, "schematized_object": { "id": 1, "name": "Joe" }, "combined_type": "string1", "union_type": 10, "schemaless_array": [ 10, "foo", null, { "bar": "qua" } ], "mixed_array_integer_and_schemaless_object": [ 15, null, { "hello": "world" } ], "array_of_union_integer_and_schemaless_array": [ 25, null, ["goodbye", "cruel world"] ], "union_of_objects_with_properties_identical": { "id": 10, "name": "Joe" }, "union_of_objects_with_properties_overlapping": { "id": 20, "name": "Jane", "flagged": true }, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "union_of_objects_with_properties_nonoverlapping": { "id": 30, "name": "Phil", "flagged": false, "description":"Very Phil" }, "empty_object": {},"object_with_null_properties": {} } } }
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589200, "data": { "schemaless_object": { "address": { "street": "113 Hickey Rd", "zip": "37932" }, "flags": [ true, false, false ] }, "schematized_object": { "id": 2, "name": "Jane" }, "combined_type": 20, "union_type": "string2", "schemaless_array": [], "mixed_array_integer_and_schemaless_object": [ ], "array_of_union_integer_and_schemaless_array": [ ], "union_of_objects_with_properties_identical": { }, "union_of_objects_with_properties_overlapping": {}, "union_of_objects_with_properties_nonoverlapping": {}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": {"extra": "stuff"}, "object_with_null_properties": { "more": { "extra": "stuff" } } } } }
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589300, "data": { "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "empty_object": null, "object_with_null_properties": null } } }
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589100, "data": { "schemaless_object": { "uuid": "38F52396-736D-4B23-B5B4-F504D8894B97", "probability": 1.5 }, "schematized_object": { "id": 1, "name": "Joe" }, "combined_type": "string1", "union_type": 10, "schemaless_array": [ 10, "foo", null, { "bar": "qua" } ], "mixed_array_integer_and_schemaless_object": [ 15, null, { "hello": "world" } ], "array_of_union_integer_and_schemaless_array": [ 25, null, ["goodbye", "cruel world"] ], "union_of_objects_with_properties_identical": { "id": 10, "name": "Joe" }, "union_of_objects_with_properties_overlapping": { "id": 20, "name": "Jane", "flagged": true }, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "union_of_objects_with_properties_nonoverlapping": { "id": 30, "name": "Phil", "flagged": false, "description":"Very Phil" }, "empty_object": {},"object_with_null_properties": {}, "combined_with_null": "foobar", "union_with_null": "barfoo", "combined_nulls": null } } }
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589200, "data": { "schemaless_object": { "address": { "street": "113 Hickey Rd", "zip": "37932" }, "flags": [ true, false, false ] }, "schematized_object": { "id": 2, "name": "Jane" }, "combined_type": 20, "union_type": "string2", "schemaless_array": [], "mixed_array_integer_and_schemaless_object": [ ], "array_of_union_integer_and_schemaless_array": [ ], "union_of_objects_with_properties_identical": { }, "union_of_objects_with_properties_overlapping": {}, "union_of_objects_with_properties_nonoverlapping": {}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": {"extra": "stuff"}, "object_with_null_properties": { "more": { "extra": "stuff" } }, "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null } } }
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589300, "data": { "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null } } }
{"type": "STATE", "state": { "data": {"start_date": "2022-02-14"}}}
{"type": "TRACE", "trace": { "type": "STREAM_STATUS", "stream_status": {"stream_descriptor": {"name": "problematic_types"}, "status": "COMPLETE"}, "emitted_at": 1721428636000}}
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,16 @@ class S3ConsumerFactory {
},
useV2FieldNames = true
)

// Parquet has significantly higher overhead. This small adjustment
// results in a ~5x performance improvement.
val adjustedMemoryRatio =
if (s3Config.formatConfig!!.format == FileUploadFormat.PARQUET) {
memoryRatio * 0.6 // ie 0.5 => 0.3
} else {
memoryRatio
}

return AsyncStreamConsumer(
outputRecordCollector,
onStartFunction(storageOps, writeConfigs),
Expand All @@ -209,7 +219,7 @@ class S3ConsumerFactory {
// is simply omitted from the path.
BufferManager(
defaultNamespace = null,
maxMemory = (Runtime.getRuntime().maxMemory() * memoryRatio).toLong()
maxMemory = (Runtime.getRuntime().maxMemory() * adjustedMemoryRatio).toLong()
),
workerPool = Executors.newFixedThreadPool(nThreads)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ class JsonToAvroSchemaConverter {
addStringToLogicalTypes,
)
val nextStep = fieldBuilder.type(parsed)
if (parsed.isUnion) {
if (parsed.isUnion || parsed == NULL_SCHEMA) {
nextStep.withDefault(null)
} else {
nextStep.noDefault()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.commons.jackson.MoreMappers

enum class AirbyteJsonSchemaType {
NULL,
BOOLEAN,
INTEGER,
NUMBER,
Expand All @@ -30,6 +31,7 @@ enum class AirbyteJsonSchemaType {

fun matchesValue(tree: JsonNode): Boolean {
return when (this) {
NULL -> tree.isNull
BOOLEAN -> tree.isBoolean
INTEGER -> tree.isIntegralNumber || tree.isInt || tree.isBigInteger
NUMBER ->
Expand Down Expand Up @@ -97,6 +99,7 @@ enum class AirbyteJsonSchemaType {
val airbyteType = schema["airbyte_type"]?.asText()

return when (typeStr) {
"null" -> NULL
"boolean" -> BOOLEAN
"integer" -> INTEGER
"number" -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.commons.jackson.MoreMappers

open class JsonRecordIdentityMapper : JsonRecordMapper<JsonNode?>() {
/**
 * Identity mapping for the null case: yields an independent deep copy of [record], or `null`
 * when [record] itself is `null`. The [schema] argument is not consulted.
 */
override fun mapNull(record: JsonNode?, schema: ObjectNode): JsonNode? = record?.deepCopy()

/**
 * Identity mapping for the boolean case: yields an independent deep copy of [record], or `null`
 * when [record] itself is `null`. The [schema] argument is not consulted.
 */
override fun mapBoolean(record: JsonNode?, schema: ObjectNode): JsonNode? = record?.deepCopy()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ abstract class JsonRecordMapper<R> {
val schemaType = AirbyteJsonSchemaType.fromJsonSchema(schema)

return when (schemaType) {
AirbyteJsonSchemaType.NULL -> mapNull(record, schema)
AirbyteJsonSchemaType.BOOLEAN -> mapBoolean(record, schema)
AirbyteJsonSchemaType.INTEGER -> mapInteger(record, schema)
AirbyteJsonSchemaType.NUMBER -> mapNumber(record, schema)
Expand All @@ -34,6 +35,7 @@ abstract class JsonRecordMapper<R> {
}
}

/** Maps [record] when the schema type resolves to [AirbyteJsonSchemaType.NULL]. */
abstract fun mapNull(record: JsonNode?, schema: ObjectNode): R
/** Maps [record] when the schema type resolves to [AirbyteJsonSchemaType.BOOLEAN]. */
abstract fun mapBoolean(record: JsonNode?, schema: ObjectNode): R
/** Maps [record] when the schema type resolves to [AirbyteJsonSchemaType.INTEGER]. */
abstract fun mapInteger(record: JsonNode?, schema: ObjectNode): R
/** Maps [record] when the schema type resolves to [AirbyteJsonSchemaType.NUMBER]. */
abstract fun mapNumber(record: JsonNode?, schema: ObjectNode): R
Expand Down
Loading

0 comments on commit 151e458

Please sign in to comment.