From ef1a01ddb2dacfa3639d3c6a9fcbd4bc14f71ff6 Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Thu, 26 Sep 2024 10:18:13 -0700 Subject: [PATCH 1/5] WIP. --- clients/redshift/dialect/dialect.go | 4 ++++ clients/redshift/dialect/dialect_test.go | 10 ++++++++++ lib/config/types.go | 2 ++ 3 files changed, 16 insertions(+) diff --git a/clients/redshift/dialect/dialect.go b/clients/redshift/dialect/dialect.go index 9cb35bc20..9db4187c3 100644 --- a/clients/redshift/dialect/dialect.go +++ b/clients/redshift/dialect/dialect.go @@ -147,6 +147,10 @@ func (RedshiftDialect) BuildAlterColumnQuery(tableID sql.TableIdentifier, column return fmt.Sprintf("ALTER TABLE %s %s COLUMN %s", tableID.FullyQualifiedName(), columnOp, colSQLPart) } +func (rd RedshiftDialect) BuildIncreaseStringPrecisionQuery(tableID sql.TableIdentifier, column columns.Column, newPrecision int32) string { + return fmt.Sprintf("ALTER TABLE %s ALTER COLUMN %s TYPE VARCHAR(%d)", tableID.FullyQualifiedName(), rd.QuoteIdentifier(column.Name()), newPrecision) +} + func (rd RedshiftDialect) BuildIsNotToastValueExpression(tableAlias constants.TableAlias, column columns.Column) string { colName := sql.QuoteTableAliasColumn(tableAlias, column, rd) if column.KindDetails == typing.Struct { diff --git a/clients/redshift/dialect/dialect_test.go b/clients/redshift/dialect/dialect_test.go index f285c7671..e620b7362 100644 --- a/clients/redshift/dialect/dialect_test.go +++ b/clients/redshift/dialect/dialect_test.go @@ -195,6 +195,16 @@ func TestRedshiftDialect_BuildAlterColumnQuery(t *testing.T) { ) } +func TestRedshiftDialect_BuildIncreaseStringPrecisionQuery(t *testing.T) { + fakeTableID := &mocks.FakeTableIdentifier{} + fakeTableID.FullyQualifiedNameReturns("{TABLE}") + + assert.Equal(t, + `ALTER TABLE {TABLE} ALTER COLUMN "{column}" TYPE VARCHAR(12345)`, + RedshiftDialect{}.BuildIncreaseStringPrecisionQuery(fakeTableID, columns.NewColumn("{COLUMN}", typing.String), 12345), + ) +} + func 
TestRedshiftDialect_BuildIsNotToastValueExpression(t *testing.T) { assert.Equal(t, `COALESCE(tbl."bar" != '__debezium_unavailable_value', true)`, diff --git a/lib/config/types.go b/lib/config/types.go index 59fe936fa..2637737d5 100644 --- a/lib/config/types.go +++ b/lib/config/types.go @@ -39,6 +39,8 @@ type Kafka struct { type SharedDestinationSettings struct { // TruncateExceededValues - This will truncate exceeded values instead of replacing it with `__artie_exceeded_value` TruncateExceededValues bool `yaml:"truncateExceededValues"` + // IncreaseStringPrecision - This will increase the string precision to the maximum allowed by the destination. + IncreaseStringPrecision bool `yaml:"increaseStringPrecision"` } type Config struct { From c767dca9df19bf0d92f1f66edbbdefb3fab62adb Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Thu, 26 Sep 2024 10:41:23 -0700 Subject: [PATCH 2/5] WIP. --- clients/redshift/cast.go | 8 ++++++++ clients/redshift/cast_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/clients/redshift/cast.go b/clients/redshift/cast.go index 52af74d7f..22cf1be98 100644 --- a/clients/redshift/cast.go +++ b/clients/redshift/cast.go @@ -10,6 +10,14 @@ import ( const maxRedshiftLength int32 = 65535 +func canIncreasePrecision(colKind typing.KindDetails) bool { + if colKind.Kind == typing.String.Kind && colKind.OptionalStringPrecision != nil { + return maxRedshiftLength > *colKind.OptionalStringPrecision + } + + return false +} + func replaceExceededValues(colVal string, colKind typing.KindDetails, truncateExceededValue bool) string { if colKind.Kind == typing.Struct.Kind || colKind.Kind == typing.String.Kind { maxLength := maxRedshiftLength diff --git a/clients/redshift/cast_test.go b/clients/redshift/cast_test.go index 7867e164f..abe30773a 100644 --- a/clients/redshift/cast_test.go +++ b/clients/redshift/cast_test.go @@ -11,6 +11,35 @@ import ( "github.com/stretchr/testify/assert" ) +func (r *RedshiftTestSuite) 
TestCanIncreasePrecision() { + { + // Not a string + assert.False(r.T(), canIncreasePrecision(typing.Struct)) + } + { + // String, but precision is not specified + assert.False(r.T(), canIncreasePrecision(typing.String)) + } + { + // String, but maxed out already + assert.False(r.T(), canIncreasePrecision( + typing.KindDetails{ + Kind: typing.String.Kind, + OptionalStringPrecision: typing.ToPtr(maxRedshiftLength), + }), + ) + } + { + // String, precision is low and can be increased + assert.True(r.T(), canIncreasePrecision( + typing.KindDetails{ + Kind: typing.String.Kind, + OptionalStringPrecision: typing.ToPtr(maxRedshiftLength - 1), + }), + ) + } +} + func (r *RedshiftTestSuite) TestReplaceExceededValues() { { // Irrelevant data type From 40a1a724db974e32cf4c6f852d92c1009af9fd93 Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Thu, 26 Sep 2024 10:52:27 -0700 Subject: [PATCH 3/5] WIP. --- clients/redshift/cast.go | 35 +++++++++++-------- clients/redshift/cast_test.go | 63 ++++++++++++++++++++++++----------- clients/redshift/redshift.go | 4 +++ clients/redshift/staging.go | 11 +++++- 4 files changed, 79 insertions(+), 34 deletions(-) diff --git a/clients/redshift/cast.go b/clients/redshift/cast.go index 22cf1be98..71ebd6077 100644 --- a/clients/redshift/cast.go +++ b/clients/redshift/cast.go @@ -10,15 +10,17 @@ import ( const maxRedshiftLength int32 = 65535 -func canIncreasePrecision(colKind typing.KindDetails) bool { +func canIncreasePrecision(colKind typing.KindDetails, valueLength int32) bool { if colKind.Kind == typing.String.Kind && colKind.OptionalStringPrecision != nil { - return maxRedshiftLength > *colKind.OptionalStringPrecision + return maxRedshiftLength > *colKind.OptionalStringPrecision && valueLength <= maxRedshiftLength } return false } -func replaceExceededValues(colVal string, colKind typing.KindDetails, truncateExceededValue bool) string { +// replaceExceededValues replaces the value with a marker if it exceeds the maximum length +// Returns the 
value and boolean indicating whether the column should be increased or not. +func replaceExceededValues(colVal string, colKind typing.KindDetails, truncateExceededValue bool, increaseStringPrecision bool) (string, bool) { if colKind.Kind == typing.Struct.Kind || colKind.Kind == typing.String.Kind { maxLength := maxRedshiftLength // If the customer has specified the maximum string precision, let's use that as the max length. @@ -26,38 +28,45 @@ func replaceExceededValues(colVal string, colKind typing.KindDetails, truncateEx maxLength = *colKind.OptionalStringPrecision } - if shouldReplace := int32(len(colVal)) > maxLength; shouldReplace { + colValLength := int32(len(colVal)) + if shouldReplace := colValLength > maxLength; shouldReplace { if colKind.Kind == typing.Struct.Kind { - return fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker) + return fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker), false + } + + if increaseStringPrecision && canIncreasePrecision(colKind, colValLength) { + return colVal, true } if truncateExceededValue { - return colVal[:maxLength] + return colVal[:maxLength], false } else { - return constants.ExceededValueMarker + return constants.ExceededValueMarker, false } } } - return colVal + return colVal, false } -func castColValStaging(colVal any, colKind typing.KindDetails, truncateExceededValue bool) (string, error) { +func castColValStaging(colVal any, colKind typing.KindDetails, truncateExceededValue bool, increaseStringPrecision bool) (string, bool, error) { if colVal == nil { if colKind == typing.Struct { // Returning empty here because if it's a struct, it will go through JSON PARSE and JSON_PARSE("") = null - return "", nil + return "", false, nil } // This matches the COPY clause for NULL terminator. 
- return `\N`, nil + return `\N`, false, nil } colValString, err := values.ToString(colVal, colKind) if err != nil { - return "", err + return "", false, err } // Checks for DDL overflow needs to be done at the end in case there are any conversions that need to be done. - return replaceExceededValues(colValString, colKind, truncateExceededValue), nil + + colValue, shouldIncreaseColumn := replaceExceededValues(colValString, colKind, truncateExceededValue, increaseStringPrecision) + return colValue, shouldIncreaseColumn, nil } diff --git a/clients/redshift/cast_test.go b/clients/redshift/cast_test.go index abe30773a..139424f37 100644 --- a/clients/redshift/cast_test.go +++ b/clients/redshift/cast_test.go @@ -45,13 +45,13 @@ func (r *RedshiftTestSuite) TestReplaceExceededValues() { // Irrelevant data type { // Integer - assert.Equal(r.T(), "123", replaceExceededValues("123", typing.Integer, false)) + assert.Equal(r.T(), "123", replaceExceededValues("123", typing.Integer, false, false)) } { // Returns the full value since it's not a struct or string // This is invalid and should not happen, but it's here to ensure we're only checking for structs and strings. 
value := stringutil.Random(int(maxRedshiftLength + 1)) - assert.Equal(r.T(), value, replaceExceededValues(value, typing.Integer, false)) + assert.Equal(r.T(), value, replaceExceededValues(value, typing.Integer, false, false)) } } { @@ -59,44 +59,44 @@ func (r *RedshiftTestSuite) TestReplaceExceededValues() { { // String { - // TruncateExceededValue = false - assert.Equal(r.T(), constants.ExceededValueMarker, replaceExceededValues(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false)) + // TruncateExceededValue = false, IncreaseStringPrecision = false + assert.Equal(r.T(), constants.ExceededValueMarker, replaceExceededValues(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false, false)) } { - // TruncateExceededValue = false, string precision specified + // TruncateExceededValue = false, string precision specified, IncreaseStringPrecision = false stringKd := typing.KindDetails{ Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(3)), } - assert.Equal(r.T(), constants.ExceededValueMarker, replaceExceededValues("hello", stringKd, false)) + assert.Equal(r.T(), constants.ExceededValueMarker, replaceExceededValues("hello", stringKd, false, false)) } { - // TruncateExceededValue = true + // TruncateExceededValue = true, IncreaseStringPrecision = false superLongString := stringutil.Random(int(maxRedshiftLength) + 1) - assert.Equal(r.T(), superLongString[:maxRedshiftLength], replaceExceededValues(superLongString, typing.String, true)) + assert.Equal(r.T(), superLongString[:maxRedshiftLength], replaceExceededValues(superLongString, typing.String, true, false)) } { - // TruncateExceededValue = true, string precision specified + // TruncateExceededValue = true, string precision specified, IncreaseStringPrecision = false stringKd := typing.KindDetails{ Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(3)), } - assert.Equal(r.T(), "hel", replaceExceededValues("hello", stringKd, true)) + assert.Equal(r.T(), "hel", 
replaceExceededValues("hello", stringKd, true, false)) } } { // Struct and masked - assert.Equal(r.T(), fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker), replaceExceededValues(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false)) + assert.Equal(r.T(), fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker), replaceExceededValues(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false, false)) } } { // Valid { // Not masked - assert.Equal(r.T(), `{"foo": "bar"}`, replaceExceededValues(`{"foo": "bar"}`, typing.Struct, false)) - assert.Equal(r.T(), "hello world", replaceExceededValues("hello world", typing.String, false)) + assert.Equal(r.T(), `{"foo": "bar"}`, replaceExceededValues(`{"foo": "bar"}`, typing.Struct, false, false)) + assert.Equal(r.T(), "hello world", replaceExceededValues("hello world", typing.String, false, false)) } } } @@ -107,22 +107,45 @@ func (r *RedshiftTestSuite) TestCastColValStaging() { { // String { - // TruncateExceededValue = false - value, err := castColValStaging(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false) + // TruncateExceededValue = false, IncreaseStringPrecision = false + value, err := castColValStaging(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), constants.ExceededValueMarker, value) } { - // TruncateExceededValue = true + // TruncateExceededValue = true, IncreaseStringPrecision = false value := stringutil.Random(int(maxRedshiftLength) + 1) - value, err := castColValStaging(value, typing.String, true) + value, err := castColValStaging(value, typing.String, true, false) + assert.NoError(r.T(), err) + assert.Equal(r.T(), value[:maxRedshiftLength], value) + } + { + // TruncateExceededValue = false, IncreaseStringPrecision = true + stringKd := typing.KindDetails{ + Kind: typing.String.Kind, + OptionalStringPrecision: typing.ToPtr(int32(3)), + } 
+ + value, err := castColValStaging("hello", stringKd, false, true) + assert.NoError(r.T(), err) + assert.Equal(r.T(), "hello", value) + } + { + value := stringutil.Random(int(maxRedshiftLength) + 1) + // TruncateExceededValue = true, IncreaseStringPrecision = true + stringKd := typing.KindDetails{ + Kind: typing.String.Kind, + OptionalStringPrecision: typing.ToPtr(int32(3)), + } + + value, err := castColValStaging(value, stringKd, true, true) assert.NoError(r.T(), err) assert.Equal(r.T(), value[:maxRedshiftLength], value) } } { // Masked struct - value, err := castColValStaging(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false) + value, err := castColValStaging(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker), value) } @@ -131,13 +154,13 @@ func (r *RedshiftTestSuite) TestCastColValStaging() { // Not exceeded { // Valid string - value, err := castColValStaging("thisissuperlongbutnotlongenoughtogetmasked", typing.String, false) + value, err := castColValStaging("thisissuperlongbutnotlongenoughtogetmasked", typing.String, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), "thisissuperlongbutnotlongenoughtogetmasked", value) } { // Valid struct - value, err := castColValStaging(`{"foo": "bar"}`, typing.Struct, false) + value, err := castColValStaging(`{"foo": "bar"}`, typing.Struct, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), `{"foo": "bar"}`, value) } diff --git a/clients/redshift/redshift.go b/clients/redshift/redshift.go index 735a66a69..1cdf1923f 100644 --- a/clients/redshift/redshift.go +++ b/clients/redshift/redshift.go @@ -52,6 +52,10 @@ func (s *Store) GetConfigMap() *types.DwhToTablesConfigMap { } func (s *Store) Dialect() sql.Dialect { + return s.dialect() +} + +func (s *Store) dialect() dialect.RedshiftDialect { return 
dialect.RedshiftDialect{} } diff --git a/clients/redshift/staging.go b/clients/redshift/staging.go index 9d5893f99..14a7f633b 100644 --- a/clients/redshift/staging.go +++ b/clients/redshift/staging.go @@ -94,11 +94,20 @@ func (s *Store) loadTemporaryTable(tableData *optimization.TableData, newTableID for _, value := range tableData.Rows() { var row []string for _, col := range columns { - castedValue, castErr := castColValStaging(value[col.Name()], col.KindDetails, s.config.SharedDestinationSettings.TruncateExceededValues) + castedValue, shouldIncreaseColumn, castErr := castColValStaging( + value[col.Name()], + col.KindDetails, + s.config.SharedDestinationSettings.TruncateExceededValues, + s.config.SharedDestinationSettings.IncreaseStringPrecision, + ) if castErr != nil { return "", castErr } + if shouldIncreaseColumn { + s.dialect().BuildIncreaseStringPrecisionQuery() + } + row = append(row, castedValue) } From e34defcb5af51a6210ec73873af4698b7a04ae94 Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Thu, 26 Sep 2024 15:44:49 -0700 Subject: [PATCH 4/5] Clean up. 
--- clients/redshift/cast_test.go | 134 +++++++++++++++++++++------------- clients/redshift/staging.go | 7 +- 2 files changed, 85 insertions(+), 56 deletions(-) diff --git a/clients/redshift/cast_test.go b/clients/redshift/cast_test.go index 139424f37..42cecb6ff 100644 --- a/clients/redshift/cast_test.go +++ b/clients/redshift/cast_test.go @@ -3,40 +3,46 @@ package redshift import ( "fmt" - "github.com/artie-labs/transfer/lib/stringutil" - "github.com/artie-labs/transfer/lib/config/constants" - + "github.com/artie-labs/transfer/lib/stringutil" "github.com/artie-labs/transfer/lib/typing" "github.com/stretchr/testify/assert" ) func (r *RedshiftTestSuite) TestCanIncreasePrecision() { { - // Not a string - assert.False(r.T(), canIncreasePrecision(typing.Struct)) - } - { - // String, but precision is not specified - assert.False(r.T(), canIncreasePrecision(typing.String)) - } - { - // String, but maxed out already - assert.False(r.T(), canIncreasePrecision( - typing.KindDetails{ - Kind: typing.String.Kind, - OptionalStringPrecision: typing.ToPtr(maxRedshiftLength), - }), - ) + // False + { + // Not a string + assert.False(r.T(), canIncreasePrecision(typing.Struct, 123)) + } + { + // String, but precision not specified. 
+ assert.False(r.T(), canIncreasePrecision(typing.String, 123)) + } + { + // String and precision specified, but value length exceeds maxRedshiftLength + assert.False(r.T(), canIncreasePrecision( + typing.KindDetails{ + Kind: typing.String.Kind, + OptionalStringPrecision: typing.ToPtr(int32(123)), + }, + maxRedshiftLength+1), + ) + } } { - // String, precision is low and can be increased - assert.True(r.T(), canIncreasePrecision( - typing.KindDetails{ - Kind: typing.String.Kind, - OptionalStringPrecision: typing.ToPtr(maxRedshiftLength - 1), - }), - ) + // True + { + // String, precision is low and can be increased + assert.True(r.T(), canIncreasePrecision( + typing.KindDetails{ + Kind: typing.String.Kind, + OptionalStringPrecision: typing.ToPtr(int32(123)), + }, + 123), + ) + } } } @@ -45,22 +51,26 @@ func (r *RedshiftTestSuite) TestReplaceExceededValues() { // Irrelevant data type { // Integer - assert.Equal(r.T(), "123", replaceExceededValues("123", typing.Integer, false, false)) + + value, _ := replaceExceededValues("123", typing.Integer, false, false) + assert.Equal(r.T(), "123", value) } { // Returns the full value since it's not a struct or string // This is invalid and should not happen, but it's here to ensure we're only checking for structs and strings. 
- value := stringutil.Random(int(maxRedshiftLength + 1)) - assert.Equal(r.T(), value, replaceExceededValues(value, typing.Integer, false, false)) + input := stringutil.Random(int(maxRedshiftLength + 1)) + value, _ := replaceExceededValues(input, typing.Integer, false, false) + assert.Equal(r.T(), input, value) } } { // Exceeded { - // String { // TruncateExceededValue = false, IncreaseStringPrecision = false - assert.Equal(r.T(), constants.ExceededValueMarker, replaceExceededValues(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false, false)) + value, shouldIncrease := replaceExceededValues(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false, false) + assert.Equal(r.T(), constants.ExceededValueMarker, value) + assert.False(r.T(), shouldIncrease) } { // TruncateExceededValue = false, string precision specified, IncreaseStringPrecision = false @@ -68,13 +78,16 @@ func (r *RedshiftTestSuite) TestReplaceExceededValues() { Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(3)), } - - assert.Equal(r.T(), constants.ExceededValueMarker, replaceExceededValues("hello", stringKd, false, false)) + value, shouldIncrease := replaceExceededValues("hello", stringKd, false, false) + assert.Equal(r.T(), constants.ExceededValueMarker, value) + assert.False(r.T(), shouldIncrease) } { // TruncateExceededValue = true, IncreaseStringPrecision = false - superLongString := stringutil.Random(int(maxRedshiftLength) + 1) - assert.Equal(r.T(), superLongString[:maxRedshiftLength], replaceExceededValues(superLongString, typing.String, true, false)) + input := stringutil.Random(int(maxRedshiftLength) + 1) + value, shouldIncrease := replaceExceededValues(input, typing.String, true, false) + assert.Equal(r.T(), input[:maxRedshiftLength], value) + assert.False(r.T(), shouldIncrease) } { // TruncateExceededValue = true, string precision specified, IncreaseStringPrecision = false @@ -82,21 +95,32 @@ func (r *RedshiftTestSuite) TestReplaceExceededValues() { 
Kind: typing.String.Kind, OptionalStringPrecision: typing.ToPtr(int32(3)), } - - assert.Equal(r.T(), "hel", replaceExceededValues("hello", stringKd, true, false)) + value, shouldIncrease := replaceExceededValues("hello", stringKd, true, false) + assert.Equal(r.T(), "hel", value) + assert.False(r.T(), shouldIncrease) } } { // Struct and masked - assert.Equal(r.T(), fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker), replaceExceededValues(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false, false)) + value, shouldIncrease := replaceExceededValues(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false, false) + assert.Equal(r.T(), fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker), value) + assert.False(r.T(), shouldIncrease) } } { // Valid { // Not masked - assert.Equal(r.T(), `{"foo": "bar"}`, replaceExceededValues(`{"foo": "bar"}`, typing.Struct, false, false)) - assert.Equal(r.T(), "hello world", replaceExceededValues("hello world", typing.String, false, false)) + { + value, shouldIncrease := replaceExceededValues(`{"foo": "bar"}`, typing.Struct, false, false) + assert.Equal(r.T(), `{"foo": "bar"}`, value) + assert.False(r.T(), shouldIncrease) + } + { + value, shouldIncrease := replaceExceededValues("hello world", typing.String, false, false) + assert.Equal(r.T(), "hello world", value) + assert.False(r.T(), shouldIncrease) + } } } } @@ -108,16 +132,18 @@ func (r *RedshiftTestSuite) TestCastColValStaging() { // String { // TruncateExceededValue = false, IncreaseStringPrecision = false - value, err := castColValStaging(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false, false) + value, shouldIncrease, err := castColValStaging(stringutil.Random(int(maxRedshiftLength)+1), typing.String, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), constants.ExceededValueMarker, value) + assert.False(r.T(), shouldIncrease) } { // TruncateExceededValue = 
true, IncreaseStringPrecision = false - value := stringutil.Random(int(maxRedshiftLength) + 1) - value, err := castColValStaging(value, typing.String, true, false) + input := stringutil.Random(int(maxRedshiftLength) + 1) + value, shouldIncrease, err := castColValStaging(input, typing.String, true, false) assert.NoError(r.T(), err) - assert.Equal(r.T(), value[:maxRedshiftLength], value) + assert.Equal(r.T(), input[:maxRedshiftLength], value) + assert.False(r.T(), shouldIncrease) } { // TruncateExceededValue = false, IncreaseStringPrecision = true @@ -126,43 +152,49 @@ func (r *RedshiftTestSuite) TestCastColValStaging() { OptionalStringPrecision: typing.ToPtr(int32(3)), } - value, err := castColValStaging("hello", stringKd, false, true) + value, shouldIncrease, err := castColValStaging("hello", stringKd, false, true) assert.NoError(r.T(), err) assert.Equal(r.T(), "hello", value) + assert.True(r.T(), shouldIncrease) } { - value := stringutil.Random(int(maxRedshiftLength) + 1) // TruncateExceededValue = true, IncreaseStringPrecision = true + input := stringutil.Random(int(maxRedshiftLength) + 1) + stringPrecision := int32(3) stringKd := typing.KindDetails{ Kind: typing.String.Kind, - OptionalStringPrecision: typing.ToPtr(int32(3)), + OptionalStringPrecision: typing.ToPtr(stringPrecision), } - value, err := castColValStaging(value, stringKd, true, true) + value, shouldIncrease, err := castColValStaging(input, stringKd, true, true) assert.NoError(r.T(), err) - assert.Equal(r.T(), value[:maxRedshiftLength], value) + assert.Equal(r.T(), input[:stringPrecision], value) + assert.False(r.T(), shouldIncrease) } } { // Masked struct - value, err := castColValStaging(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false, false) + value, shouldIncrease, err := castColValStaging(fmt.Sprintf(`{"foo": "%s"}`, stringutil.Random(int(maxRedshiftLength)+1)), typing.Struct, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), 
fmt.Sprintf(`{"key":"%s"}`, constants.ExceededValueMarker), value) + assert.False(r.T(), shouldIncrease) } } { // Not exceeded { // Valid string - value, err := castColValStaging("thisissuperlongbutnotlongenoughtogetmasked", typing.String, false, false) + value, shouldIncrease, err := castColValStaging("thisissuperlongbutnotlongenoughtogetmasked", typing.String, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), "thisissuperlongbutnotlongenoughtogetmasked", value) + assert.False(r.T(), shouldIncrease) } { // Valid struct - value, err := castColValStaging(`{"foo": "bar"}`, typing.Struct, false, false) + value, shouldIncrease, err := castColValStaging(`{"foo": "bar"}`, typing.Struct, false, false) assert.NoError(r.T(), err) assert.Equal(r.T(), `{"foo": "bar"}`, value) + assert.False(r.T(), shouldIncrease) } } } diff --git a/clients/redshift/staging.go b/clients/redshift/staging.go index 9bb42f9b5..f5d008a6d 100644 --- a/clients/redshift/staging.go +++ b/clients/redshift/staging.go @@ -94,7 +94,8 @@ func (s *Store) loadTemporaryTable(tableData *optimization.TableData, newTableID for _, value := range tableData.Rows() { var row []string for _, col := range columns { - castedValue, shouldIncreaseColumn, castErr := castColValStaging( + // TODO: Implement + castedValue, _, castErr := castColValStaging( value[col.Name()], col.KindDetails, s.config.SharedDestinationSettings.TruncateExceededValues, @@ -104,10 +105,6 @@ func (s *Store) loadTemporaryTable(tableData *optimization.TableData, newTableID return "", castErr } - if shouldIncreaseColumn { - s.dialect().BuildIncreaseStringPrecisionQuery() - } - row = append(row, castedValue) } From 588411dfbd4b0f794affddfa4a1de9ebb43080ed Mon Sep 17 00:00:00 2001 From: Robin Tang Date: Thu, 26 Sep 2024 15:45:24 -0700 Subject: [PATCH 5/5] Update comment. 
--- clients/redshift/cast.go | 1 + 1 file changed, 1 insertion(+) diff --git a/clients/redshift/cast.go b/clients/redshift/cast.go index 71ebd6077..e317be7a3 100644 --- a/clients/redshift/cast.go +++ b/clients/redshift/cast.go @@ -10,6 +10,7 @@ import ( const maxRedshiftLength int32 = 65535 +// canIncreasePrecision - returns true if the column is a string with a specified precision below [maxRedshiftLength] and the value length does not exceed [maxRedshiftLength] func canIncreasePrecision(colKind typing.KindDetails, valueLength int32) bool { if colKind.Kind == typing.String.Kind && colKind.OptionalStringPrecision != nil { return maxRedshiftLength > *colKind.OptionalStringPrecision && valueLength <= maxRedshiftLength