Skip to content

Commit

Permalink
Merge branch 'master' into remove-additional-date-formats
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang8330 committed Sep 9, 2024
2 parents a905eef + a187fbf commit 97b01a3
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 79 deletions.
15 changes: 2 additions & 13 deletions clients/s3/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ import (
"github.com/artie-labs/transfer/lib/config"
"github.com/artie-labs/transfer/lib/optimization"
"github.com/artie-labs/transfer/lib/parquetutil"
"github.com/artie-labs/transfer/lib/typing"
"github.com/artie-labs/transfer/lib/typing/columns"
"github.com/xitongsys/parquet-go-source/local"
"github.com/xitongsys/parquet-go/writer"
)
Expand Down Expand Up @@ -79,15 +77,7 @@ func (s *Store) Merge(tableData *optimization.TableData) error {
return nil
}

var cols []columns.Column
for _, col := range tableData.ReadOnlyInMemoryCols().GetColumns() {
if col.KindDetails == typing.Invalid {
continue
}

cols = append(cols, col)
}

cols := tableData.ReadOnlyInMemoryCols().ValidColumns()
schema, err := parquetutil.GenerateJSONSchema(cols)
if err != nil {
return fmt.Errorf("failed to generate parquet schema: %w", err)
Expand All @@ -105,10 +95,9 @@ func (s *Store) Merge(tableData *optimization.TableData) error {
}

pw.CompressionType = parquet.CompressionCodec_GZIP
columns := tableData.ReadOnlyInMemoryCols().ValidColumns()
for _, val := range tableData.Rows() {
row := make(map[string]any)
for _, col := range columns {
for _, col := range cols {
value, err := parquetutil.ParseValue(val[col.Name()], col)
if err != nil {
return fmt.Errorf("failed to parse value, err: %w, value: %v, column: %q", err, val[col.Name()], col.Name())
Expand Down
69 changes: 12 additions & 57 deletions lib/typing/ext/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package ext

import (
"fmt"
"log/slog"
"time"
)

Expand All @@ -24,86 +23,42 @@ func ParseFromInterface(val any) (*ExtendedTime, error) {
}
}

// ParseTimeExactMatch - This function is the same as `ParseTimeExactMatchLegacy` with the only exception that it'll return an error if it was not an exact match
// ParseTimeExactMatch will return an error if it was not an exact match.
// We need this function because time.Parse may succeed while silently truncating precision.
func ParseTimeExactMatch(layout, timeString string) (time.Time, error) {
ts, err := time.Parse(layout, timeString)
func ParseTimeExactMatch(layout, value string) (time.Time, error) {
ts, err := time.Parse(layout, value)
if err != nil {
return time.Time{}, err
}

if ts.Format(layout) != timeString {
return time.Time{}, fmt.Errorf("failed to parse %q with layout %q", timeString, layout)
if ts.Format(layout) != value {
return time.Time{}, fmt.Errorf("failed to parse %q with layout %q", value, layout)
}

return ts, nil
}

// TODO: Remove callers from this.
// ParseTimeExactMatchLegacy is a wrapper around time.Parse() and will return an extra boolean to indicate if it was an exact match or not.
// Parameters: layout, potentialDateTimeString
// Returns: time.Time object, exactLayout (boolean), error
func ParseTimeExactMatchLegacy(layout, potentialDateTimeString string) (time.Time, bool, error) {
ts, err := time.Parse(layout, potentialDateTimeString)
if err != nil {
return ts, false, err
}

return ts, ts.Format(layout) == potentialDateTimeString, nil
}

// ParseExtendedDateTime will take a string and check if the string is of the following types:
// - Timestamp w/ timezone
// - Timestamp w/o timezone
// - Date
// - Time w/ timezone
// - Time w/o timezone
// It attempts to find the exact layout that parses without precision loss and returns the result as an `ExtendedTime` object, which is built to solve:
// 1) Precision loss in translation
// 2) Original format preservation (with tz locale).
// If it cannot find it, then it will give you the next best thing.
func ParseExtendedDateTime(dtString string) (*ExtendedTime, error) {
// Check all the timestamp formats
var potentialFormat string
var potentialTime time.Time
func ParseExtendedDateTime(val string) (*ExtendedTime, error) {
// TODO: ExtendedTimeKindType so we can selectively parse.
for _, supportedDateTimeLayout := range supportedDateTimeLayouts {
ts, exactMatch, err := ParseTimeExactMatchLegacy(supportedDateTimeLayout, dtString)
if err == nil {
potentialFormat = supportedDateTimeLayout
potentialTime = ts
if exactMatch {
return NewExtendedTime(ts, DateTimeKindType, supportedDateTimeLayout), nil
}
if ts, err := ParseTimeExactMatch(supportedDateTimeLayout, val); err == nil {
return NewExtendedTime(ts, DateTimeKindType, supportedDateTimeLayout), nil
}
}

// Now check DATE formats, btw you can append nil arrays
for _, supportedDateFormat := range supportedDateFormats {
ts, exactMatch, err := ParseTimeExactMatchLegacy(supportedDateFormat, dtString)
if err == nil && exactMatch {
if ts, err := ParseTimeExactMatch(supportedDateFormat, val); err == nil {
return NewExtendedTime(ts, DateKindType, supportedDateFormat), nil
}
}

// TODO: Remove this if we don't see any Sentry errors.
// Now check TIME formats
for _, supportedTimeFormat := range SupportedTimeFormatsLegacy {
ts, exactMatch, err := ParseTimeExactMatchLegacy(supportedTimeFormat, dtString)
if err == nil && exactMatch {
slog.Error("Unexpected call to SupportedTimeFormatsLegacy",
slog.String("dtString", dtString),
slog.String("supportedTimeFormat", supportedTimeFormat),
)
if ts, err := ParseTimeExactMatch(supportedTimeFormat, val); err == nil {
return NewExtendedTime(ts, TimeKindType, supportedTimeFormat), nil
}
}

// If nothing fits, return the next best thing.
if potentialFormat != "" {
// TODO: Remove this if we don't see any logs.
slog.Warn("Failed to find exact match for dtString, returning next best thing", slog.String("dtString", dtString), slog.String("potentialFormat", potentialFormat))
return NewExtendedTime(potentialTime, DateTimeKindType, potentialFormat), nil
}

return nil, fmt.Errorf("dtString: %s is not supported", dtString)
return nil, fmt.Errorf("unsupported value: %q", val)
}
9 changes: 0 additions & 9 deletions lib/typing/ext/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,6 @@ func TestParseExtendedDateTime_Timestamp(t *testing.T) {
assert.Equal(t, "2023-04-24T17:29:05.69944Z", extTime.String(""))
}

func TestParseExtendedDateTime(t *testing.T) {
{
dtString := "Mon Jan 02 15:04:05.69944 -0700 2006"
ts, err := ParseExtendedDateTime(dtString)
assert.NoError(t, err)
assert.NotEqual(t, ts.String(""), dtString)
}
}

func TestTimeLayout(t *testing.T) {
ts := time.Now()

Expand Down

0 comments on commit 97b01a3

Please sign in to comment.