Skip to content

Commit

Permalink
importccl: Preserve '\r\n' during CSV import
Browse files Browse the repository at this point in the history
See #25344.

It appears this is caused by the behaviour of Go's encoding/csv
library, which folds \r\n into \n when reading. This was fixed in
golang/go#21201 but then reverted in golang/go#22746. Based on that
second issue, Go is unlikely to change this behavior.

Check in the stdlib `encoding/csv` into `pkg/util` with
golang/go#22746 reverted.

Release note:
`\r\n` sequences in CSV files were silently converted into `\n` during
import, so the imported data differed from the source file. This is now fixed.
  • Loading branch information
neeral committed Aug 2, 2018
1 parent 0d80777 commit 67ffc7e
Show file tree
Hide file tree
Showing 12 changed files with 1,279 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pkg/acceptance/decommission_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package acceptance

import (
"context"
"encoding/csv"
"reflect"
"regexp"
"strconv"
Expand All @@ -31,6 +30,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
"github.com/cockroachdb/cockroach/pkg/util/httputil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/retry"
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/importccl/exportcsv.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ package importccl
import (
"bytes"
"context"
"encoding/csv"
"fmt"
"strconv"
"strings"
Expand All @@ -29,6 +28,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/types"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
)

Expand Down
9 changes: 9 additions & 0 deletions pkg/ccl/importccl/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,15 @@ d
typ: "CSV",
data: "1",
},
{
name: "new line characters",
create: `t text`,
typ: "CSV",
data: "\"hello\r\nworld\"\n\"friend\nfoe\"\n\"mr\rmrs\"",
query: map[string][][]string{
`SELECT t from t`: {{"hello\r\nworld"}, {"friend\nfoe"}, {"mr\rmrs"}},
},
},

// MySQL OUTFILE
{
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/importccl/read_import_csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ package importccl

import (
"context"
"encoding/csv"
"io"
"runtime"

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/pkg/errors"
)
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/format_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package cli

import (
"bytes"
"encoding/csv"
"fmt"
"html"
"io"
Expand All @@ -27,6 +26,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
"github.com/olekukonko/tablewriter"
"github.com/pkg/errors"
)
Expand Down
1 change: 1 addition & 0 deletions pkg/testutils/lint/testdata/errcheck_excludes.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
(*bufio.Writer).Flush
(*database/sql.DB).Close
(*database/sql.Rows).Close
(*database/sql.Stmt).Close
Expand Down
140 changes: 140 additions & 0 deletions pkg/util/encoding/csv/example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package csv_test

import (
"context"
"encoding/csv"
"fmt"
"io"
"os"
"strings"

"github.com/cockroachdb/cockroach/pkg/util/log"
)

// ExampleReader parses a small in-memory CSV document one record at a time
// and prints every record it reads.
func ExampleReader() {
	ctx := context.Background()
	const input = `first_name,last_name,username
"Rob","Pike",rob
Ken,Thompson,ken
"Robert","Griesemer","gri"
`
	reader := csv.NewReader(strings.NewReader(input))

	// The loop stops as soon as Read reports io.EOF; any other error is fatal.
	for fields, err := reader.Read(); err != io.EOF; fields, err = reader.Read() {
		if err != nil {
			log.Fatal(ctx, err)
		}

		fmt.Println(fields)
	}
	// Output:
	// [first_name last_name username]
	// [Rob Pike rob]
	// [Ken Thompson ken]
	// [Robert Griesemer gri]
}

// ExampleReader_options demonstrates tuning a csv.Reader for a different CSV
// dialect: here fields are separated by ';' and '#' is set as the comment
// character.
func ExampleReader_options() {
	ctx := context.Background()
	const input = `first_name;last_name;username
"Rob";"Pike";rob
# lines beginning with a # character are ignored
Ken;Thompson;ken
"Robert";"Griesemer";"gri"
`
	reader := csv.NewReader(strings.NewReader(input))
	reader.Comma, reader.Comment = ';', '#'

	rows, err := reader.ReadAll()
	if err != nil {
		log.Fatal(ctx, err)
	}

	fmt.Print(rows)
	// Output:
	// [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]]
}

// ExampleReader_ReadAll parses a whole in-memory CSV document with a single
// ReadAll call and prints the resulting slice of records.
func ExampleReader_ReadAll() {
	ctx := context.Background()
	const input = `first_name,last_name,username
"Rob","Pike",rob
Ken,Thompson,ken
"Robert","Griesemer","gri"
`
	rows, err := csv.NewReader(strings.NewReader(input)).ReadAll()
	if err != nil {
		log.Fatal(ctx, err)
	}

	fmt.Print(rows)
	// Output:
	// [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]]
}

// ExampleWriter emits a fixed set of records to standard output as CSV,
// writing one record per Write call, flushing, and then checking the writer
// for a recorded error.
func ExampleWriter() {
	ctx := context.Background()
	rows := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	out := csv.NewWriter(os.Stdout)

	for i := range rows {
		err := out.Write(rows[i])
		if err != nil {
			log.Fatalf(ctx, "error writing record to csv: %v\n", err)
		}
	}

	// Push any buffered data through to the underlying writer (standard output).
	out.Flush()

	err := out.Error()
	if err != nil {
		log.Fatal(ctx, err)
	}
	// Output:
	// first_name,last_name,username
	// Rob,Pike,rob
	// Ken,Thompson,ken
	// Robert,Griesemer,gri
}

// ExampleWriter_WriteAll emits a fixed set of records to standard output as
// CSV with a single WriteAll call, then checks the writer for a recorded
// error.
func ExampleWriter_WriteAll() {
	ctx := context.Background()
	rows := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	out := csv.NewWriter(os.Stdout)
	err := out.WriteAll(rows) // calls Flush internally
	if err != nil {
		log.Fatalf(ctx, "error writing csv: %v\n", err)
	}

	err = out.Error()
	if err != nil {
		log.Fatalf(ctx, "error writing csv: %v\n", err)
	}
	// Output:
	// first_name,last_name,username
	// Rob,Pike,rob
	// Ken,Thompson,ken
	// Robert,Griesemer,gri
}
Loading

0 comments on commit 67ffc7e

Please sign in to comment.