Skip to content

Commit

Permalink
Establish redacted exceptions as the default for strict mode
Browse files Browse the repository at this point in the history
  • Loading branch information
beatrichartz committed Sep 24, 2023
1 parent a3571ff commit c92ea24
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 36 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,12 @@ occur, aborting the operation:
File.stream!("data.csv") |> CSV.decode!
````

Redact data in exceptions that `decode!` throws to avoid potentially sensitive data showing up in logs:
```elixir
File.stream!("data.csv") |> CSV.decode!(redact_exception: true)
```


#### Options

For all available options [check the docs on `decode`](https://hexdocs.pm/csv/CSV.html#decode/2)
Expand Down
42 changes: 21 additions & 21 deletions lib/csv.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,27 @@ defmodule CSV do
These are the options:
* `:separator` – The separator token to use, defaults to `?,`.
* `:separator` – The separator token to use, defaults to `?,`.
Must be a codepoint (syntax: ? + (your separator)).
* `:escape_character` – The escape character token to use, defaults to `?"`.
* `:escape_character` – The escape character token to use, defaults to `?"`.
Must be a codepoint (syntax: ? + (your escape character)).
* `:escape_max_lines` – The number of lines an escape sequence is allowed
* `:escape_max_lines` – The number of lines an escape sequence is allowed
to span, defaults to 10.
* `:field_transform` – A function with arity 1 that will get called with
* `:field_transform` – A function with arity 1 that will get called with
each field and can apply transformations. Defaults to identity function.
This function will get called for every field and therefore should return
quickly.
* `:headers` – When set to `true`, will take the first row of
* `:headers` – When set to `true`, will take the first row of
the csv and use it as header values.
When set to a list, will use the given list as header values.
When set to `false` (default), will use no header values.
When set to anything but `false`, the resulting rows in the matrix will
be maps instead of lists.
* `:validate_row_length` – When set to `true`, will take the first row of
* `:validate_row_length` – When set to `true`, will take the first row of
the csv or its headers and validate that following rows are of the same
length. Defaults to `false`.
* `:unescape_formulas` – When set to `true`, will remove formula escaping
* `:unescape_formulas` – When set to `true`, will remove formula escaping
inserted to prevent [CSV Injection](https://owasp.org/www-community/attacks/CSV_Injection).
* `:redact_exception` – When set to `true`, will remove line data from
exception message output. This is to prevent sensitive data leaking in
logs
## Examples
Expand Down Expand Up @@ -151,7 +148,6 @@ defmodule CSV do
| {:validate_row_length, boolean()}
| {:escape_character, char()}
| {:escape_max_lines, integer()}
| {:redact_exception, boolean()}

@spec decode(Enumerable.t(), [decode_options()]) :: Enumerable.t()
def decode(stream, options \\ []) do
Expand Down Expand Up @@ -185,6 +181,10 @@ defmodule CSV do
length. Will raise an error if validation fails. Defaults to `false`.
* `:unescape_formulas` – When set to `true`, will remove formula escaping
inserted to prevent [CSV Injection](https://owasp.org/www-community/attacks/CSV_Injection).
* `:unredact_exceptions` – When set to `true`, will show csv data in
message output of exceptions thrown. Only use this when using CSV strict
mode in environments and situations where there is no concern with
exception message data leaking in logs. Defaults to `false`.
## Examples
Expand Down Expand Up @@ -284,36 +284,36 @@ defmodule CSV do
"""

@spec decode!(Enumerable.t(), [decode_options()]) :: Enumerable.t()
@spec decode!(Enumerable.t(), [decode_options() | {:unredact_exceptions, boolean()}]) ::
Enumerable.t()
def decode!(stream, options \\ []) do
stream |> Decoder.decode(options) |> raise_errors!(options)
end

defp raise_errors!(stream, options) do
escape_max_lines = options |> Keyword.get(:escape_max_lines, @escape_max_lines)
redact_exception = options |> Keyword.get(:redact_exception, false)
unredact_exceptions = options |> Keyword.get(:unredact_exceptions, false)

stream |> Stream.map(&yield_or_raise!(&1, escape_max_lines, redact_exception))
stream |> Stream.map(&yield_or_raise!(&1, escape_max_lines, unredact_exceptions))
end

defp yield_or_raise!({:error, mod, args}, _, redact_exception) do
raise mod, args ++ [mode: :strict, redact: redact_exception]
defp yield_or_raise!({:error, mod, args}, _, unredact_exceptions) do
raise mod, args ++ [mode: :strict, unredact: unredact_exceptions]
end

defp yield_or_raise!({:ok, row}, _, _), do: row

defp inline_errors!(stream, options) do
escape_max_lines = options |> Keyword.get(:escape_max_lines, @escape_max_lines)
redact_exception = options |> Keyword.get(:redact_exception, false)

stream |> Stream.map(&yield_or_inline!(&1, escape_max_lines, redact_exception))
stream |> Stream.map(&yield_or_inline!(&1, escape_max_lines))
end

defp yield_or_inline!({:error, mod, args},_, redact_exception) do
{:error, mod.exception(args ++ [mode: :normal, redact: redact_exception]).message}
defp yield_or_inline!({:error, mod, args}, _) do
{:error, mod.exception(args ++ [mode: :normal, unredact: true]).message}
end

defp yield_or_inline!(value, _, _), do: value
defp yield_or_inline!(value, _), do: value

@doc """
Encode a table stream into a stream of RFC 4180 compliant CSV lines for
Expand Down
19 changes: 13 additions & 6 deletions lib/csv/exceptions.ex
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ defmodule CSV.StrayEscapeCharacterError do

def exception(options) do
line = options |> Keyword.fetch!(:line)
sequence = options |> Keyword.fetch!(:sequence)
redact = options |> Keyword.get(:redact, false)
unredact = options |> Keyword.get(:unredact, false)
sequence = options |> Keyword.fetch!(:sequence) |> get_sequence(unredact)

message =
"Stray escape character on line #{line}:" <>
"\n\n#{get_sequence(redact, sequence)}" <>
"\n\n#{sequence}" <>
"\n\nThis error often happens when the wrong separator or escape character has been applied.\n"

%__MODULE__{
Expand All @@ -43,8 +43,8 @@ defmodule CSV.StrayEscapeCharacterError do
}
end

defp get_sequence(true, _), do: "[redacted]"
defp get_sequence(false, sequence), do: sequence
defp get_sequence(_, false), do: "**redacted**"
defp get_sequence(sequence, true), do: sequence
end

defmodule CSV.EscapeSequenceError do
Expand All @@ -58,7 +58,11 @@ defmodule CSV.EscapeSequenceError do
def exception(options) do
line = options |> Keyword.fetch!(:line)
stream_halted = options |> Keyword.get(:stream_halted, false)
escape_sequence_start = options |> Keyword.fetch!(:escape_sequence_start)
unredact = options |> Keyword.get(:unredact, false)

escape_sequence_start =
options |> Keyword.fetch!(:escape_sequence_start) |> get_sequence(unredact)

mode = options |> Keyword.fetch!(:mode)

continues_parsing =
Expand Down Expand Up @@ -90,4 +94,7 @@ defmodule CSV.EscapeSequenceError do
message: message
}
end

defp get_sequence(_, false), do: "**redacted**"
defp get_sequence(sequence, true), do: sequence
end
25 changes: 25 additions & 0 deletions test/csv_exceptions_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defmodule CSVExceptionsTest do
import TestSupport.StreamHelpers

alias CSV.RowLengthError
alias CSV.StrayEscapeCharacterError

test "decodes in normal mode emitting errors with row lengths when configured" do
stream = ~w(a,be a c,d) |> to_line_stream
Expand Down Expand Up @@ -38,6 +39,30 @@ defmodule CSVExceptionsTest do
end
end

test "decodes in strict mode redacting error messages" do
stream = ~w(a,be a" c,d) |> to_line_stream

expected_message =
"Stray escape character on line 2:\n\n**redacted**\n\n" <>
"This error often happens when the wrong separator or escape character has been applied.\n"

assert_raise StrayEscapeCharacterError, expected_message, fn ->
CSV.decode!(stream) |> Stream.run()
end
end

test "decodes in strict mode allowing error messages to be unredacted" do
stream = ~w(a,be a" c,d) |> to_line_stream

expected_message =
"Stray escape character on line 2:\n\na\"\n\n" <>
"This error often happens when the wrong separator or escape character has been applied.\n"

assert_raise StrayEscapeCharacterError, expected_message, fn ->
CSV.decode!(stream, unredact_exceptions: true) |> Stream.run()
end
end

test "returns encoding errors as is with rows in normal mode" do
stream = [<<"Diego,Fern", 225, "ndez">>, "John,Smith"] |> to_line_stream
result = CSV.decode(stream) |> Enum.to_list()
Expand Down
4 changes: 2 additions & 2 deletions test/escaped_fields_exceptions_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ defmodule DecodingTests.EscapedFieldsExceptionsTest do
test "raises errors for stray quotes in strict mode and redacts sequence" do
stream = [",sensitive\",", ",c,d"] |> to_line_stream

assert_raise CSV.StrayEscapeCharacterError, ~r/\[redacted\]/, fn ->
CSV.decode!(stream, redact_exception: true) |> Stream.run()
assert_raise CSV.StrayEscapeCharacterError, ~r/\*\*redacted\*\*/, fn ->
CSV.decode!(stream) |> Stream.run()
end
end
end
35 changes: 28 additions & 7 deletions test/exceptions_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ defmodule ExceptionsTest do
mode: :normal,
stream_halted: true,
escape_sequence_start: "SEQUENCE START",
escape_max_lines: 2
escape_max_lines: 2,
unredact: true
)

assert exception.message ==
Expand All @@ -39,7 +40,26 @@ defmodule ExceptionsTest do
line: 1,
mode: :strict,
escape_sequence_start: "SEQUENCE START",
escape_max_lines: 2
escape_max_lines: 2,
unredact: false
)

assert exception.message ==
"Escape sequence started on line 1:\n\n**redacted**\n\ndid not terminate. " <>
"You can use normal mode to continue parsing rows even if single rows have errors.\n\n" <>
"Escape sequences are allowed to span up to 2 lines. This threshold avoids " <>
"collecting the whole file into memory when an escape sequence does not terminate.\n" <>
"You can change it using the escape_max_lines option: https://hexdocs.pm/csv/CSV.html#decode/2\n"
end

test "unredacted exception messaging about unfinished escape sequences" do
exception =
EscapeSequenceError.exception(
line: 1,
mode: :strict,
escape_sequence_start: "SEQUENCE START",
escape_max_lines: 2,
unredact: true
)

assert exception.message ==
Expand All @@ -50,18 +70,19 @@ defmodule ExceptionsTest do
"You can change it using the escape_max_lines option: https://hexdocs.pm/csv/CSV.html#decode/2\n"
end

test "exception messaging about stray escape character errors" do
exception = StrayEscapeCharacterError.exception(line: 1, sequence: "THIS")
test "exception messaging about stray escape character errors shows line when unredacted" do
exception = StrayEscapeCharacterError.exception(line: 1, sequence: "THIS", unredact: true)

assert exception.message ==
"Stray escape character on line 1:\n\nTHIS\n\nThis error often happens when the wrong separator or escape character has been applied.\n"
end

test "exception messaging about stray escape character errors can be redacted" do
exception = StrayEscapeCharacterError.exception(line: 1, sequence: "sensitive", redact: true)
test "exception messaging about stray escape character errors is redacted by default" do
exception =
StrayEscapeCharacterError.exception(line: 1, sequence: "sensitive", unredact: false)

assert exception.message ==
"Stray escape character on line 1:\n\n[redacted]\n\nThis error often happens when the wrong separator or escape character has been applied.\n"
"Stray escape character on line 1:\n\n**redacted**\n\nThis error often happens when the wrong separator or escape character has been applied.\n"

refute exception.message =~ "sensitive"
end
Expand Down

0 comments on commit c92ea24

Please sign in to comment.