Skip to content

Commit

Permalink
[chore][pkg/stanza] Add syslog octet counting test cases with whitesp…
Browse files Browse the repository at this point in the history
…ace scenarios (#32832)

Resolves #31477

---------

Co-authored-by: Tiffany Hrabusa <30397949+tiffany76@users.noreply.github.com>
Co-authored-by: Curtis Robert <crobert@splunk.com>
  • Loading branch information
3 people authored May 14, 2024
1 parent 9ce4f20 commit a35ad07
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 16 deletions.
21 changes: 19 additions & 2 deletions pkg/stanza/operator/input/syslog/input_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,23 @@ func TestInput(t *testing.T) {
for _, tc := range cases {
cfg := tc.Config.BaseConfig
if tc.ValidForTCP {
tcpCfg := NewConfigWithTCP(&cfg)
if tc.Name == syslog.RFC6587OctetCountingPreserveSpaceTest {
tcpCfg.TCP.TrimConfig.PreserveLeading = true
tcpCfg.TCP.TrimConfig.PreserveTrailing = true
}
t.Run(fmt.Sprintf("TCP-%s", tc.Name), func(t *testing.T) {
InputTest(t, tc, NewConfigWithTCP(&cfg), nil, nil)
InputTest(t, tc, tcpCfg, nil, nil)
})
}
if tc.ValidForUDP {
udpCfg := NewConfigWithUDP(&cfg)
if tc.Name == syslog.RFC6587OctetCountingPreserveSpaceTest {
udpCfg.UDP.TrimConfig.PreserveLeading = true
udpCfg.UDP.TrimConfig.PreserveTrailing = true
}
t.Run(fmt.Sprintf("UDP-%s", tc.Name), func(t *testing.T) {
InputTest(t, tc, NewConfigWithUDP(&cfg), nil, nil)
InputTest(t, tc, udpCfg, nil, nil)
})
}
}
Expand Down Expand Up @@ -268,6 +278,13 @@ func TestOctetFramingSplitFunc(t *testing.T) {
splittest.ExpectToken(`17 my log LOGEND 123`),
},
},
{
name: "OneLogTrailingSpace",
input: []byte(`84 <13>1 2024-02-28T03:32:00.313226+00:00 192.168.65.1 inactive - - - partition is p2 `),
steps: []splittest.Step{
splittest.ExpectToken(`84 <13>1 2024-02-28T03:32:00.313226+00:00 192.168.65.1 inactive - - - partition is p2 `),
},
},
{
name: "TwoLogsSimple",
input: []byte(`17 my log LOGEND 12317 my log LOGEND 123`),
Expand Down
31 changes: 31 additions & 0 deletions pkg/stanza/operator/parser/syslog/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry"
)

// RFC6587OctetCountingPreserveSpaceTest is the name of the test case that
// requires the whitespace-preservation trim flags (PreserveLeading and
// PreserveTrailing) to be enabled on the input config, so that leading and
// trailing spaces in the message are kept.
const RFC6587OctetCountingPreserveSpaceTest = "RFC6587 Octet Counting Preserve Space"

type Case struct {
Name string
Config *Config
Expand Down Expand Up @@ -340,6 +343,34 @@ func CreateCases(basicConfig func() *Config) ([]Case, error) {
true,
false,
},
{
RFC6587OctetCountingPreserveSpaceTest,
func() *Config {
cfg := basicConfig()
cfg.Protocol = RFC5424
cfg.EnableOctetCounting = true
return cfg
}(),
&entry.Entry{
Body: `77 <86>1 2015-08-05T21:58:59.693Z 192.168.2.132 inactive - - - partition is p2 `,
},
&entry.Entry{
Timestamp: time.Date(2015, 8, 5, 21, 58, 59, 693000000, time.UTC),
Severity: entry.Info,
SeverityText: "info",
Attributes: map[string]any{
"appname": "inactive",
"facility": 10,
"hostname": "192.168.2.132",
"message": " partition is p2 ",
"priority": 86,
"version": 1,
},
Body: `77 <86>1 2015-08-05T21:58:59.693Z 192.168.2.132 inactive - - - partition is p2 `,
},
true,
false,
},
{
"RFC6587 Non-Transparent-framing",
func() *Config {
Expand Down
80 changes: 66 additions & 14 deletions receiver/syslogreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,28 +44,80 @@ Each operator performs a simple responsibility, such as parsing a timestamp or J

### UDP Configuration

| Field | Default | Description |
| --- | --- | --- |
| `listen_address` | required | A listen address of the form `<ip>:<port>` |
| Field | Default | Description |
|---------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------|
| `listen_address` | required | A listen address of the form `<ip>:<port>`. |
| `add_attributes` | false | Adds `net.*` attributes according to OpenTelemetry semantic conventions. |
| `multiline` | | A `multiline` configuration block. See below for details. |
| `one_log_per_packet` | false | Skip log tokenization. Set to true if each record contains exactly one log and multiline is not used; this improves performance. |
| `preserve_leading_whitespaces` | false | Whether to preserve leading whitespaces. |
| `preserve_trailing_whitespaces` | false | Whether to preserve trailing whitespaces. |
| `encoding` | `utf-8` | The encoding of the data being read. See the list of supported encodings below for available options. |
| `async` | nil | An `async` configuration block. See below for details. |

### TCP Configuration

| Field | Default | Description |
| --- | --- | --- |
| `max_buffer_size` | `1024kib` | Maximum size of buffer that may be allocated while reading TCP input |
| `listen_address` | required | A listen address of the form `<ip>:<port>` |
| `tls` | | An optional `TLS` configuration (see the TLS configuration section) |
| Field | Default | Description |
|---------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------|
| `max_log_size` | `1MiB` | The maximum size of a log entry to read before failing. Protects against reading large amounts of data into memory. |
| `listen_address` | required | A listen address of the form `<ip>:<port>`. |
| `tls` | nil | An optional `TLS` configuration (see the TLS configuration section). |
| `add_attributes` | false | Adds `net.*` attributes according to OpenTelemetry semantic conventions. |
| `multiline` | | A `multiline` configuration block. See below for details. |
| `one_log_per_packet` | false | Skip log tokenization. Set to true if each record contains exactly one log and multiline is not used; this improves performance. |
| `preserve_leading_whitespaces` | false | Whether to preserve leading whitespaces. |
| `preserve_trailing_whitespaces` | false | Whether to preserve trailing whitespaces. |
| `encoding` | `utf-8` | The encoding of the data being read. See the list of supported encodings below for available options. |

#### TLS Configuration

The `tcp_input` operator supports TLS, disabled by default.

| Field | Default | Description |
| --- | --- | --- |
| `cert_file` | | Path to the TLS cert to use for TLS required connections. |
| `key_file` | | Path to the TLS key to use for TLS required connections.|
| `ca_file` | | Path to the CA cert. For a client this verifies the server certificate. For a server this verifies client certificates. If empty uses system root CA. |
| `client_ca_file` | | (optional) Path to the TLS cert to use by the server to verify a client certificate. This sets the ClientCAs and ClientAuth to RequireAndVerifyClientCert in the TLSConfig. Please refer to godoc.org/crypto/tls#Config for more information. |
| Field | Default | Description |
|------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `cert_file` | | Path to the TLS cert to use for TLS required connections. |
| `key_file` | | Path to the TLS key to use for TLS required connections. |
| `ca_file` | | Path to the CA cert. For a client this verifies the server certificate. For a server this verifies client certificates. If empty, the system root CA is used. |
| `client_ca_file` | | (optional) Path to the TLS cert to use by the server to verify a client certificate. This sets the ClientCAs and ClientAuth to RequireAndVerifyClientCert in the TLSConfig. Please refer to godoc.org/crypto/tls#Config for more information. |

#### `multiline` configuration

If set, the `multiline` configuration block instructs the `udp_input` operator to split log entries on a pattern other than newlines.

**Note:** If `multiline` is not set, log entries are not split; every UDP packet is treated as a single log.

**Note:** `multiline` detection works per UDP packet due to protocol limitations.

The `multiline` configuration block must contain exactly one of `line_start_pattern` or `line_end_pattern`. These are regex patterns that
match either the beginning of a new log entry, or the end of a log entry.

The `omit_pattern` setting can be used to omit the start/end pattern from each entry.

#### Supported encodings

| Key | Description |
|------------|------------------------------------------------------------------|
| `nop` | No encoding validation. Treats the file as a stream of raw bytes |
| `utf-8` | UTF-8 encoding |
| `utf-16le` | UTF-16 encoding with little-endian byte order |
| `utf-16be` | UTF-16 encoding with big-endian byte order |
| `ascii` | ASCII encoding |
| `big5` | The Big5 Chinese character encoding |

Other less common encodings are supported on a best-effort basis.
See [https://www.iana.org/assignments/character-sets/character-sets.xhtml](https://www.iana.org/assignments/character-sets/character-sets.xhtml)
for other encodings available.

#### `async` configuration

If set, the `async` configuration block instructs the `udp_input` operator to read and process logs asynchronously and concurrently.

**note** If `async` is not set at all, a single thread will read & process lines synchronously.

| Field | Default | Description |
|--------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `readers` | 1 | Concurrency level - Determines how many goroutines read from UDP port and push to channel (to be handled by processors). |
| `processors` | 1 | Concurrency level - Determines how many goroutines read from channel (pushed by readers) and process logs before sending downstream. |
| `max_queue_length` | 100 | Determines max number of messages which may be waiting for a processor. While the queue is full, the readers will wait until there's room (readers will not drop messages, but they will not read additional incoming messages during that period). |

## Additional Terminology and Features

Expand Down

0 comments on commit a35ad07

Please sign in to comment.