diff --git a/pkg/stanza/operator/input/syslog/input_test.go b/pkg/stanza/operator/input/syslog/input_test.go index 5e90d3242967..262ba309e09a 100644 --- a/pkg/stanza/operator/input/syslog/input_test.go +++ b/pkg/stanza/operator/input/syslog/input_test.go @@ -106,13 +106,23 @@ func TestInput(t *testing.T) { for _, tc := range cases { cfg := tc.Config.BaseConfig if tc.ValidForTCP { + tcpCfg := NewConfigWithTCP(&cfg) + if tc.Name == syslog.RFC6587OctetCountingPreserveSpaceTest { + tcpCfg.TCP.TrimConfig.PreserveLeading = true + tcpCfg.TCP.TrimConfig.PreserveTrailing = true + } t.Run(fmt.Sprintf("TCP-%s", tc.Name), func(t *testing.T) { - InputTest(t, tc, NewConfigWithTCP(&cfg), nil, nil) + InputTest(t, tc, tcpCfg, nil, nil) }) } if tc.ValidForUDP { + udpCfg := NewConfigWithUDP(&cfg) + if tc.Name == syslog.RFC6587OctetCountingPreserveSpaceTest { + udpCfg.UDP.TrimConfig.PreserveLeading = true + udpCfg.UDP.TrimConfig.PreserveTrailing = true + } t.Run(fmt.Sprintf("UDP-%s", tc.Name), func(t *testing.T) { - InputTest(t, tc, NewConfigWithUDP(&cfg), nil, nil) + InputTest(t, tc, udpCfg, nil, nil) }) } } @@ -268,6 +278,13 @@ func TestOctetFramingSplitFunc(t *testing.T) { splittest.ExpectToken(`17 my log LOGEND 123`), }, }, + { + name: "OneLogTrailingSpace", + input: []byte(`84 <13>1 2024-02-28T03:32:00.313226+00:00 192.168.65.1 inactive - - - partition is p2 `), + steps: []splittest.Step{ + splittest.ExpectToken(`84 <13>1 2024-02-28T03:32:00.313226+00:00 192.168.65.1 inactive - - - partition is p2 `), + }, + }, { name: "TwoLogsSimple", input: []byte(`17 my log LOGEND 12317 my log LOGEND 123`), diff --git a/pkg/stanza/operator/parser/syslog/data.go b/pkg/stanza/operator/parser/syslog/data.go index 835f8a1a3258..d9481882349c 100644 --- a/pkg/stanza/operator/parser/syslog/data.go +++ b/pkg/stanza/operator/parser/syslog/data.go @@ -11,6 +11,9 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" ) +// This is the name of a test which requires 
setting the PreserveWhitespace flags. +const RFC6587OctetCountingPreserveSpaceTest = "RFC6587 Octet Counting Preserve Space" + type Case struct { Name string Config *Config @@ -340,6 +343,34 @@ func CreateCases(basicConfig func() *Config) ([]Case, error) { true, false, }, + { + RFC6587OctetCountingPreserveSpaceTest, + func() *Config { + cfg := basicConfig() + cfg.Protocol = RFC5424 + cfg.EnableOctetCounting = true + return cfg + }(), + &entry.Entry{ + Body: `77 <86>1 2015-08-05T21:58:59.693Z 192.168.2.132 inactive - - - partition is p2 `, + }, + &entry.Entry{ + Timestamp: time.Date(2015, 8, 5, 21, 58, 59, 693000000, time.UTC), + Severity: entry.Info, + SeverityText: "info", + Attributes: map[string]any{ + "appname": "inactive", + "facility": 10, + "hostname": "192.168.2.132", + "message": " partition is p2 ", + "priority": 86, + "version": 1, + }, + Body: `77 <86>1 2015-08-05T21:58:59.693Z 192.168.2.132 inactive - - - partition is p2 `, + }, + true, + false, + }, { "RFC6587 Non-Transparent-framing", func() *Config { diff --git a/receiver/syslogreceiver/README.md b/receiver/syslogreceiver/README.md index 2af50cb0132a..0faf67541024 100644 --- a/receiver/syslogreceiver/README.md +++ b/receiver/syslogreceiver/README.md @@ -44,28 +44,80 @@ Each operator performs a simple responsibility, such as parsing a timestamp or J ### UDP Configuration -| Field | Default | Description | -| --- | --- | --- | -| `listen_address` | required | A listen address of the form `<ip>:<port>` | +| Field | Default | Description | +|---------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------| +| `listen_address` | required | A listen address of the form `<ip>:<port>`. | +| `add_attributes` | false | Adds `net.*` attributes according to OpenTelemetry semantic conventions. | +| `multiline` | | A `multiline` configuration block. See below for details.
| +| `one_log_per_packet` | false | Skip log tokenization, set to true if logs contain one log per record and multiline is not used. This will improve performance. | +| `preserve_leading_whitespaces` | false | Whether to preserve leading whitespaces. | +| `preserve_trailing_whitespaces` | false | Whether to preserve trailing whitespaces. | +| `encoding` | `utf-8` | The encoding of the data being read. See the list of supported encodings below for available options. | +| `async` | nil | An `async` configuration block. See below for details. | ### TCP Configuration -| Field | Default | Description | -| --- | --- | --- | -| `max_buffer_size` | `1024kib` | Maximum size of buffer that may be allocated while reading TCP input | -| `listen_address` | required | A listen address of the form `<ip>:<port>` | -| `tls` | | An optional `TLS` configuration (see the TLS configuration section) | +| Field | Default | Description | +|---------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------| +| `max_log_size` | `1MiB` | The maximum size of a log entry to read before failing. Protects against reading large amounts of data into memory. | +| `listen_address` | required | A listen address of the form `<ip>:<port>`. | +| `tls` | nil | An optional `TLS` configuration (see the TLS configuration section). | +| `add_attributes` | false | Adds `net.*` attributes according to OpenTelemetry semantic conventions. | +| `multiline` | | A `multiline` configuration block. See below for details. | +| `one_log_per_packet` | false | Skip log tokenization, set to true if logs contain one log per record and multiline is not used. This will improve performance. | +| `preserve_leading_whitespaces` | false | Whether to preserve leading whitespaces. | +| `preserve_trailing_whitespaces` | false | Whether to preserve trailing whitespaces. | +| `encoding` | `utf-8` | The encoding of the data being read.
See the list of supported encodings below for available options. | #### TLS Configuration The `tcp_input` operator supports TLS, disabled by default. -| Field | Default | Description | -| --- | --- | --- | -| `cert_file` | | Path to the TLS cert to use for TLS required connections. | -| `key_file` | | Path to the TLS key to use for TLS required connections.| -| `ca_file` | | Path to the CA cert. For a client this verifies the server certificate. For a server this verifies client certificates. If empty uses system root CA. | -| `client_ca_file` | | (optional) Path to the TLS cert to use by the server to verify a client certificate. This sets the ClientCAs and ClientAuth to RequireAndVerifyClientCert in the TLSConfig. Please refer to godoc.org/crypto/tls#Config for more information. | +| Field | Default | Description | +|------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `cert_file` | | Path to the TLS cert to use for TLS required connections. | +| `key_file` | | Path to the TLS key to use for TLS required connections. | +| `ca_file` | | Path to the CA cert. For a client this verifies the server certificate. For a server this verifies client certificates. If empty, the system root CA is used. | +| `client_ca_file` | | (optional) Path to the TLS cert to use by the server to verify a client certificate. This sets the ClientCAs and ClientAuth to RequireAndVerifyClientCert in the TLSConfig. Please refer to godoc.org/crypto/tls#Config for more information. | + +#### `multiline` configuration + +If set, the `multiline` configuration block instructs the `udp_input` operator to split log entries on a pattern other than newlines. + +**note** If `multiline` is not set at all, it won't split log entries at all. Every UDP packet is going to be treated as a log. 
+**note** `multiline` detection works per UDP packet due to protocol limitations. + +The `multiline` configuration block must contain exactly one of `line_start_pattern` or `line_end_pattern`. These are regex patterns that +match either the beginning of a new log entry, or the end of a log entry. + +The `omit_pattern` setting can be used to omit the start/end pattern from each entry. + +#### Supported encodings + +| Key | Description | +|------------|------------------------------------------------------------------| +| `nop` | No encoding validation. Treats the input as a stream of raw bytes | +| `utf-8` | UTF-8 encoding | +| `utf-16le` | UTF-16 encoding with little-endian byte order | +| `utf-16be` | UTF-16 encoding with big-endian byte order | +| `ascii` | ASCII encoding | +| `big5` | The Big5 Chinese character encoding | + +Other less common encodings are supported on a best-effort basis. +See [https://www.iana.org/assignments/character-sets/character-sets.xhtml](https://www.iana.org/assignments/character-sets/character-sets.xhtml) +for other encodings available. + +#### `async` configuration + +If set, the `async` configuration block instructs the `udp_input` operator to read and process logs asynchronously and concurrently. + +**note** If `async` is not set at all, a single thread will read & process lines synchronously. + +| Field | Default | Description | +|--------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `readers` | 1 | Concurrency level - Determines how many goroutines read from the UDP port and push to the channel (to be handled by processors). | +| `processors` | 1 | Concurrency level - Determines how many goroutines read from the channel (pushed by readers) and process logs before sending downstream.
| +| `max_queue_length` | 100 | Determines max number of messages which may be waiting for a processor. While the queue is full, the readers will wait until there's room (readers will not drop messages, but they will not read additional incoming messages during that period). | ## Additional Terminology and Features