diff --git a/.chloggen/container_parser_expose_max_recombine_log.yaml b/.chloggen/container_parser_expose_max_recombine_log.yaml new file mode 100644 index 000000000000..b2b20f575478 --- /dev/null +++ b/.chloggen/container_parser_expose_max_recombine_log.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/stanza + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Expose recombine max log size option in the container parser configuration + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [33186] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/pkg/stanza/docs/operators/container.md b/pkg/stanza/docs/operators/container.md index 4cc972fbc5ed..8c3fe381eccc 100644 --- a/pkg/stanza/docs/operators/container.md +++ b/pkg/stanza/docs/operators/container.md @@ -9,6 +9,7 @@ The `container` operator parses logs in `docker`, `cri-o` and `containerd` forma | `id` | `container` | A unique identifier for the operator. 
| | `format` | `` | The container log format to use if it is known. Users can choose between `docker`, `crio` and `containerd`. If not set, the format will be automatically detected. | | `add_metadata_from_filepath` | `true` | Set if k8s metadata should be added from the file path. Requires the `log.file.path` field to be present. | +| `max_log_size` | `0` | The maximum size, in bytes, of the recombined log when parsing partial logs. Once the size exceeds the limit, all received entries of the source will be combined and flushed. A value of `0` means no limit. | | `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. | | `parse_from` | `body` | The [field](../types/field.md) from which the value will be parsed. | | `parse_to` | `attributes` | The [field](../types/field.md) to which the value will be parsed. | @@ -187,7 +188,10 @@ Configuration: -#### Parse the multiline as containerd container log and recombine into a single one +#### Parse multiline CRI container logs and recombine into a single one + +Kubernetes logs in the CRI format have a tag that indicates whether the log entry is part of a longer log line (P) +or the final entry (F). Using this tag, we can recombine the CRI logs back into complete log lines. 
Configuration: ```yaml diff --git a/pkg/stanza/operator/parser/container/config.go b/pkg/stanza/operator/parser/container/config.go index 1310d596c3b9..39c279c3369f 100644 --- a/pkg/stanza/operator/parser/container/config.go +++ b/pkg/stanza/operator/parser/container/config.go @@ -17,7 +17,11 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/transformer/recombine" ) -const operatorType = "container" +const ( + operatorType = "container" + recombineSourceIdentifier = "log.file.path" + recombineIsLastEntry = "attributes.logtag == 'F'" +) func init() { operator.Register(operatorType, func() operator.Builder { return NewConfig() }) @@ -34,6 +38,7 @@ func NewConfigWithID(operatorID string) *Config { ParserConfig: helper.NewParserConfig(operatorID, operatorType), Format: "", AddMetadataFromFilePath: true, + MaxLogSize: 0, } } @@ -41,8 +46,9 @@ func NewConfigWithID(operatorID string) *Config { type Config struct { helper.ParserConfig `mapstructure:",squash"` - Format string `mapstructure:"format"` - AddMetadataFromFilePath bool `mapstructure:"add_metadata_from_filepath"` + Format string `mapstructure:"format"` + AddMetadataFromFilePath bool `mapstructure:"add_metadata_from_filepath"` + MaxLogSize helper.ByteSize `mapstructure:"max_log_size,omitempty"` } // Build will build a Container parser operator. 
@@ -53,7 +59,7 @@ func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error } cLogEmitter := helper.NewLogEmitter(set) - recombineParser, err := createRecombine(set, cLogEmitter) + recombineParser, err := createRecombine(set, c, cLogEmitter) if err != nil { return nil, fmt.Errorf("failed to create internal recombine config: %w", err) } @@ -93,8 +99,8 @@ func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error // max_log_size: 102400 // source_identifier: attributes["log.file.path"] // type: recombine -func createRecombine(set component.TelemetrySettings, cLogEmitter *helper.LogEmitter) (operator.Operator, error) { - recombineParserCfg := createRecombineConfig() +func createRecombine(set component.TelemetrySettings, c Config, cLogEmitter *helper.LogEmitter) (operator.Operator, error) { + recombineParserCfg := createRecombineConfig(c) recombineParser, err := recombineParserCfg.Build(set) if err != nil { return nil, fmt.Errorf("failed to resolve internal recombine config: %w", err) @@ -109,12 +115,12 @@ func createRecombine(set component.TelemetrySettings, cLogEmitter *helper.LogEmi return recombineParser, nil } -func createRecombineConfig() *recombine.Config { +func createRecombineConfig(c Config) *recombine.Config { recombineParserCfg := recombine.NewConfigWithID(recombineInternalID) - recombineParserCfg.IsLastEntry = "attributes.logtag == 'F'" + recombineParserCfg.IsLastEntry = recombineIsLastEntry recombineParserCfg.CombineField = entry.NewBodyField() recombineParserCfg.CombineWith = "" - recombineParserCfg.SourceIdentifier = entry.NewAttributeField("log.file.path") - recombineParserCfg.MaxLogSize = 102400 + recombineParserCfg.SourceIdentifier = entry.NewAttributeField(recombineSourceIdentifier) + recombineParserCfg.MaxLogSize = c.MaxLogSize return recombineParserCfg } diff --git a/pkg/stanza/operator/parser/container/config_test.go b/pkg/stanza/operator/parser/container/config_test.go index 599c26c1b7fd..e2aeceb8b1d1 
100644 --- a/pkg/stanza/operator/parser/container/config_test.go +++ b/pkg/stanza/operator/parser/container/config_test.go @@ -78,6 +78,14 @@ func TestConfig(t *testing.T) { return cfg }(), }, + { + Name: "max_log_size", + Expect: func() *Config { + cfg := NewConfig() + cfg.MaxLogSize = 10242 + return cfg + }(), + }, { Name: "parse_to_attributes", Expect: func() *Config { diff --git a/pkg/stanza/operator/parser/container/parser_test.go b/pkg/stanza/operator/parser/container/parser_test.go index 1d966705be8d..71e159daacb2 100644 --- a/pkg/stanza/operator/parser/container/parser_test.go +++ b/pkg/stanza/operator/parser/container/parser_test.go @@ -83,7 +83,7 @@ func TestFormatDetectionFailure(t *testing.T) { } func TestInternalRecombineCfg(t *testing.T) { - cfg := createRecombineConfig() + cfg := createRecombineConfig(Config{MaxLogSize: 102400}) expected := recombine.NewConfigWithID(recombineInternalID) expected.IsLastEntry = "attributes.logtag == 'F'" expected.CombineField = entry.NewBodyField() diff --git a/pkg/stanza/operator/parser/container/testdata/config.yaml b/pkg/stanza/operator/parser/container/testdata/config.yaml index fe174f3bb071..d44093243a6a 100644 --- a/pkg/stanza/operator/parser/container/testdata/config.yaml +++ b/pkg/stanza/operator/parser/container/testdata/config.yaml @@ -9,6 +9,9 @@ on_error_drop: add_metadata_from_file_path: type: container add_metadata_from_file_path: true +max_log_size: + type: container + max_log_size: 10242 parse_from_simple: type: container parse_from: body.from