Skip to content

Commit

Permalink
Feature/harubins/add multiline pattern config (#26460)
Browse files Browse the repository at this point in the history
**Description:** Add a flag to the multiline config that, when set to true,
omits the text matched by the pattern from the emitted logs.

**Link to tracking Issue:**
#26381

**Testing:** manual testing and unit tests in the multiline_test file.
  • Loading branch information
haimrubinstein authored Sep 19, 2023
1 parent 10851b4 commit e2e212c
Show file tree
Hide file tree
Showing 10 changed files with 240 additions and 7 deletions.
17 changes: 17 additions & 0 deletions .chloggen/add_multiline_pattern_omit_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add 'omit_pattern' setting to `split.Config`.

# One or more tracking issues related to the change
issues: [26381]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
This can be used to omit the start or end pattern from a log entry.
2 changes: 2 additions & 0 deletions pkg/stanza/docs/operators/file_input.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ If set, the `multiline` configuration block instructs the `file_input` operator
The `multiline` configuration block must contain exactly one of `line_start_pattern` or `line_end_pattern`. These are regex patterns that
match either the beginning of a new log entry, or the end of a log entry.

The `omit_pattern` setting is a boolean. When set to `true`, the text matched by `line_start_pattern` or `line_end_pattern` is removed from each entry.

When using multiline, the last log entry may sometimes not be flushed because the operator is still waiting for more content.
To forcibly flush the last buffered log after a certain period of time,
use the `force_flush_period` option.
Expand Down
2 changes: 2 additions & 0 deletions pkg/stanza/docs/operators/tcp_input.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ If set, the `multiline` configuration block instructs the `tcp_input` operator t
The `multiline` configuration block must contain exactly one of `line_start_pattern` or `line_end_pattern`. These are regex patterns that
match either the beginning of a new log entry, or the end of a log entry.

The `omit_pattern` setting is a boolean. When set to `true`, the text matched by `line_start_pattern` or `line_end_pattern` is removed from each entry.

#### Supported encodings

| Key | Description
Expand Down
2 changes: 2 additions & 0 deletions pkg/stanza/docs/operators/udp_input.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ If set, the `multiline` configuration block instructs the `udp_input` operator t
The `multiline` configuration block must contain exactly one of `line_start_pattern` or `line_end_pattern`. These are regex patterns that
match either the beginning of a new log entry, or the end of a log entry.

The `omit_pattern` setting is a boolean. When set to `true`, the text matched by `line_start_pattern` or `line_end_pattern` is removed from each entry.

#### Supported encodings

| Key | Description
Expand Down
21 changes: 17 additions & 4 deletions pkg/stanza/split/split.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
type Config struct {
// LineStartPattern is a regex source string; when non-empty it is compiled
// with the "(?m)" flag and used to build a split func whose tokens start
// at a match.
LineStartPattern string `mapstructure:"line_start_pattern"`
// LineEndPattern is a regex source string; when non-empty it is compiled
// and used to build a split func whose tokens end at a match.
LineEndPattern string `mapstructure:"line_end_pattern"`
// OmitPattern is forwarded to LineStartSplitFunc / LineEndSplitFunc. When
// true, the bytes matched by the start/end pattern are excluded from each
// returned token.
OmitPattern bool `mapstructure:"omit_pattern"`
}

// Func will return a bufio.SplitFunc based on the config
Expand All @@ -37,20 +38,20 @@ func (c Config) Func(enc encoding.Encoding, flushAtEOF bool, maxLogSize int) (sp
if err != nil {
return nil, fmt.Errorf("compile line end regex: %w", err)
}
splitFunc = LineEndSplitFunc(re, flushAtEOF)
splitFunc = LineEndSplitFunc(re, c.OmitPattern, flushAtEOF)
case c.LineStartPattern != "":
re, err := regexp.Compile("(?m)" + c.LineStartPattern)
if err != nil {
return nil, fmt.Errorf("compile line start regex: %w", err)
}
splitFunc = LineStartSplitFunc(re, flushAtEOF)
splitFunc = LineStartSplitFunc(re, c.OmitPattern, flushAtEOF)
}
return splitFunc, nil
}

// LineStartSplitFunc creates a bufio.SplitFunc that splits an incoming stream into
// tokens that start with a match to the regex pattern provided
func LineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
func LineStartSplitFunc(re *regexp.Regexp, omitPattern bool, flushAtEOF bool) bufio.SplitFunc {
return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
firstLoc := re.FindIndex(data)
if firstLoc == nil {
Expand Down Expand Up @@ -81,6 +82,10 @@ func LineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {

// Flush if no more data is expected
if atEOF && flushAtEOF {
if omitPattern {
return len(data), data[firstMatchEnd:], nil
}

return len(data), data, nil
}

Expand All @@ -90,6 +95,9 @@ func LineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
return 0, nil, nil // read more data and try again
}
secondMatchStart := secondLoc[0] + secondLocOfset
if omitPattern {
return secondMatchStart, data[firstMatchEnd:secondMatchStart], nil
}

// start scanning at the beginning of the second match
// the token begins at the first match, and ends at the beginning of the second match
Expand All @@ -99,7 +107,7 @@ func LineStartSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {

// LineEndSplitFunc creates a bufio.SplitFunc that splits an incoming stream into
// tokens that end with a match to the regex pattern provided
func LineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
func LineEndSplitFunc(re *regexp.Regexp, omitPattern bool, flushAtEOF bool) bufio.SplitFunc {
return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
loc := re.FindIndex(data)
if loc == nil {
Expand All @@ -115,6 +123,11 @@ func LineEndSplitFunc(re *regexp.Regexp, flushAtEOF bool) bufio.SplitFunc {
if loc[1] == len(data)-1 && !atEOF {
return 0, nil, nil
}

if omitPattern {
return loc[1], data[:loc[0]], nil
}

return loc[1], data[:loc[1]], nil
}
}
Expand Down
Loading

0 comments on commit e2e212c

Please sign in to comment.