Skip to content

Commit

Permalink
Fix bad bytes count in docker input when filtering by stream (#10211)
Browse files Browse the repository at this point in the history
This error happened when a reader restarts after having filtered one
stream (stderr or stdout) before:

```
2018-12-04T02:32:18.636Z        ERROR   log/harvester.go:281    Read line error: parsing CRI timestamp: parsing time "3.chunk.js" as "2006-01-02T15:04:05Z07:00": cannot parse "unk.js" as "2006"; File: /var/lib/docker/containers/2a7d435274655d581cbdba63c8477042a30583c7cb84c9b2ef5a4c5af9bd2da7/2a7d435274655d581cbdba63c8477042a30583c7cb84c9b2ef5a4c5af9bd2da7-json.log
```

Reason is bytes count is wrong, as filtered lines are not accounted.
Uppon reader restart, it begins reading from the wrong offset, resuling
in an parsing error.

This PR fixes bytes count for this case, and adds tests to make sure the
count is now right.
  • Loading branch information
exekias authored Jan 22, 2019
1 parent daa4911 commit f5cf3d2
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
{issue}9836[9836] error log: {pull}9869[9869], access log: {pull}9955[9955].
- Support haproxy log lines without captured headers. {issue}9463[9463] {pull}9958[9958]
- Make elasticsearch/audit fileset be more lenient in parsing node name. {issue}10035[10035] {pull}10135[10135]
- Fix bad bytes count in `docker` input when filtering by stream. {pull}10211[10211]

*Heartbeat*

Expand Down
12 changes: 11 additions & 1 deletion libbeat/reader/readjson/docker_json.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,14 @@ func (p *DockerJSONReader) parseLine(message *reader.Message, msg *logLine) erro

// Next returns the next line.
func (p *DockerJSONReader) Next() (reader.Message, error) {
var bytes int
for {
message, err := p.reader.Next()

// keep the right bytes count even if we return an error
bytes += message.Bytes
message.Bytes = bytes

if err != nil {
return message, err
}
Expand All @@ -185,6 +191,11 @@ func (p *DockerJSONReader) Next() (reader.Message, error) {
// Handle multiline messages, join partial lines
for p.partial && logLine.Partial {
next, err := p.reader.Next()

// keep the right bytes count even if we return an error
bytes += next.Bytes
message.Bytes = bytes

if err != nil {
return message, err
}
Expand All @@ -193,7 +204,6 @@ func (p *DockerJSONReader) Next() (reader.Message, error) {
return message, err
}
message.Content = append(message.Content, next.Content...)
message.Bytes += next.Bytes
}

if p.stream != "all" && p.stream != logLine.Stream {
Expand Down
38 changes: 34 additions & 4 deletions libbeat/reader/readjson/docker_json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,36 @@ func TestDockerJSON(t *testing.T) {
input: [][]byte{[]byte(`this is not JSON`)},
stream: "all",
expectedError: true,
expectedMessage: reader.Message{
Bytes: 16,
},
},
{
name: "Wrong CRI",
input: [][]byte{[]byte(`2017-09-12T22:32:21.212861448Z stdout`)},
stream: "all",
expectedError: true,
expectedMessage: reader.Message{
Bytes: 37,
},
},
{
name: "Wrong CRI",
input: [][]byte{[]byte(`{this is not JSON nor CRI`)},
stream: "all",
expectedError: true,
expectedMessage: reader.Message{
Bytes: 25,
},
},
{
name: "Missing time",
input: [][]byte{[]byte(`{"log":"1:M 09 Nov 13:27:36.276 # User requested shutdown...\n","stream":"stdout"}`)},
stream: "all",
expectedError: true,
expectedMessage: reader.Message{
Bytes: 82,
},
},
{
name: "CRI log no tags",
Expand All @@ -98,7 +110,7 @@ func TestDockerJSON(t *testing.T) {
criflags: true,
},
{
name: "Filtering stream",
name: "Filtering stream, bytes count accounts for all (filtered and not)",
input: [][]byte{
[]byte(`{"log":"filtered\n","stream":"stdout","time":"2017-11-09T13:27:36.277747246Z"}`),
[]byte(`{"log":"unfiltered\n","stream":"stderr","time":"2017-11-09T13:27:36.277747246Z"}`),
Expand All @@ -109,11 +121,11 @@ func TestDockerJSON(t *testing.T) {
Content: []byte("unfiltered\n"),
Fields: common.MapStr{"stream": "stderr"},
Ts: time.Date(2017, 11, 9, 13, 27, 36, 277747246, time.UTC),
Bytes: 80,
Bytes: 158,
},
},
{
name: "Filtering stream",
name: "Filtering CRI stream, bytes count accounts for all (filtered and not)",
input: [][]byte{
[]byte(`2017-10-12T13:32:21.232861448Z stdout F 2017-10-12 13:32:21.212 [INFO][88] table.go 710: Invalidating dataplane cache`),
[]byte(`2017-11-12T23:32:21.212771448Z stderr F 2017-11-12 23:32:21.212 [ERROR][77] table.go 111: error`),
Expand All @@ -124,7 +136,7 @@ func TestDockerJSON(t *testing.T) {
Content: []byte("2017-11-12 23:32:21.212 [ERROR][77] table.go 111: error"),
Fields: common.MapStr{"stream": "stderr"},
Ts: time.Date(2017, 11, 12, 23, 32, 21, 212771448, time.UTC),
Bytes: 95,
Bytes: 212,
},
criflags: true,
},
Expand Down Expand Up @@ -196,6 +208,9 @@ func TestDockerJSON(t *testing.T) {
stream: "all",
forceCRI: true,
expectedError: true,
expectedMessage: reader.Message{
Bytes: 82,
},
},
{
name: "Force CRI log no tags",
Expand Down Expand Up @@ -257,6 +272,19 @@ func TestDockerJSON(t *testing.T) {
forceCRI: true,
criflags: true,
},
{
name: "Error parsing still keeps good bytes count",
input: [][]byte{
[]byte(`{"log":"1:M 09 Nov 13:27:36.276 # User requested ","stream":"stdout","time":"2017-11-09T13:27:36.277747246Z"}`),
[]byte(`{"log":"shutdown...\n","stream`),
},
stream: "stdout",
expectedError: true,
expectedMessage: reader.Message{
Bytes: 139,
},
partial: true,
},
}

for _, test := range tests {
Expand All @@ -273,6 +301,8 @@ func TestDockerJSON(t *testing.T) {

if err == nil {
assert.EqualValues(t, test.expectedMessage, message)
} else {
assert.Equal(t, test.expectedMessage.Bytes, message.Bytes)
}
})
}
Expand Down

0 comments on commit f5cf3d2

Please sign in to comment.