Skip to content

Commit

Permalink
W3C plugin (#307)
Browse files Browse the repository at this point in the history
* initial w3c plugin

* use bool not string

* w3c test cases and schema

* remove fields header after csv parser

* typo

Co-authored-by: EricWHolt <39141134+ericwholt@users.noreply.github.com>

* remove duplicate param

* fix min stanza version

* use stanza 1.2.0

* use go 1.16

Co-authored-by: EricWHolt <39141134+ericwholt@users.noreply.github.com>
  • Loading branch information
Joseph Sirianni and ericwholt authored Aug 25, 2021
1 parent 35d90b3 commit dac1c33
Show file tree
Hide file tree
Showing 10 changed files with 285 additions and 47 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v1
with:
go-version: '1.16'
- name: go test
run: make test
135 changes: 135 additions & 0 deletions plugins/w3c.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Plugin Info
# Metadata for the W3C file-input plugin, followed by the parameters a user
# may set when adding the plugin to a pipeline.
version: 0.0.1
title: W3C
description: File Input W3C Parser
min_stanza_version: 1.2.0
parameters:
  # Files to read; the only required parameter.
  - name: file_log_path
    label: File Path
    description: >-
      Specify a single path or multiple paths to read one or many files.
      You may also use a wildcard (*) to read multiple files within a directory.
    type: strings
    required: true

  # Files to skip; empty by default.
  - name: exclude_file_log_path
    label: Exclude File Path
    description: >-
      Specify a single path or multiple paths to exclude one or many files
      from being read. You may also use a wildcard (*) to exclude multiple
      files from being read within a directory.
    type: strings
    default: []

  # Text encoding of the input files; restricted to the values listed.
  - name: encoding
    label: Encoding
    description: >-
      Specify the encoding of the file(s) being read.
      In most cases, you can leave the default option selected.
    type: enum
    valid_values:
      - nop
      - utf-8
      - utf-16le
      - utf-16be
      - ascii
      - big5
    default: nop

  # Value of the log_type label attached to every entry.
  - name: log_type
    label: Type
    description: Adds the specified 'Type' as a label to each log message.
    type: string
    default: w3c

  # Where to begin reading a file.
  - name: start_at
    label: Start At
    description: Start reading file from 'beginning' or 'end'
    type: enum
    valid_values:
      - beginning
      - end
    default: end

  # File name / path labelling switches (real booleans, not strings).
  - name: include_file_name
    label: Include File Name
    description: Include File Name as a label
    type: bool
    default: true
  - name: include_file_path
    label: Include File Path
    description: Include File Path as a label
    type: bool
    default: false
  - name: include_file_name_resolved
    label: Include Resolved File Name
    description: >-
      Same as include_file_name, however, if file name is a symlink,
      the underlying file's name will be set as a label
    type: bool
    default: false
  - name: include_file_path_resolved
    label: Include Resolved File Path
    description: >-
      Same as include_file_path, however, if file path is a symlink,
      the underlying file's path will be set as a label
    type: bool
    default: false

  # Name of the W3C header that carries the field names.
  - name: fields_header
    label: W3C Fields Header
    description: The W3C header which specifies the field names
    type: string
    default: Fields

  # Delimiters; double quotes are required so that \t is read as a tab.
  - name: delimiter
    label: Delimiter
    description: Delimiter character used between fields (Defaults to a tab character)
    type: string
    default: "\t"
  - name: header_delimiter
    label: Header Delimiter
    description: Header Delimiter character used between values in a header (Defaults to a tab character)
    type: string
    default: "\t"
# Set Defaults
# Each template variable below is assigned from the parameter of the same
# name, with a fallback literal that mirrors that parameter's `default:` value
# in the parameters list above. The pipeline section reads these variables.
# NOTE(review): the directives sit on YAML comment lines, presumably so the
# file still parses as plain YAML before it is rendered - confirm.
# {{$encoding := default "nop" .encoding}}
# {{$log_type := default "w3c" .log_type}}
# {{$start_at := default "end" .start_at}}
# {{$include_file_name := default true .include_file_name}}
# {{$include_file_path := default false .include_file_path}}
# {{$include_file_name_resolved := default false .include_file_name_resolved}}
# {{$include_file_path_resolved := default false .include_file_path_resolved}}
# {{$fields_header := default "Fields" .fields_header}}
# {{$delimiter := default "\t" .delimiter}}
# {{$header_delimiter := default "\t" .header_delimiter}}

# Pipeline Template
# Four operators: read the files, drop stray header lines, parse each record
# as delimited text, then remove the fields-header label.
pipeline:
  # Read the configured files. label_regex matches "#Key: value" header lines;
  # per the note further down, those headers end up as labels on each entry.
  - type: file_input
    start_at: '{{ $start_at }}'
    label_regex: '^#(?P<key>.*?): (?P<value>.*)'
    include_file_name: {{ $include_file_name }}
    include_file_path: {{ $include_file_path }}
    include_file_name_resolved: {{ $include_file_name_resolved }}
    include_file_path_resolved: {{ $include_file_path_resolved }}
    include:
    # {{ range $i, $fp := .file_log_path }}
      - '{{ $fp }}'
    # {{ end }}
    # {{ if .exclude_file_log_path }}
    exclude:
    # {{ range $i, $efp := .exclude_file_log_path }}
      - '{{ $efp }}'
    # {{ end }}
    # {{ end }}
    # {{ if $encoding }}
    encoding: '{{ $encoding }}'
    # {{ end }}
    labels:
      plugin_id: {{ .id }}
      log_type: '{{ $log_type }}'

  # Ignore header lines that may exist in the file periodically
  # or at the end. File input has already read the headers at the top
  # of the file, and attached them as labels to each entry.
  # For example, some w3c logs may have these two fields at the end of a file:
  #
  #   #End-Date: 2021-07-21 14:40:00
  #   #X-Records: 41373
  #
  - type: filter
    expr: '$record matches "^#"'

  # Leverage CSV parser's dynamic field name detection by specifying
  # delimiter, header_delimiter, and header_label
  - type: csv_parser
    delimiter: '{{ $delimiter }}'
    header_delimiter: '{{ $header_delimiter }}'
    header_label: '{{ $fields_header }}'

  # Remove the fields-header label after the CSV parser has run; this is the
  # last operator, so it also carries the plugin's output.
  - type: remove
    field: '$labels.{{ $fields_header }}'
    output: {{ .output }}
45 changes: 45 additions & 0 deletions schemas/w3c.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Schema for the w3c plugin file (validator syntax looks like Yamale - confirm).
# First document: the top-level keys. Second document: the named definitions
# that include('...') refers to.
# NOTE(review): the plugin file also sets `min_stanza_version`, parameter keys
# `required` / `default`, and operator keys such as `label_regex`, `exclude`,
# `encoding`, `expr`, `delimiter` and `field`, none of which are listed here -
# confirm the validator is not run in a mode that rejects unlisted keys.
version: str()
title: str()
description: str()
parameters: list(include('parameter'))
pipeline: list(include('operator'))
---
# One entry of the top-level `parameters` list.
parameter:
  name: str()
  label: str()
  description: str()
  type: str()
  valid_values: list(required=False)

# One entry of the top-level `pipeline` list; only `type` is mandatory.
operator:
  id: str(required=False)
  type: str()
  include: list(required=False)
  start_at: str(required=False)
  labels: map(str(), required=False)
  output: str(required=False)
  regex: str(required=False)
  timestamp: map(include('timestamp_list'), str(), map(), required=False)
  severity: map(include('severity_list'), str(), map(), required=False)

# Keys of an operator's `timestamp` block.
timestamp_list:
  parse_from: str()
  layout: str()

# Keys of an operator's `severity` block.
severity_list:
  parse_from: str()
  preset: str(required=False)
  mapping: list(include('mapping_list'), str(), required=False)
  preserve_to: str(required=False)
  if: str(required=False)

# Severity names accepted inside `severity.mapping`.
mapping_list:
  info: str(required=False)
  error: str(required=False)
  warning: str(required=False)
  critical: str(required=False)
  debug: str(required=False)
  trace: str(required=False)
  alert: str(required=False)
  emergency: str(required=False)
  catastrophe: str(required=False)
catastrophe: str(required=False)
5 changes: 5 additions & 0 deletions test/configs/w3c/invalid/invalid_encoding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Invalid config: "stable" is not among the encodings the w3c plugin lists in
# its valid_values (nop, utf-8, utf-16le, utf-16be, ascii, big5).
# NOTE(review): file_log_path is a scalar here although the plugin declares it
# as type `strings` - confirm the rejection is actually caused by `encoding`.
pipeline:
  - type: w3c
    file_log_path: "/tmp/in.log"
    encoding: "stable"
  - type: stdout
5 changes: 5 additions & 0 deletions test/configs/w3c/invalid/invalid_start_at.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Invalid config: the w3c plugin only lists `beginning` and `end` as valid
# start_at values, so `endf` should be rejected.
# NOTE(review): file_log_path is a scalar here although the plugin declares it
# as type `strings` - confirm the rejection is actually caused by `start_at`.
pipeline:
  - type: w3c
    file_log_path: "/tmp/in.log"
    start_at: endf
  - type: stdout
18 changes: 18 additions & 0 deletions test/configs/w3c/valid/full.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Valid config that sets every w3c plugin parameter to a non-default value.
pipeline:
  - type: w3c
    file_log_path:
      - "/tmp/in.log"
      - "/var/log/w3c/*"
    exclude_file_log_path:
      - "/var/log/w3c/*.tar.gz"
    encoding: "ascii"
    log_type: "custom_w3c"
    # Was misspelled `stat_at`, which is not a plugin parameter, so the
    # non-default start position was never exercised.
    start_at: "beginning"
    include_file_name: false
    include_file_path: true
    include_file_name_resolved: true
    include_file_path_resolved: true
    fields_header: LogFields
    delimiter: "-"
    header_delimiter: ":"
  - type: stdout
5 changes: 5 additions & 0 deletions test/configs/w3c/valid/minimal.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Valid config: only the required parameter (one file path) is set; every
# other w3c parameter is left at its default.
pipeline:
  - type: w3c
    file_log_path:
      - "/tmp/in.log"
  - type: stdout
6 changes: 6 additions & 0 deletions test/configs/w3c/valid/minimal_2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Valid config: only the required parameter is set, this time with two
# entries (a literal path and a wildcard path).
pipeline:
  - type: w3c
    file_log_path:
      - "/tmp/in.log"
      - "/var/log/w3c/*"
  - type: stdout
2 changes: 1 addition & 1 deletion test/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.16

require (
github.com/go-git/go-git/v5 v5.4.1
github.com/observiq/stanza v1.1.7
github.com/observiq/stanza v1.2.0
github.com/stretchr/testify v1.7.0
github.com/testcontainers/testcontainers-go v0.11.1
go.uber.org/zap v1.17.0
Expand Down
Loading

0 comments on commit dac1c33

Please sign in to comment.