From e1aa8528bc4c46843558d218bf3c164af51a9938 Mon Sep 17 00:00:00 2001 From: Chris Mark Date: Thu, 1 Feb 2024 20:56:11 +0200 Subject: [PATCH] [receiver/filelog] Add docs for offset tracking (#30914) **Description:** This PR adds documentation notes on how to achieve fault tolerance on `filelog`'s receiver offset tracking. The need for this is obvious but was also explained at https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/20552#issuecomment-1908225519. **Link to tracking Issue:** **Testing:** **Documentation:** --------- Signed-off-by: ChrsMark --- .../fault-tolerant-logs-collection/README.md | 9 +++ .../otel-col-config.yaml | 24 +++++++ examples/my-config.yaml | 63 +++++++++++++++++++ receiver/filelogreceiver/README.md | 7 ++- 4 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 examples/fault-tolerant-logs-collection/README.md create mode 100644 examples/fault-tolerant-logs-collection/otel-col-config.yaml create mode 100644 examples/my-config.yaml diff --git a/examples/fault-tolerant-logs-collection/README.md b/examples/fault-tolerant-logs-collection/README.md new file mode 100644 index 000000000000..54f868a3eb9a --- /dev/null +++ b/examples/fault-tolerant-logs-collection/README.md @@ -0,0 +1,9 @@ +## Fault tolerant log collection example + +Fault-tolerant log collection with the filelog receiver can be achieved by using the following components: +- [filestorage](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/storage/filestorage) extension, + to ensure that Collector's restarts do not affect the log collection and offset tracking. +- [exporterhelper persistent-queue](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/exporterhelper/README.md#persistent-queue), + to ensure that Collector's restarts do not affect the delivery of the already collected logs.
+ +A full configuration example is provided in [example config](./otel-col-config.yaml) \ No newline at end of file diff --git a/examples/fault-tolerant-logs-collection/otel-col-config.yaml b/examples/fault-tolerant-logs-collection/otel-col-config.yaml new file mode 100644 index 000000000000..eec53399f20a --- /dev/null +++ b/examples/fault-tolerant-logs-collection/otel-col-config.yaml @@ -0,0 +1,24 @@ +receivers: + filelog: + include: [/var/log/busybox/simple.log] + storage: file_storage/filelogreceiver + +extensions: + file_storage/filelogreceiver: + directory: /var/lib/otelcol/file_storage/receiver + file_storage/otlpoutput: + directory: /var/lib/otelcol/file_storage/output + +service: + extensions: [file_storage/filelogreceiver, file_storage/otlpoutput] + pipelines: + logs: + receivers: [filelog] + exporters: [otlp/custom] + processors: [] + +exporters: + otlp/custom: + endpoint: http://0.0.0.0:4242 + sending_queue: + storage: file_storage/otlpoutput diff --git a/examples/my-config.yaml b/examples/my-config.yaml new file mode 100644 index 000000000000..ebc3a2c488a2 --- /dev/null +++ b/examples/my-config.yaml @@ -0,0 +1,63 @@ +receivers: + filelog: + include: + #- /var/log/busybox/*.log + - /var/lib/docker/containers/cf79f880f414937e7befa0e4d2770590a19d83058b4f5df0e1cd22d819c836b3/cf79f880f414937e7befa0e4d2770590a19d83058b4f5df0e1cd22d819c836b3-json.log + #storage: file_storage/filelogreceiver + #start_at: beginning + operators: + - id: get-format + routes: + - expr: body matches "^\\{" + output: parser-docker + type: router + - id: parser-docker + timestamp: + layout: '%Y-%m-%dT%H:%M:%S.%LZ' + parse_from: attributes.time + type: json_parser + - from: attributes.log + to: body + type: move + +processors: + transform: + error_mode: ignore + log_statements: + - context: log + statements: + # Parse body as JSON and merge the resulting map with the cache map, ignoring non-json bodies. 
+ # cache is a field exposed by OTTL that is a temporary storage place for complex operations. + - merge_maps(cache, ParseJSON(body), "upsert") where IsMatch(body, "^\\{") + + # Set attributes using the values merged into cache. + # If the attribute doesn't exist in cache then nothing happens. + - set(attributes["message"], cache["message"]) + - set(attributes["severity"], cache["log.level"]) + - merge_maps(attributes, cache, "upsert") + +extensions: + file_storage/filelogreceiver: + directory: /home/chrismark/otelcol/file_storage/freceiver + file_storage/otcouput: + directory: /home/chrismark/otelcol/file_storage/output + +service: + extensions: [file_storage/filelogreceiver, file_storage/otcouput] + pipelines: + logs: + receivers: [filelog] + exporters: [otlp/elastic] + processors: [transform] +# telemetry: +# logs: +# level: "debug" + +exporters: + otlp/elastic: + endpoint: http://0.0.0.0:8200 + sending_queue: + storage: file_storage/otcouput + tls: + insecure: true + insecure_skip_verify: true diff --git a/receiver/filelogreceiver/README.md b/receiver/filelogreceiver/README.md index 8f0795d4d7ed..518afe40873a 100644 --- a/receiver/filelogreceiver/README.md +++ b/receiver/filelogreceiver/README.md @@ -42,7 +42,7 @@ Tails and parses logs from files. | `attributes` | {} | A map of `key: value` pairs to add to the entry's attributes. | | `resource` | {} | A map of `key: value` pairs to add to the entry's resource. | | `operators` | [] | An array of [operators](../../pkg/stanza/docs/operators/README.md#what-operators-are-available). See below for more details. | -| `storage` | none | The ID of a storage extension to be used to store file checkpoints. File checkpoints allow the receiver to pick up where it left off in the case of a collector restart. If no storage extension is used, the receiver will manage checkpoints in memory only. | +| `storage` | none | The ID of a storage extension to be used to store file offsets. 
File offsets allow the receiver to pick up where it left off in the case of a collector restart. If no storage extension is used, the receiver will manage offsets in memory only. | | `header` | nil | Specifies options for parsing header metadata. Requires that the `filelog.allowHeaderMetadataParsing` feature gate is enabled. See below for details. Must be `false` when `start_at` is set to `end`. | | `header.pattern` | required for header metadata parsing | A regex that matches every header line. | | `header.metadata_operators` | required for header metadata parsing | A list of operators used to parse metadata from the header. | @@ -153,4 +153,9 @@ The above configuration will read logs from the "simple.log" file. Some examples 2023-06-20 12:50:00 DEBUG This is a test debug message ``` +## Offset tracking +The `storage` setting allows you to define the storage extension to be used for storing file offsets. +While the storage parameter can ensure that log files are consumed accurately, it is possible that +logs are dropped while moving downstream through other components in the collector. +For additional resiliency, see [Fault tolerant log collection example](../../examples/fault-tolerant-logs-collection/README.md).