From fd5360486e025eedc67bbc34ec52629398347af8 Mon Sep 17 00:00:00 2001
From: Christoph Wurm
Date: Fri, 1 Nov 2019 10:34:42 +0000
Subject: [PATCH] GA the script processor (#14325)

Mark the `script` processor as GA. Since being introduced in 7.2, it has
seen a good amount of adoption. Several modules in Filebeat and Winlogbeat
are built on top of it.
---
 CHANGELOG.next.asciidoc                |    1 +
 libbeat/docs/processors-using.asciidoc | 1543 +++++++++++++++++++++++-
 2 files changed, 1543 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index 415cf6de85b..d5197556ee3 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -277,6 +277,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
 - Marking Central Management deprecated. {pull}14018[14018]
 - Add `keep_null` setting to allow Beats to publish null values in events. {issue}5522[5522] {pull}13928[13928]
 - Add shared_credential_file option in aws related config for specifying credential file directory. {issue}14157[14157] {pull}14178[14178]
+- GA the `script` processor. {pull}14325[14325]
 
 *Auditbeat*
 
diff --git a/libbeat/docs/processors-using.asciidoc b/libbeat/docs/processors-using.asciidoc
index 87dd305aabb..9aa3fa61a6b 100644
--- a/libbeat/docs/processors-using.asciidoc
+++ b/libbeat/docs/processors-using.asciidoc
@@ -467,4 +467,1545 @@ not:
   status: OK
 ------
-include::processors-list.asciidoc[tag=processors-include]
+[[add-cloud-metadata]]
+=== Add cloud metadata
+
+The `add_cloud_metadata` processor enriches each event with instance metadata
+from the machine's hosting provider. At startup it will query a list of hosting
+providers and cache the instance metadata.
+
+The following cloud providers are supported:
+
+- Amazon Web Services (AWS)
+- Digital Ocean
+- Google Compute Engine (GCE)
+- https://www.qcloud.com/?lang=en[Tencent Cloud] (QCloud)
+- Alibaba Cloud (ECS)
+- Azure Virtual Machine
+- Openstack Nova
+
+The Alibaba Cloud and Tencent Cloud providers are disabled by default, because
+they require access to a remote host. The `providers` setting allows users to
+select which providers to query.
+
+The simple configuration below enables the processor.
+
+[source,yaml]
+-------------------------------------------------------------------------------
+processors:
+- add_cloud_metadata: ~
+-------------------------------------------------------------------------------
+
+The `add_cloud_metadata` processor has three optional configuration settings.
+The first one is `timeout`, which specifies the maximum amount of time to wait
+for a successful response when detecting the hosting provider. The default
+timeout value is `3s`.
+
+If a timeout occurs, no instance metadata is added to the events. This
+makes it possible to enable this processor for all your deployments (in the
+cloud or on-premises).
+
+The second optional setting is `providers`. The `providers` setting accepts a
+list of cloud provider names to be used. If `providers` is not configured, then
+all providers that do not access a remote endpoint are enabled by default.
+
+List of names the `providers` setting supports:
+
+- "alibaba", or "ecs" for the Alibaba Cloud provider (disabled by default).
+- "azure" for Azure Virtual Machine (enabled by default).
+- "digitalocean" for Digital Ocean (enabled by default).
+- "aws", or "ec2" for Amazon Web Services (enabled by default).
+- "gcp" for Google Compute Engine (enabled by default).
+- "openstack", or "nova" for Openstack Nova (enabled by default). +- "tencent", or "qcloud" for Tencent Cloud (disabled by default). + +The third optional configuration setting is `overwrite`. When `overwrite` is +`true`, `add_cloud_metadata` overwrites existing `cloud.*` fields (`false` by +default). + +The metadata that is added to events varies by hosting provider. Below are +examples for each of the supported providers. + +_AWS_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "account.id": "123456789012", + "availability_zone": "us-east-1c", + "instance.id": "i-4e123456", + "machine.type": "t2.medium", + "image.id": "ami-abcd1234", + "provider": "aws", + "region": "us-east-1" + } +} +------------------------------------------------------------------------------- + +_Digital Ocean_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "instance.id": "1234567", + "provider": "digitalocean", + "region": "nyc2" + } +} +------------------------------------------------------------------------------- + +_GCP_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "availability_zone": "us-east1-b", + "instance.id": "1234556778987654321", + "machine.type": "f1-micro", + "project.id": "my-dev", + "provider": "gcp" + } +} +------------------------------------------------------------------------------- + +_Tencent Cloud_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "availability_zone": "gz-azone2", + "instance.id": "ins-qcloudv5", + "provider": "qcloud", + "region": "china-south-gz" + } +} +------------------------------------------------------------------------------- + +_Alibaba Cloud_ + +This metadata is only available when VPC is selected as the network type of the +ECS instance. + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "availability_zone": "cn-shenzhen", + "instance.id": "i-wz9g2hqiikg0aliyun2b", + "provider": "ecs", + "region": "cn-shenzhen-a" + } +} +------------------------------------------------------------------------------- + +_Azure Virtual Machine_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "provider": "az", + "instance.id": "04ab04c3-63de-4709-a9f9-9ab8c0411d5e", + "instance.name": "test-az-vm", + "machine.type": "Standard_D3_v2", + "region": "eastus2" + } +} +------------------------------------------------------------------------------- + +_Openstack Nova_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "instance.name": "test-998d932195.mycloud.tld", + "instance.id": "i-00011a84", + "availability_zone": "xxxx-az-c", + "provider": "openstack", + "machine.type": "m2.large" + } +} +------------------------------------------------------------------------------- + +[[add-fields]] +=== Add fields + +The `add_fields` processor adds additional fields to the event. Fields can be +scalar values, arrays, dictionaries, or any nested combination of these. By +default the fields that you specify will be grouped under the `fields` +sub-dictionary in the event. To group the fields under a different +sub-dictionary, use the `target` setting. To store the fields as +top-level fields, set `target: ''`. 
+ +`target`:: (Optional) Sub-dictionary to put all fields into. Defaults to `fields`. +`fields`:: Fields to be added. + + +For example, this configuration: + +[source,yaml] +------------------------------------------------------------------------------ +processors: +- add_fields: + target: project + fields: + name: myproject + id: '574734885120952459' +------------------------------------------------------------------------------ + +Adds these fields to any event: + +[source,json] +------------------------------------------------------------------------------- +{ + "project": { + "name": "myproject", + "id": "574734885120952459" + } +} +------------------------------------------------------------------------------- + + +[[add-labels]] +=== Add labels + +The `add_labels` processors adds a set of key-value pairs to an event. +The processor will flatten nested configuration objects like arrays or +dictionaries into a fully qualified name by merging nested names with a `.`. +Array entries create numeric names starting with 0. Labels are always stored +under the Elastic Common Schema compliant `labels` sub-dictionary. + +`labels`:: dictionaries of labels to be added. + +For example, this configuration: + +[source,yaml] +------------------------------------------------------------------------------ +processors: +- add_labels: + labels: + number: 1 + with.dots: test + nested: + with.dots: nested + array: + - do + - re + - with.field: mi +------------------------------------------------------------------------------ + +Adds these fields to every event: + +[source,json] +------------------------------------------------------------------------------- +{ + "labels": { + "number": 1, + "with.dots": "test", + "nested.with.dots": "nested", + "array.0": "do", + "array.1": "re", + "array.2.with.field": "mi" + } +} +------------------------------------------------------------------------------- + + +[[add-locale]] +=== Add the local time zone + +The `add_locale` processor enriches each event with the machine's time zone +offset from UTC or with the name of the time zone. It supports one configuration +option named `format` that controls whether an offset or time zone abbreviation +is added to the event. The default format is `offset`. The processor adds the +a `event.timezone` value to each event. + +The configuration below enables the processor with the default settings. + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_locale: ~ +------------------------------------------------------------------------------- + +This configuration enables the processor and configures it to add the time zone +abbreviation to events. + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_locale: + format: abbreviation +------------------------------------------------------------------------------- + +NOTE: Please note that `add_locale` differentiates between daylight savings +time (DST) and regular time. For example `CEST` indicates DST and and `CET` is +regular time. + +[[add-tags]] +=== Add tags + +The `add_tags` processor adds tags to a list of tags. If the target field already exists, +the tags are appended to the existing list of tags. + +`tags`:: List of tags to add. +`target`:: (Optional) Field the tags will be added to. Defaults to `tags`. 
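+
+When `target` is omitted, the tags are simply appended to the default `tags`
+field. A minimal sketch (the tag values are only illustrative):
+
+[source,yaml]
+------------------------------------------------------------------------------
+processors:
+- add_tags:
+    tags: [web, production]
+------------------------------------------------------------------------------
+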
+ +For example, this configuration: + + +[source,yaml] +------------------------------------------------------------------------------ +processors: +- add_tags: + tags: [web, production] + target: "environment" +------------------------------------------------------------------------------ + +Adds the environment field to every event: + +[source,json] +------------------------------------------------------------------------------- +{ + "environment": ["web", "production"] +} +------------------------------------------------------------------------------- + +ifdef::has_decode_cef_processor[] +[[processor-decode-cef]] +[role="xpack"] +=== Decode CEF + +beta[] + +The `decode_cef` processor decodes Common Event Format (CEF) messages. This +processor is available in Filebeat. + +Below is an example configuration that decodes the `message` field as CEF after +renaming it to `event.original`. It is best to rename `message` to +`event.original` because the decoded CEF data contains its own `message` field. + +[source,yaml] +---- +processors: +- rename: + fields: + - {from: "message", to: "event.original"} +- decode_cef: + field: event.original +---- + +The `decode_cef` processor has the following configuration settings. + +.Decode CEF options +[options="header"] +|====== +| `field` | no | message | Source field containing the CEF message to be parsed. | +| `target_field` | no | cef | Target field where the parsed CEF object will be written. | +| `ecs` | no | true | Generate Elastic Common Schema (ECS) fields from the CEF data. + Certain CEF header and extension values will be used to populate ECS fields. | +| `ignore_missing` | no | false | Ignore errors when the source field is missing. | +| `ignore_failure` | no | false | Ignore failures when the source field does not contain a CEF message. | +| `id` | no | | An identifier for this processor instance. Useful for debugging. | +|====== +endif::[] + +ifdef::has_decode_csv_fields_processor[] +[[decode-csv-fields]] +=== Decode CSV fields + +experimental[] + +The `decode_csv_fields` processor decodes fields containing records in +comma-separated format (CSV). It will output the values as an array of strings. +This processor is available for Filebeat and Journalbeat. + +[source,yaml] +----------------------------------------------------- +processors: + - decode_csv_fields: + fields: + message: decoded.csv + separator: , + ignore_missing: false + overwrite_keys: true + trim_leading_whitespace: false + fail_on_error: true +----------------------------------------------------- + +The `decode_csv_fields` has the following settings: + +`fields`:: This is a mapping from the source field containing the CSV data to + the destination field to which the decoded array will be written. +`separator`:: (Optional) Character to be used as a column separator. + The default is the comma character. For using a TAB character you + must set it to "\t". +`ignore_missing`:: (Optional) Whether to ignore events which lack the source + field. The default is `false`, which will fail processing of + an event if a field is missing. +`overwrite_keys`:: Whether the target field is overwritten if it + already exists. The default is false, which will fail + processing of an event when `target` already exists. +`trim_leading_space`:: Whether extra space after the separator is trimmed from + values. This works even if the separator is also a space. + The default is `false`. 
+`fail_on_error`:: (Optional) If set to `true`, when an error occurs the changes
+to the event are reverted and the original event is returned. If set to `false`,
+processing continues even if an error happens. Default is `true`.
+
+endif::[]
+
+[[decode-json-fields]]
+=== Decode JSON fields
+
+The `decode_json_fields` processor decodes fields containing JSON strings and
+replaces the strings with valid JSON objects.
+
+[source,yaml]
+-----------------------------------------------------
+processors:
+ - decode_json_fields:
+     fields: ["field1", "field2", ...]
+     process_array: false
+     max_depth: 1
+     target: ""
+     overwrite_keys: false
+     add_error_key: true
+-----------------------------------------------------
+
+The `decode_json_fields` processor has the following configuration settings:
+
+`fields`:: The fields containing JSON strings to decode.
+`process_array`:: (Optional) A boolean that specifies whether to process
+arrays. The default is `false`.
+`max_depth`:: (Optional) The maximum parsing depth. The default is 1.
+`target`:: (Optional) The field under which the decoded JSON will be written. By
+default the decoded JSON object replaces the string field from which it was
+read. To merge the decoded JSON fields into the root of the event, specify
+`target` with an empty string (`target: ""`). Note that the `null` value (`target:`)
+is treated as if the field was not set at all.
+`overwrite_keys`:: (Optional) A boolean that specifies whether keys that already
+exist in the event are overwritten by keys from the decoded JSON object. The
+default value is `false`.
+`add_error_key`:: (Optional) If set to `true` and an error occurs while decoding
+the JSON, an `error` field containing the error message is added to the event.
+If set to `false`, no error field is added even when decoding fails. The
+default value is `false`.
+
+[[decode-base64-field]]
+=== Decode Base64 fields
+
+The `decode_base64_field` processor specifies a field to base64 decode.
+The `field` key contains a `from: old-key` and a `to: new-key` pair. `from` is
+the origin and `to` the target name of the field.
+
+To overwrite fields either first rename the target field or use the `drop_fields`
+processor to drop the field and then rename the field.
+
+[source,yaml]
+-------
+processors:
+- decode_base64_field:
+    field:
+      from: "field1"
+      to: "field2"
+    ignore_missing: false
+    fail_on_error: true
+-------
+
+In the example above:
+
+- `field1` is decoded into `field2`
+
+The `decode_base64_field` processor has the following configuration settings:
+
+`ignore_missing`:: (Optional) If set to `true`, no error is logged in case a key
+which should be base64 decoded is missing. Default is `false`.
+
+`fail_on_error`:: (Optional) If set to `true`, when an error occurs the base64
+decoding of fields is stopped and the original event is returned. If set to
+`false`, decoding continues even if an error happens during decoding. Default
+is `true`.
+
+See <<conditions>> for a list of supported conditions.
+
+[[decompress-gzip-field]]
+=== Decompress gzip fields
+
+The `decompress_gzip_field` processor specifies a field to gzip decompress.
+The `field` key contains a `from: old-key` and a `to: new-key` pair. `from` is
+the origin and `to` the target name of the field.
+
+To overwrite fields either first rename the target field or use the `drop_fields`
+processor to drop the field and then rename the field.
+ +[source,yaml] +------- +processors: +- decompress_gzip_field: + field: + from: "field1" + to: "field2" + ignore_missing: false + fail_on_error: true +------- + +In the example above: + - field1 is decoded in field2 + +The `decompress_gzip_field` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If set to true, no error is logged in case a key +which should be base64 decoded is missing. Default is `false`. + +`fail_on_error`:: (Optional) If set to true, in case of an error the base6 4decode +of fields is stopped and the original event is returned. If set to false, decoding +continues also if an error happened during decoding. Default is `true`. + +See <> for a list of supported conditions. + +[[community-id]] +=== Community ID Network Flow Hash + +The `community_id` processor computes a network flow hash according to the +https://github.com/corelight/community-id-spec[Community ID Flow Hash +specification]. + +The flow hash is useful for correlating all network events related to a +single flow. For example you can filter on a community ID value and you might +get back the Netflow records from multiple collectors and layer 7 protocol +records from Packetbeat. + +By default the processor is configured to read the flow parameters from the +appropriate Elastic Common Schema (ECS) fields. If you are processing ECS data +then no parameters are required. + +[source,yaml] +---- +processors: + - community_id: +---- + +If the data does not conform to ECS then you can customize the field names +that the processor reads from. You can also change the `target` field which +is where the computed hash is written to. + +[source,yaml] +---- +processors: + - community_id: + fields: + source_ip: my_source_ip + source_port: my_source_port + destination_ip: my_dest_ip + destination_port: my_dest_port + iana_number: my_iana_number + transport: my_transport + icmp_type: my_icmp_type + icmp_code: my_icmp_code + target: network.community_id +---- + +If the necessary fields are not present in the event then the processor will +silently continue without adding the target field. + +The processor also accepts an optional `seed` parameter that must be a 16-bit +unsigned integer. This value gets incorporated into all generated hashes. + +[[convert]] +=== Convert + +The `convert` processor converts a field in the event to a different type, such +as converting a string to an integer. + +The supported types include: `integer`, `long`, `float`, `double`, `string`, +`boolean`, and `ip`. + +The `ip` type is effectively an alias for `string`, but with an added validation +that the value is an IPv4 or IPv6 address. + +[source,yaml] +---- +processors: + - convert: + fields: + - {from: "src_ip", to: "source.ip", type: "ip"} + - {from: "src_port", to: "source.port", type: "integer"} + ignore_missing: true + fail_on_error: false +---- + +The `convert` processor has the following configuration settings: + +`fields`:: (Required) This is the list of fields to convert. At least one item +must be contained in the list. Each item in the list must have a `from` key that +specifies the source field. The `to` key is optional and specifies where to +assign the converted value. If `to` is omitted then the `from` field is updated +in-place. The `type` key specifies the data type to convert the value to. If +`type` is omitted then the processor copies or renames the field without any +type conversion. 
+ +`ignore_missing`:: (Optional) If `true` the processor continues to the next +field when the `from` key is not found in the event. If false then the processor +returns an error and does not process the remaining fields. Default is `false`. + +`fail_on_error`:: (Optional) If false type conversion failures are ignored and +the processor continues to the next field. Default is `true`. + +`tag`:: (Optional) An identifier for this processor. Useful for debugging. + +`mode`:: (Optional) When both `from` and `to` are defined for a field then +`mode` controls whether to `copy` or `rename` the field when the type conversion +is successful. Default is `copy`. + +[[drop-event]] +=== Drop events + +The `drop_event` processor drops the entire event if the associated condition +is fulfilled. The condition is mandatory, because without one, all the events +are dropped. + +[source,yaml] +------ +processors: + - drop_event: + when: + condition +------ + +See <> for a list of supported conditions. + +[[drop-fields]] +=== Drop fields from events + +The `drop_fields` processor specifies which fields to drop if a certain +condition is fulfilled. The condition is optional. If it's missing, the +specified fields are always dropped. The `@timestamp` and `type` fields cannot +be dropped, even if they show up in the `drop_fields` list. + +[source,yaml] +----------------------------------------------------- +processors: + - drop_fields: + when: + condition + fields: ["field1", "field2", ...] + ignore_missing: false +----------------------------------------------------- + +See <> for a list of supported conditions. + +NOTE: If you define an empty list of fields under `drop_fields`, then no fields +are dropped. + +The `drop_fields` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If `true` the processor will not return an error +when a specified field does not exist. Defaults to `false`. + +[[extract-array]] +=== Extract array + +experimental[] + +The `extract_array` processor populates fields with values read from an array +field. The following example will populate `source.ip` with the first element of +the `my_array` field, `destination.ip` with the second element, and +`network.transport` with the third. + +[source,yaml] +----------------------------------------------------- +processors: + - extract_array: + field: my_array + mappings: + source.ip: 0 + destination.ip: 1 + network.transport: 2 +----------------------------------------------------- + +The following settings are supported: + +`field`:: The array field whose elements are to be extracted. +`mappings`:: Maps each field name to an array index. Use 0 for the first element in + the array. Multiple fields can be mapped to the same array element. +`ignore_missing`:: (Optional) Whether to ignore events where the array field is + missing. The default is `false`, which will fail processing + of an event if the specified field does not exist. Set it to + `true` to ignore this condition. +`overwrite_keys`:: Whether the target fields specified in the mapping are + overwritten if they already exist. The default is `false`, + which will fail processing if a target field already exists. +`fail_on_error`:: (Optional) If set to `true` and an error happens, changes to + the event are reverted, and the original event is returned. If + set to `false`, processing continues despite errors. + Default is `true`. +`omit_empty`:: (Optional) Whether empty values are extracted from the array. 
If + set to `true`, instead of the target field being set to an + empty value, it is left unset. The empty string (`""`), an + empty array (`[]`) or an empty object (`{}`) are considered + empty values. Default is `false`. + +[[include-fields]] +=== Keep fields from events + +The `include_fields` processor specifies which fields to export if a certain +condition is fulfilled. The condition is optional. If it's missing, the +specified fields are always exported. The `@timestamp` and `type` fields are +always exported, even if they are not defined in the `include_fields` list. + +[source,yaml] +------- +processors: + - include_fields: + when: + condition + fields: ["field1", "field2", ...] +------- + +See <> for a list of supported conditions. + +You can specify multiple `include_fields` processors under the `processors` +section. + +NOTE: If you define an empty list of fields under `include_fields`, then only +the required fields, `@timestamp` and `type`, are exported. + +[[processor-registered-domain]] +=== Registered Domain + +The `registered_domain` processor reads a field containing a hostname and then +writes the "registered domain" contained in the hostname to the target field. +For example, given `www.google.co.uk` the processor would output `google.co.uk`. +In other words the "registered domain" is the effective top-level domain +(`co.uk`) plus one level (`google`). + +This processor uses the Mozilla Public Suffix list to determine the value. + +[source,yaml] +---- +processors: +- registered_domain: + field: dns.question.name + target_field: dns.question.registered_domain + ignore_missing: true + ignore_failure: true +---- + +The `registered_domain` processor has the following configuration settings: + +.Registered Domain options +[options="header"] +|====== +| Name | Required | Default | Description | +| `field` | yes | | Source field containing a fully qualified domain name (FQDN). | +| `target_field` | yes | | Target field for the registered domain value. | +| `ignore_missing` | no | false | Ignore errors when the source field is missing. | +| `ignore_failure` | no | false | Ignore all errors produced by the processor. | +| `id` | no | | An identifier for this processor instance. Useful for debugging. | +|====== + +[[rename-fields]] +=== Rename fields from events + +The `rename` processor specifies a list of fields to rename. Under the `fields` +key each entry contains a `from: old-key` and a `to: new-key` pair. `from` is +the origin and `to` the target name of the field. + +Renaming fields can be useful in cases where field names cause conflicts. For +example if an event has two fields, `c` and `c.b`, that are both assigned scalar +values (e.g. `{"c": 1, "c.b": 2}`) this will result in an Elasticsearch error at +ingest time. This is because the value of a cannot simultaneously be a scalar +and an object. To prevent this rename_fields can be used to rename `c` to +`c.value`. + +Rename fields cannot be used to overwrite fields. To overwrite fields either +first rename the target field or use the `drop_fields` processor to drop the +field and then rename the field. + +[source,yaml] +------- +processors: +- rename: + fields: + - from: "a.g" + to: "e.d" + ignore_missing: false + fail_on_error: true +------- + +The `rename` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If set to true, no error is logged in case a key +which should be renamed is missing. Default is `false`. 
+`fail_on_error`:: (Optional) If set to `true`, when an error occurs the renaming
+of fields is stopped and the original event is returned. If set to `false`,
+renaming continues even if an error happens during renaming. Default is `true`.
+
+See <<conditions>> for a list of supported conditions.
+
+You can specify multiple `rename` processors under the `processors`
+section.
+
+[[add-kubernetes-metadata]]
+=== Add Kubernetes metadata
+
+The `add_kubernetes_metadata` processor annotates each event with relevant
+metadata based on which Kubernetes pod the event originated from.
+At startup it detects an `in_cluster` environment and caches the
+Kubernetes-related metadata. Events are only annotated if a valid configuration
+is detected. If it's not able to detect a valid Kubernetes configuration,
+the events are not annotated with Kubernetes-related metadata.
+
+Each event is annotated with:
+
+* Pod Name
+* Pod UID
+* Namespace
+* Labels
+
+The `add_kubernetes_metadata` processor has two basic building blocks:
+
+* Indexers
+* Matchers
+
+Indexers take in a pod's metadata and build indices based on the pod metadata.
+For example, the `ip_port` indexer can take a Kubernetes pod and index the pod
+metadata based on all `pod_ip:container_port` combinations.
+
+Matchers are used to construct lookup keys for querying indices. For example,
+when the `fields` matcher takes `["metricset.host"]` as a lookup field, it would
+construct a lookup key with the value of the field `metricset.host`.
+
+Each Beat can define its own default indexers and matchers, which are enabled by
+default. For example, Filebeat enables the `container` indexer, which indexes
+pod metadata based on all container IDs, and a `logs_path` matcher, which takes
+the `log.file.path` field, extracts the container ID, and uses it to retrieve
+metadata.
+
+The configuration below enables the processor when {beatname_lc} is run as a pod in
+Kubernetes.
+
+[source,yaml]
+-------------------------------------------------------------------------------
+processors:
+- add_kubernetes_metadata:
+-------------------------------------------------------------------------------
+
+The configuration below enables the processor on a Beat running as a process on
+the Kubernetes node.
+
+[source,yaml]
+-------------------------------------------------------------------------------
+processors:
+- add_kubernetes_metadata:
+    host: 
+    # If kube_config is not set, KUBECONFIG environment variable will be checked
+    # and if not present it will fall back to InCluster
+    kube_config: ${HOME}/.kube/config
+-------------------------------------------------------------------------------
+
+The configuration below has the default indexers and matchers disabled and
+enables the ones that the user is interested in.
+ +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_kubernetes_metadata: + host: + # If kube_config is not set, KUBECONFIG environment variable will be checked + # and if not present it will fall back to InCluster + kube_config: ~/.kube/config + default_indexers.enabled: false + default_matchers.enabled: false + indexers: + - ip_port: + matchers: + - fields: + lookup_fields: ["metricset.host"] +------------------------------------------------------------------------------- + +The `add_kubernetes_metadata` processor has the following configuration settings: + +`host`:: (Optional) Specify the node to scope {beatname_lc} to in case it +cannot be accurately detected, as when running {beatname_lc} in host network +mode. +`namespace`:: (Optional) Select the namespace from which to collect the +metadata. If it is not set, the processor collects metadata from all namespaces. +It is unset by default. +`kube_config`:: (Optional) Use given config file as configuration for Kubernetes +client. It defaults to `KUBECONFIG` environment variable if present. +`default_indexers.enabled`:: (Optional) Enable/Disable default pod indexers, in +case you want to specify your own. +`default_matchers.enabled`:: (Optional) Enable/Disable default pod matchers, in +case you want to specify your own. + +[[add-docker-metadata]] +=== Add Docker metadata + +The `add_docker_metadata` processor annotates each event with relevant metadata +from Docker containers. At startup it detects a docker environment and caches the metadata. +The events are annotated with Docker metadata, only if a valid configuration +is detected and the processor is able to reach Docker API. + +Each event is annotated with: + +* Container ID +* Name +* Image +* Labels + +[NOTE] +===== +When running {beatname_uc} in a container, you need to provide access to +Docker’s unix socket in order for the `add_docker_metadata` processor to work. +You can do this by mounting the socket inside the container. For example: + +`docker run -v /var/run/docker.sock:/var/run/docker.sock ...` + +To avoid privilege issues, you may also need to add `--user=root` to the +`docker run` flags. Because the user must be part of the docker group in order +to access `/var/run/docker.sock`, root access is required if {beatname_uc} is +running as non-root inside the container. +===== + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_docker_metadata: + host: "unix:///var/run/docker.sock" + #match_fields: ["system.process.cgroup.id"] + #match_pids: ["process.pid", "process.ppid"] + #match_source: true + #match_source_index: 4 + #match_short_id: true + #cleanup_timeout: 60 + #labels.dedot: false + # To connect to Docker over TLS you must specify a client and CA certificate. + #ssl: + # certificate_authority: "/etc/pki/root/ca.pem" + # certificate: "/etc/pki/client/cert.pem" + # key: "/etc/pki/client/cert.key" +------------------------------------------------------------------------------- + +It has the following settings: + +`host`:: (Optional) Docker socket (UNIX or TCP socket). It uses +`unix:///var/run/docker.sock` by default. + +`ssl`:: (Optional) SSL configuration to use when connecting to the Docker +socket. + +`match_fields`:: (Optional) A list of fields to match a container ID, at least +one of them should hold a container ID to get the event enriched. + +`match_pids`:: (Optional) A list of fields that contain process IDs. 
If the +process is running in Docker then the event will be enriched. The default value +is `["process.pid", "process.ppid"]`. + +`match_source`:: (Optional) Match container ID from a log path present in the +`log.file.path` field. Enabled by default. + +`match_short_id`:: (Optional) Match container short ID from a log path present +in the `log.file.path` field. Disabled by default. +This allows to match directories names that have the first 12 characters +of the container ID. For example, `/var/log/containers/b7e3460e2b21/*.log`. + +`match_source_index`:: (Optional) Index in the source path split by `/` to look +for container ID. It defaults to 4 to match +`/var/lib/docker/containers//*.log` + +`cleanup_timeout`:: (Optional) Time of inactivity to consider we can clean and +forget metadata for a container, 60s by default. + +`labels.dedot`:: (Optional) Default to be false. If set to true, replace dots in + labels with `_`. + +[[add-host-metadata]] +=== Add Host metadata + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_host_metadata: + netinfo.enabled: false + cache.ttl: 5m + geo: + name: nyc-dc1-rack1 + location: 40.7128, -74.0060 + continent_name: North America + country_iso_code: US + region_name: New York + region_iso_code: NY + city_name: New York +------------------------------------------------------------------------------- + +It has the following settings: + +`netinfo.enabled`:: (Optional) Default false. Include IP addresses and MAC addresses as fields host.ip and host.mac + +`cache.ttl`:: (Optional) The processor uses an internal cache for the host metadata. This sets the cache expiration time. The default is 5m, negative values disable caching altogether. + +`geo.name`:: (Optional) User definable token to be used for identifying a discrete location. Frequently a datacenter, rack, or similar. + +`geo.location`:: (Optional) Longitude and latitude in comma separated format. + +`geo.continent_name`:: (Optional) Name of the continent. + +`geo.country_name`:: (Optional) Name of the country. + +`geo.region_name`:: (Optional) Name of the region. + +`geo.city_name`:: (Optional) Name of the city. + +`geo.country_iso_code`:: (Optional) ISO country code. + +`geo.region_iso_code`:: (Optional) ISO region code. + + +The `add_host_metadata` processor annotates each event with relevant metadata from the host machine. 
+The fields added to the event look like the following:
+
+[source,json]
+-------------------------------------------------------------------------------
+{
+  "host":{
+    "architecture":"x86_64",
+    "name":"example-host",
+    "id":"",
+    "os":{
+      "family":"darwin",
+      "build":"16G1212",
+      "platform":"darwin",
+      "version":"10.12.6",
+      "kernel":"16.7.0",
+      "name":"Mac OS X"
+    },
+    "ip": ["192.168.0.1", "10.0.0.1"],
+    "mac": ["00:25:96:12:34:56", "72:00:06:ff:79:f1"],
+    "geo": {
+      "continent_name": "North America",
+      "country_iso_code": "US",
+      "region_name": "New York",
+      "region_iso_code": "NY",
+      "city_name": "New York",
+      "name": "nyc-dc1-rack1",
+      "location": "40.7128, -74.0060"
+    }
+  }
+}
+-------------------------------------------------------------------------------
+
+[[add-observer-metadata]]
+=== Add Observer metadata
+
+beta[]
+
+[source,yaml]
+-------------------------------------------------------------------------------
+processors:
+- add_observer_metadata:
+    netinfo.enabled: false
+    cache.ttl: 5m
+    geo:
+      name: nyc-dc1-rack1
+      location: 40.7128, -74.0060
+      continent_name: North America
+      country_iso_code: US
+      region_name: New York
+      region_iso_code: NY
+      city_name: New York
+-------------------------------------------------------------------------------
+
+It has the following settings:
+
+`netinfo.enabled`:: (Optional) Default false. Include IP addresses and MAC addresses as fields observer.ip and observer.mac
+
+`cache.ttl`:: (Optional) The processor uses an internal cache for the observer metadata. This sets the cache expiration time. The default is 5m, negative values disable caching altogether.
+
+`geo.name`:: (Optional) User definable token to be used for identifying a discrete location. Frequently a datacenter, rack, or similar.
+
+`geo.location`:: (Optional) Longitude and latitude in comma separated format.
+
+`geo.continent_name`:: (Optional) Name of the continent.
+
+`geo.country_name`:: (Optional) Name of the country.
+
+`geo.region_name`:: (Optional) Name of the region.
+
+`geo.city_name`:: (Optional) Name of the city.
+
+`geo.country_iso_code`:: (Optional) ISO country code.
+
+`geo.region_iso_code`:: (Optional) ISO region code.
+
+
+The `add_observer_metadata` processor annotates each event with relevant metadata from the observer machine.
+The fields added to the event look like the following:
+
+[source,json]
+-------------------------------------------------------------------------------
+{
+  "observer" : {
+    "hostname" : "avce",
+    "type" : "heartbeat",
+    "vendor" : "elastic",
+    "ip" : [
+      "192.168.1.251",
+      "fe80::64b2:c3ff:fe5b:b974"
+    ],
+    "mac" : [
+      "dc:c1:02:6f:1b:ed"
+    ],
+    "geo": {
+      "continent_name": "North America",
+      "country_iso_code": "US",
+      "region_name": "New York",
+      "region_iso_code": "NY",
+      "city_name": "New York",
+      "name": "nyc-dc1-rack1",
+      "location": "40.7128, -74.0060"
+    }
+  }
+}
+-------------------------------------------------------------------------------
+
+[[dissect]]
+=== Dissect strings
+
+The dissect processor tokenizes incoming strings using defined patterns.
+
+[source,yaml]
+-------
+processors:
+- dissect:
+    tokenizer: "%{key1} %{key2}"
+    field: "message"
+    target_prefix: "dissect"
+-------
+
+The `dissect` processor has the following configuration settings:
+
+`field`:: (Optional) The event field to tokenize. Default is `message`.
+
+`target_prefix`:: (Optional) The name of the field where the values will be extracted. When an empty
+string is defined, the processor will create the keys at the root of the event. 
Default is +`dissect`. When the target key already exists in the event, the processor won't replace it and log +an error; you need to either drop or rename the key before using dissect. + +For tokenization to be successful, all keys must be found and extracted, if one of them cannot be +found an error will be logged and no modification is done on the original event. + +NOTE: A key can contain any characters except reserved suffix or prefix modifiers: `/`,`&`, `+` +and `?`. + +See <> for a list of supported conditions. + +[[processor-dns]] +=== DNS Reverse Lookup + +The DNS processor performs reverse DNS lookups of IP addresses. It caches the +responses that it receives in accordance to the time-to-live (TTL) value +contained in the response. It also caches failures that occur during lookups. +Each instance of this processor maintains its own independent cache. + +The processor uses its own DNS resolver to send requests to nameservers and does +not use the operating system's resolver. It does not read any values contained +in `/etc/hosts`. + +This processor can significantly slow down your pipeline's throughput if you +have a high latency network or slow upstream nameserver. The cache will help +with performance, but if the addresses being resolved have a high cardinality +then the cache benefits will be diminished due to the high miss ratio. + +By way of example, if each DNS lookup takes 2 milliseconds, the maximum +throughput you can achieve is 500 events per second (1000 milliseconds / 2 +milliseconds). If you have a high cache hit ratio then your throughput can be +higher. + +This is a minimal configuration example that resolves the IP addresses contained +in two fields. + +[source,yaml] +---- +processors: +- dns: + type: reverse + fields: + source.ip: source.hostname + destination.ip: destination.hostname +---- + +Next is a configuration example showing all options. + +[source,yaml] +---- +processors: +- dns: + type: reverse + action: append + fields: + server.ip: server.hostname + client.ip: client.hostname + success_cache: + capacity.initial: 1000 + capacity.max: 10000 + failure_cache: + capacity.initial: 1000 + capacity.max: 10000 + ttl: 1m + nameservers: ['192.0.2.1', '203.0.113.1'] + timeout: 500ms + tag_on_failure: [_dns_reverse_lookup_failed] +---- + +The `dns` processor has the following configuration settings: + +`type`:: The type of DNS lookup to perform. The only supported type is +`reverse` which queries for a PTR record. + +`action`:: This defines the behavior of the processor when the target field +already exists in the event. The options are `append` (default) and `replace`. + +`fields`:: This is a mapping of source field names to target field names. The +value of the source field will be used in the DNS query and result will be +written to the target field. + +`success_cache.capacity.initial`:: The initial number of items that the success +cache will be allocated to hold. When initialized the processor will allocate +the memory for this number of items. Default value is `1000`. + +`success_cache.capacity.max`:: The maximum number of items that the success +cache can hold. When the maximum capacity is reached a random item is evicted. +Default value is `10000`. + +`failure_cache.capacity.initial`:: The initial number of items that the failure +cache will be allocated to hold. When initialized the processor will allocate +the memory for this number of items. Default value is `1000`. 
+ +`failure_cache.capacity.max`:: The maximum number of items that the failure +cache can hold. When the maximum capacity is reached a random item is evicted. +Default value is `10000`. + +`failure_cache.ttl`:: The duration for which failures are cached. Valid time +units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Default value is `1m`. + +`nameservers`:: A list of nameservers to query. If there are multiple servers, +the resolver queries them in the order listed. If none are specified then it +will read the nameservers listed in `/etc/resolv.conf` once at initialization. +On Windows you must always supply at least one nameserver. + +`timeout`:: The duration after which a DNS query will timeout. This is timeout +for each DNS request so if you have 2 nameservers then the total timeout will be +2 times this value. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", +"h". Default value is `500ms`. + +`tag_on_failure`:: A list of tags to add to the event when any lookup fails. The +tags are only added once even if multiple lookups fail. By default no tags are +added upon failure. + +[[add-process-metadata]] +=== Add process metadata + +The Add process metadata processor enriches events with information from running +processes, identified by their process ID (PID). + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_process_metadata: + match_pids: [system.process.ppid] + target: system.process.parent +------------------------------------------------------------------------------- + +The fields added to the event look as follows: +[source,json] +------------------------------------------------------------------------------- +"process": { + "name": "systemd", + "title": "/usr/lib/systemd/systemd --switched-root --system --deserialize 22", + "exe": "/usr/lib/systemd/systemd", + "args": ["/usr/lib/systemd/systemd", "--switched-root", "--system", "--deserialize", "22"], + "pid": 1, + "ppid": 0, + "start_time": "2018-08-22T08:44:50.684Z", +} +------------------------------------------------------------------------------- + +Optionally, the process environment can be included, too: +[source,json] +------------------------------------------------------------------------------- + ... + "env": { + "HOME": "/", + "TERM": "linux", + "BOOT_IMAGE": "/boot/vmlinuz-4.11.8-300.fc26.x86_64", + "LANG": "en_US.UTF-8", + } + ... +------------------------------------------------------------------------------- +It has the following settings: + +`match_pids`:: List of fields to lookup for a PID. The processor will +search the list sequentially until the field is found in the current event, and +the PID lookup will be applied to the value of this field. + +`target`:: (Optional) Destination prefix where the `process` object will be +created. The default is the event's root. + +`include_fields`:: (Optional) List of fields to add. By default, the processor +will add all the available fields except `process.env`. + +`ignore_missing`:: (Optional) When set to `false`, events that don't contain any +of the fields in match_pids will be discarded and an error will be generated. By +default, this condition is ignored. + +`overwrite_keys`:: (Optional) By default, if a target field already exists, it +will not be overwritten and an error will be logged. If `overwrite_keys` is +set to `true`, this condition will be ignored. + +`restricted_fields`:: (Optional) By default, the `process.env` field is not +output, to avoid leaking sensitive data. 
If `restricted_fields` is `true`, the
+field will be present in the output.
+
+ifdef::has_script_processor[]
+[[processor-script]]
+=== Script Processor
+
+The script processor executes Javascript code to process an event. The processor
+uses a pure Go implementation of ECMAScript 5.1 and has no external
+dependencies. This can be useful in situations where one of the other processors
+doesn't provide the functionality you need to filter events.
+
+The processor can be configured by embedding Javascript in your configuration
+file or by pointing the processor at external file(s).
+
+[source,yaml]
+----
+processors:
+- script:
+    lang: javascript
+    id: my_filter
+    source: >
+      function process(event) {
+        event.Tag("js");
+      }
+----
+
+This loads `filter.js` from disk.
+
+[source,yaml]
+----
+processors:
+- script:
+    lang: javascript
+    id: my_filter
+    file: ${path.config}/filter.js
+----
+
+Parameters can be passed to the script by adding `params` to the config.
+This allows for a script to be made reusable. When using `params` the
+code must define a `register(params)` function to receive the parameters.
+
+[source,yaml]
+----
+processors:
+- script:
+    lang: javascript
+    id: my_filter
+    params:
+      threshold: 15
+    source: >
+      var params = {threshold: 42};
+      function register(scriptParams) {
+        params = scriptParams;
+      }
+      function process(event) {
+        if (event.Get("severity") < params.threshold) {
+          event.Cancel();
+        }
+      }
+----
+
+If the script defines a `test()` function it will be invoked when the processor
+is loaded. Any exceptions thrown will cause the processor to fail to load. This
+can be used to make assertions about the behavior of the script.
+
+[source,javascript]
+----
+function process(event) {
+    if (event.Get("event.code") === 1102) {
+        event.Put("event.action", "cleared");
+    }
+    return event;
+}
+
+function test() {
+    var event = process(new Event({event: {code: 1102}}));
+    if (event.Get("event.action") !== "cleared") {
+        throw "expected event.action === cleared";
+    }
+}
+----
+
+[float]
+==== Configuration options
+
+The `script` processor has the following configuration settings:
+
+`lang`:: This field is required and its value must be `javascript`.
+
+`tag`:: This is an optional identifier that is added to log messages. If defined
+it enables metrics logging for this instance of the processor. The metrics
+include the number of exceptions and a histogram of the execution times for
+the `process` function.
+
+`source`:: Inline Javascript source code.
+
+`file`:: Path to a script file to load. Relative paths are interpreted as
+relative to the `path.config` directory. Globs are expanded.
+
+`files`:: List of script files to load. The scripts are concatenated together.
+Relative paths are interpreted as relative to the `path.config` directory.
+Globs are expanded.
+
+`params`:: A dictionary of parameters that are passed to the `register` function
+of the script.
+
+`tag_on_exception`:: Tag to add to events in case the Javascript code causes an
+exception while processing an event. Defaults to `_js_exception`.
+
+`timeout`:: This sets an execution timeout for the `process` function. When
+the `process` function takes longer than the `timeout` period the function
+is interrupted. You can set this option to prevent a script from running for
+too long (like preventing an infinite `while` loop). By default there is no
+timeout.
+
+[float]
+==== Event API
+
+The `Event` object passed to the `process` method has the following API.
+ +[frame="topbot",options="header"] +|=== +|Method |Description + +|`Get(string)` +|Get a value from the event (either a scalar or an object). If the key does not +exist `null` is returned. If no key is provided then an object containing all +fields is returned. + +*Example*: `var value = event.Get(key);` + +|`Put(string, value)` +|Put a value into the event. If the key was already set then the +previous value is returned. It throws an exception if the key cannot be set +because one of the intermediate values is not an object. + +*Example*: `var old = event.Put(key, value);` + +|`Rename(string, string)` +|Rename a key in the event. The target key must not exist. It +returns true if the source key was successfully renamed to the target key. + +*Example*: `var success = event.Rename("source", "target");` + +|`Delete(string)` +|Delete a field from the event. It returns true on success. + +*Example*: `var deleted = event.Delete("user.email");` + +|`Cancel()` +|Flag the event as cancelled which causes the processor to drop +event. + +*Example*: `event.Cancel(); return;` + +|`Tag(string)` +|Append a tag to the `tags` field if the tag does not already +exist. Throws an exception if `tags` exists and is not a string or a list of +strings. + +*Example*: `event.Tag("user_event");` + +|`AppendTo(string, string)` +|`AppendTo` is a specialized `Put` method that converts the existing value to an +array and appends the value if it does not already exist. If there is an +existing value that's not a string or array of strings then an exception is +thrown. + +*Example*: `event.AppendTo("error.message", "invalid file hash");` +|=== +endif::[] + +ifdef::has_timestamp_processor[] +[[processor-timestamp]] +=== Timestamp + +beta[] + +The `timestamp` processor parses a timestamp from a field. By default the +timestamp processor writes the parsed result to the `@timestamp` field. You can +specify a different field by setting the `target_field` parameter. The timestamp +value is parsed according to the `layouts` parameter. Multiple layouts can be +specified and they will be used sequentially to attempt parsing the timestamp +field. + +NOTE: The timestamp layouts used by this processor are different than the + formats supported by date processors in Logstash and Elasticsearch Ingest + Node. + +The `layouts` are described using a reference time that is based on this +specific time: + + Mon Jan 2 15:04:05 MST 2006 + +Since MST is GMT-0700, the reference time is: + + 01/02 03:04:05PM '06 -0700 + +To define your own layout, rewrite the reference time in a format that matches +the timestamps you expect to parse. For more layout examples and details see the +https://godoc.org/time#pkg-constants[Go time package documentation]. + +If a layout does not contain a year then the current year in the specified +`timezone` is added to the time value. + +.Timestamp options +[options="header"] +|====== +| Name | Required | Default | Description | +| `field` | yes | | Source field containing the time to be parsed. | +| `target_field` | no | @timestamp | Target field for the parsed time value. The target value is always written as UTC. | +| `layouts` | yes | | Timestamp layouts that define the expected time value format. In addition layouts, `UNIX` and `UNIX_MS` are accepted. | +| `timezone` | no | UTC | Timezone (e.g. America/New_York) to use when parsing a timestamp not containing a timezone. | +| `ignore_missing` | no | false | Ignore errors when the source field is missing. 
| +| `ignore_failure` | no | false | Ignore all errors produced by the processor. | +| `test` | no | | A list of timestamps that must parse successfully when loading the processor. | +| `id` | no | | An identifier for this processor instance. Useful for debugging. | +|====== + +Here is an example that parses the `start_time` field and writes the result +to the `@timestamp` field then deletes the `start_time` field. When the +processor is loaded it will immediately validate that the two `test` timestamps +parse with this configuration. + +[source,yaml] +---- +processors: +- timestamp: + field: start_time + layouts: + - '2006-01-02T15:04:05Z' + - '2006-01-02T15:04:05.999Z' + test: + - '2019-06-22T16:33:51Z' + - '2019-11-18T04:59:51.123Z' +- drop_fields: + fields: [start_time] +---- +endif::[]