From 67d21aeffb7ae38d37c7f715a8361153ce87febc Mon Sep 17 00:00:00 2001 From: Dan Jaglowski Date: Fri, 20 Sep 2024 12:05:53 -0400 Subject: [PATCH] [pkg/ottl] Add ElementizeAttributesXML Converter --- .chloggen/elementize-attributes-xml.yaml | 27 ++++ pkg/ottl/e2e/e2e_test.go | 6 + pkg/ottl/ottlfuncs/README.md | 25 ++++ .../func_elementize_attributes_xml.go | 103 +++++++++++++ .../func_elementize_attributes_xml_test.go | 136 ++++++++++++++++++ pkg/ottl/ottlfuncs/functions.go | 1 + 6 files changed, 298 insertions(+) create mode 100644 .chloggen/elementize-attributes-xml.yaml create mode 100644 pkg/ottl/ottlfuncs/func_elementize_attributes_xml.go create mode 100644 pkg/ottl/ottlfuncs/func_elementize_attributes_xml_test.go diff --git a/.chloggen/elementize-attributes-xml.yaml b/.chloggen/elementize-attributes-xml.yaml new file mode 100644 index 000000000000..6cbfa5190e3e --- /dev/null +++ b/.chloggen/elementize-attributes-xml.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/ottl + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add ElementizeAttributesXML Converter + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. 
+# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/pkg/ottl/e2e/e2e_test.go b/pkg/ottl/e2e/e2e_test.go index 97ff3ad107ef..cb41e7f350e3 100644 --- a/pkg/ottl/e2e/e2e_test.go +++ b/pkg/ottl/e2e/e2e_test.go @@ -377,6 +377,12 @@ func Test_e2e_converters(t *testing.T) { tCtx.GetLogRecord().Attributes().PutStr("test", "pass") }, }, + { + statement: `set(attributes["test"], ElementizeAttributesXML("This is a log message!"))`, + want: func(tCtx ottllog.TransformContext) { + tCtx.GetLogRecord().Attributes().PutStr("test", `This is a log message!1`) + }, + }, { statement: `set(attributes["test"], ExtractPatterns("aa123bb", "(?P\\d+)"))`, want: func(tCtx ottllog.TransformContext) { diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 10f5acd833be..4b3785454ce7 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -597,6 +597,31 @@ Examples: - `Duration("333ms")` - `Duration("1000000h")` + +### ElementizeAttributesXML + +`ElementizeAttributesXML(target, Optional[xpath])` + +The `ElementizeAttributesXML` Converter returns an edited version of an XML string where attributes are converted into child elements. + +`target` is a Getter that returns a string. This string should be in XML format. +If `target` is not a string, nil, or cannot be parsed as XML, `ElementizeAttributesXML` will return an error. + +`xpath` (optional) is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that +selects one or more elements. Attributes will only be converted within the result(s) of the xpath. + +For example, `<a foo="bar"><b>baz</b></a>` will be converted to `<a><b>baz</b><foo>bar</foo></a>`. 
+ +Examples: + +Convert all attributes in a document + +- `ElementizeAttributesXML(body)` + +Convert only attributes within "Record" elements + +- `ElementizeAttributesXML(body, "/Log/Record")` + ### ExtractPatterns `ExtractPatterns(target, pattern)` diff --git a/pkg/ottl/ottlfuncs/func_elementize_attributes_xml.go b/pkg/ottl/ottlfuncs/func_elementize_attributes_xml.go new file mode 100644 index 000000000000..e6731454ad98 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_elementize_attributes_xml.go @@ -0,0 +1,103 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + +import ( + "context" + "fmt" + "strings" + + "github.com/antchfx/xmlquery" + "github.com/antchfx/xpath" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" +) + +type ElementizeAttributesXMLArguments[K any] struct { + Target ottl.StringGetter[K] + XPath ottl.Optional[string] +} + +func NewElementizeAttributesXMLFactory[K any]() ottl.Factory[K] { + return ottl.NewFactory("ElementizeAttributesXML", &ElementizeAttributesXMLArguments[K]{}, createElementizeAttributesXMLFunction[K]) +} + +func createElementizeAttributesXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) { + args, ok := oArgs.(*ElementizeAttributesXMLArguments[K]) + + if !ok { + return nil, fmt.Errorf("ElementizeAttributesXML args must be of type *ElementizeAttributesXMLAguments[K]") + } + + xPath := args.XPath.Get() + if xPath == "" { + xPath = "//@*" // All attributes in the document + } + + return elementizeAttributesXML(args.Target, xPath), nil +} + +// elementizeAttributesXML returns a `pcommon.String` that is a result of converting all attributes of the +// target XML into child elements. These new elements are added as the last child elements of the parent. +// e.g. 
<a hello="world" foo="bar"/> -> <a><hello>world</hello><foo>bar</foo></a> +func elementizeAttributesXML[K any](target ottl.StringGetter[K], xPath string) ottl.ExprFunc[K] { + return func(ctx context.Context, tCtx K) (any, error) { + if err := validateXPath(xPath); err != nil { + return nil, err + } + + targetVal, err := target.Get(ctx, tCtx) + if err != nil { + return nil, err + } + + top, err := parseNodesXML(targetVal) + if err != nil { + return nil, err + } + + xmlquery.FindEach(top, xPath, func(_ int, n *xmlquery.Node) { + if n.Type != xmlquery.AttributeNode { + return + } + + xmlquery.AddChild(n.Parent, &xmlquery.Node{ + Type: xmlquery.ElementNode, + Data: n.Data, + FirstChild: &xmlquery.Node{ + Type: xmlquery.TextNode, + Data: n.InnerText(), + }, + }) + + n.Parent.RemoveAttr(n.Data) + }) + + return top.OutputXML(false), nil + } +} + +// TODO move these and use them in other xml functions +func validateXPath(xPath string) error { + _, err := xpath.Compile(xPath) + if err != nil { + return fmt.Errorf("invalid xpath: %w", err) + } + return nil +} + +// Aside from parsing the XML document, this function also ensures that +// the XML declaration is included in the result only if it was present in +// the original document. 
+func parseNodesXML(targetVal string) (*xmlquery.Node, error) { + preserveDeclearation := strings.HasPrefix(targetVal, "`, + want: ``, + }, + { + name: "nop declaration", + document: ``, + want: ``, + }, + { + name: "single attribute", + document: ``, + want: `bar`, + }, + { + name: "multiple attributes - order 1", + document: ``, + want: `barworld`, + }, + { + name: "multiple attributes - order 2", + document: ``, + want: `worldbar`, + }, + { + name: "with child elements", + document: ``, + want: `worldbar`, + }, + { + name: "with child value", + document: `free value`, + want: `free valueworldbar`, + }, + { + name: "with child elements and values", + document: `free value2`, + want: `free value2worldbar`, + }, + { + name: "multiple levels", + document: ``, + want: `www.example.comworldbar`, + }, + { + name: "xpath filtered", + document: ``, + xPath: "/a/b/@*", // only convert attributes of b + want: `www.example.com`, + }, + { + name: "attributes found with non-attributes xpath", + document: ``, + xPath: "/a/b", // convert b (the attributes of b, even though the element b was selected) + want: ``, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + args := &ElementizeAttributesXMLArguments[any]{ + Target: ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return tt.document, nil + }, + }, + XPath: ottl.NewTestingOptional(tt.xPath), + } + exprFunc, err := createElementizeAttributesXMLFunction[any](ottl.FunctionContext{}, args) + assert.NoError(t, err) + + result, err := exprFunc(context.Background(), nil) + assert.NoError(t, err) + assert.Equal(t, tt.want, result) + }) + } +} + +func Test_ElementizeAttributesXML_InvalidXML(t *testing.T) { + exprFunc := elementizeAttributesXML(invalidXMLGetter(), "/foo") + _, err := exprFunc(context.Background(), nil) + assert.Error(t, err) +} + +func Test_ElementizeAttributesXML_InvalidXPath(t *testing.T) { + exprFunc := elementizeAttributesXML(invalidXPathGetter(), 
"!") + _, err := exprFunc(context.Background(), nil) + assert.Error(t, err) +} + +func TestCreateElementizeAttributesXMLFunc(t *testing.T) { + exprFunc, err := createElementizeAttributesXMLFunction[any](ottl.FunctionContext{}, &ElementizeAttributesXMLArguments[any]{}) + assert.NoError(t, err) + assert.NotNil(t, exprFunc) + + _, err = createElementizeAttributesXMLFunction[any](ottl.FunctionContext{}, nil) + assert.Error(t, err) +} + +// TODO move these and use them in other xml tests +func invalidXMLGetter() ottl.StandardStringGetter[any] { + return ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return `>>>>>>`, nil + }, + } +} + +func invalidXPathGetter() ottl.StandardStringGetter[any] { + return ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return ``, nil + }, + } +} diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go index ca165bf1b999..9287bd1f277b 100644 --- a/pkg/ottl/ottlfuncs/functions.go +++ b/pkg/ottl/ottlfuncs/functions.go @@ -43,6 +43,7 @@ func converters[K any]() []ottl.Factory[K] { NewDayFactory[K](), NewDoubleFactory[K](), NewDurationFactory[K](), + NewElementizeAttributesXMLFactory[K](), NewExtractPatternsFactory[K](), NewExtractGrokPatternsFactory[K](), NewFnvFactory[K](),