Skip to content

Commit

Permalink
[pkg/ottl] Add ElementizeAttributesXML Converter
Browse files Browse the repository at this point in the history
  • Loading branch information
djaglowski committed Sep 20, 2024
1 parent 3e48af2 commit 67d21ae
Show file tree
Hide file tree
Showing 6 changed files with 298 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .chloggen/elementize-attributes-xml.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/ottl

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add ElementizeAttributesXML Converter

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: []

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
6 changes: 6 additions & 0 deletions pkg/ottl/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,12 @@ func Test_e2e_converters(t *testing.T) {
tCtx.GetLogRecord().Attributes().PutStr("test", "pass")
},
},
{
statement: `set(attributes["test"], ElementizeAttributesXML("<Log id=\"1\"><Message>This is a log message!</Message></Log>"))`,
want: func(tCtx ottllog.TransformContext) {
tCtx.GetLogRecord().Attributes().PutStr("test", `<Log><Message>This is a log message!</Message><id>1</id></Log>`)
},
},
{
statement: `set(attributes["test"], ExtractPatterns("aa123bb", "(?P<numbers>\\d+)"))`,
want: func(tCtx ottllog.TransformContext) {
Expand Down
25 changes: 25 additions & 0 deletions pkg/ottl/ottlfuncs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,31 @@ Examples:
- `Duration("333ms")`
- `Duration("1000000h")`


### ElementizeAttributesXML

`ElementizeAttributesXML(target, Optional[xpath])`

The `ElementizeAttributesXML` Converter returns an edited version of an XML string where attributes are converted into child elements.

`target` is a Getter that returns a string. This string should be in XML format.
If `target` is nil, is not a string, or cannot be parsed as XML, `ElementizeAttributesXML` will return an error.

`xpath` (optional) is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that
selects one or more attributes. Only the selected attributes are converted; nodes of any other type matched by the expression are left unchanged. If omitted, all attributes in the document are converted.

For example, `<a foo="bar"><b>baz</b></a>` will be converted to `<a><b>baz</b><foo>bar</foo></a>`.

Examples:

Convert all attributes in a document

- `ElementizeAttributesXML(body)`

Convert only the attributes of "Record" elements

- `ElementizeAttributesXML(body, "/Log/Record/@*")`

### ExtractPatterns

`ExtractPatterns(target, pattern)`
Expand Down
103 changes: 103 additions & 0 deletions pkg/ottl/ottlfuncs/func_elementize_attributes_xml.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"fmt"
"strings"

"github.com/antchfx/xmlquery"
"github.com/antchfx/xpath"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// ElementizeAttributesXMLArguments holds the arguments accepted by the
// ElementizeAttributesXML Converter.
type ElementizeAttributesXMLArguments[K any] struct {
// Target supplies the XML document, as a string, whose attributes are converted.
Target ottl.StringGetter[K]
// XPath optionally selects which nodes are considered for conversion. When it
// is unset or empty, "//@*" (all attributes in the document) is used.
XPath ottl.Optional[string]
}

// NewElementizeAttributesXMLFactory returns the ottl.Factory that registers the
// "ElementizeAttributesXML" Converter, pairing its argument struct with the
// function that builds the expression.
func NewElementizeAttributesXMLFactory[K any]() ottl.Factory[K] {
	const converterName = "ElementizeAttributesXML"
	emptyArgs := &ElementizeAttributesXMLArguments[K]{}
	return ottl.NewFactory(converterName, emptyArgs, createElementizeAttributesXMLFunction[K])
}

// createElementizeAttributesXMLFunction builds the ElementizeAttributesXML expression
// from the parsed OTTL arguments.
//
// It returns an error when oArgs is not a *ElementizeAttributesXMLArguments[K], or when
// the configured XPath does not compile. When no XPath is supplied (or it is empty),
// "//@*" is used so that every attribute in the document is converted.
func createElementizeAttributesXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
	args, ok := oArgs.(*ElementizeAttributesXMLArguments[K])
	if !ok {
		return nil, fmt.Errorf("ElementizeAttributesXML args must be of type *ElementizeAttributesXMLArguments[K]")
	}

	xPath := args.XPath.Get()
	if xPath == "" {
		xPath = "//@*" // All attributes in the document
	}

	// Reject an invalid expression while the statement is being built, so the
	// problem is reported once instead of on every evaluated record.
	if err := validateXPath(xPath); err != nil {
		return nil, err
	}

	return elementizeAttributesXML(args.Target, xPath), nil
}

// elementizeAttributesXML returns an ottl.ExprFunc whose result is the target XML, as a
// string, with the attributes selected by xPath converted into child elements of the
// element that owned them. Each new element is appended after the parent's existing
// children, in the order the attributes are matched. Nodes matched by xPath that are
// not attributes are left untouched.
// e.g. <a foo="bar" hello="world"><b/></a> -> <a><b></b><foo>bar</foo><hello>world</hello></a>
//
// The returned function yields an error when xPath is not a valid expression, when the
// target getter fails, or when the target cannot be parsed as XML.
func elementizeAttributesXML[K any](target ottl.StringGetter[K], xPath string) ottl.ExprFunc[K] {
	// The expression is fixed for the lifetime of the returned function, so validate it
	// once here rather than recompiling it on every invocation.
	xPathErr := validateXPath(xPath)

	return func(ctx context.Context, tCtx K) (any, error) {
		if xPathErr != nil {
			return nil, xPathErr
		}

		targetVal, err := target.Get(ctx, tCtx)
		if err != nil {
			return nil, err
		}

		top, err := parseNodesXML(targetVal)
		if err != nil {
			return nil, err
		}

		xmlquery.FindEach(top, xPath, func(_ int, n *xmlquery.Node) {
			// Only attributes are converted; anything else the expression matched is skipped.
			if n.Type != xmlquery.AttributeNode {
				return
			}

			// Append <name>value</name> as the last child of the owning element...
			xmlquery.AddChild(n.Parent, &xmlquery.Node{
				Type: xmlquery.ElementNode,
				Data: n.Data,
				FirstChild: &xmlquery.Node{
					Type: xmlquery.TextNode,
					Data: n.InnerText(),
				},
			})

			// ...and drop the attribute it replaces.
			n.Parent.RemoveAttr(n.Data)
		})

		return top.OutputXML(false), nil
	}
}

// validateXPath checks that xPath compiles as an XPath expression. It returns nil on
// success, or the compile error wrapped with an "invalid xpath" prefix.
//
// TODO move these and use them in other xml functions
func validateXPath(xPath string) error {
	if _, compileErr := xpath.Compile(xPath); compileErr != nil {
		return fmt.Errorf("invalid xpath: %w", compileErr)
	}
	return nil
}

// parseNodesXML parses targetVal as XML and returns the top node of the resulting tree.
//
// Aside from parsing the XML document, this function also ensures that
// the XML declaration is included in the result only if it was present in
// the original document: when targetVal does not start with "<?xml", a leading
// declaration node in the parsed tree is removed.
//
// It returns an error, wrapped with a "parse xml" prefix, if parsing fails.
func parseNodesXML(targetVal string) (*xmlquery.Node, error) {
	preserveDeclaration := strings.HasPrefix(targetVal, "<?xml")
	top, err := xmlquery.Parse(strings.NewReader(targetVal))
	if err != nil {
		return nil, fmt.Errorf("parse xml: %w", err)
	}
	if preserveDeclaration {
		return top, nil
	}

	// Only strip the first child when it exists and really is a declaration. This
	// avoids a nil dereference on a tree with no children and never discards
	// actual document content.
	if first := top.FirstChild; first != nil && first.Type == xmlquery.DeclarationNode {
		xmlquery.RemoveFromTree(first)
	}
	return top, nil
}
136 changes: 136 additions & 0 deletions pkg/ottl/ottlfuncs/func_elementize_attributes_xml_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"testing"

"github.com/stretchr/testify/assert"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// Test_ElementizeAttributesXML drives the converter through its factory-built function
// and compares the serialized XML for a range of documents and XPath filters.
// An empty xPath exercises the default of converting every attribute.
func Test_ElementizeAttributesXML(t *testing.T) {
	tests := []struct {
		name     string
		document string
		xPath    string
		want     string
	}{
		{
			name:     "nop",
			document: `<a><b/></a>`,
			want:     `<a><b></b></a>`,
		},
		{
			name:     "nop declaration",
			document: `<?xml version="1.0" encoding="UTF-8"?><a><b/></a>`,
			want:     `<?xml version="1.0" encoding="UTF-8"?><a><b></b></a>`,
		},
		{
			name:     "single attribute",
			document: `<a foo="bar"/>`,
			want:     `<a><foo>bar</foo></a>`,
		},
		{
			name:     "multiple attributes - order 1",
			document: `<a foo="bar" hello="world"/>`,
			want:     `<a><foo>bar</foo><hello>world</hello></a>`,
		},
		{
			name:     "multiple attributes - order 2",
			document: `<a hello="world" foo="bar"/>`,
			want:     `<a><hello>world</hello><foo>bar</foo></a>`,
		},
		{
			name:     "with child elements",
			document: `<a hello="world" foo="bar"><b/><c/><b/></a>`,
			want:     `<a><b></b><c></c><b></b><hello>world</hello><foo>bar</foo></a>`,
		},
		{
			name:     "with child value",
			document: `<a hello="world" foo="bar">free value</a>`,
			want:     `<a>free value<hello>world</hello><foo>bar</foo></a>`,
		},
		{
			name:     "with child elements and values",
			document: `<a hello="world" foo="bar">free value<b/>2<c/></a>`,
			want:     `<a>free value<b></b>2<c></c><hello>world</hello><foo>bar</foo></a>`,
		},
		{
			name:     "multiple levels",
			document: `<a hello="world" foo="bar"><b href="www.example.com"></b></a>`,
			want:     `<a><b><href>www.example.com</href></b><hello>world</hello><foo>bar</foo></a>`,
		},
		{
			name:     "xpath filtered",
			document: `<a hello="world" foo="bar"><b href="www.example.com"></b></a>`,
			xPath:    "/a/b/@*", // only convert attributes of b
			want:     `<a hello="world" foo="bar"><b><href>www.example.com</href></b></a>`,
		},
		{
			name:     "attributes found with non-attributes xpath",
			document: `<a hello="world" foo="bar"><b href="www.example.com"></b></a>`,
			xPath:    "/a/b", // selects the element b itself, not its attributes, so nothing is converted
			want:     `<a hello="world" foo="bar"><b href="www.example.com"></b></a>`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			args := &ElementizeAttributesXMLArguments[any]{
				Target: ottl.StandardStringGetter[any]{
					Getter: func(_ context.Context, _ any) (any, error) {
						return tt.document, nil
					},
				},
				XPath: ottl.NewTestingOptional(tt.xPath),
			}
			exprFunc, err := createElementizeAttributesXMLFunction[any](ottl.FunctionContext{}, args)
			// Stop here on a construction failure: exprFunc would be nil and calling
			// it would panic instead of reporting a clean test failure.
			if !assert.NoError(t, err) {
				return
			}

			result, err := exprFunc(context.Background(), nil)
			assert.NoError(t, err)
			assert.Equal(t, tt.want, result)
		})
	}
}

// Test_ElementizeAttributesXML_InvalidXML checks that evaluating the converter against
// a target that is not parseable XML produces an error.
func Test_ElementizeAttributesXML_InvalidXML(t *testing.T) {
	convert := elementizeAttributesXML(invalidXMLGetter(), "/foo")

	_, convertErr := convert(context.Background(), nil)

	assert.Error(t, convertErr)
}

// Test_ElementizeAttributesXML_InvalidXPath checks that evaluating the converter with
// an XPath expression that does not compile ("!") produces an error.
func Test_ElementizeAttributesXML_InvalidXPath(t *testing.T) {
	convert := elementizeAttributesXML(invalidXPathGetter(), "!")

	_, convertErr := convert(context.Background(), nil)

	assert.Error(t, convertErr)
}

func TestCreateElementizeAttributesXMLFunc(t *testing.T) {
exprFunc, err := createElementizeAttributesXMLFunction[any](ottl.FunctionContext{}, &ElementizeAttributesXMLArguments[any]{})
assert.NoError(t, err)
assert.NotNil(t, exprFunc)

_, err = createElementizeAttributesXMLFunction[any](ottl.FunctionContext{}, nil)
assert.Error(t, err)
}

// invalidXMLGetter returns a string getter whose value is a malformed XML document,
// for tests that exercise the parse-failure path.
//
// TODO move these and use them in other xml tests
func invalidXMLGetter() ottl.StandardStringGetter[any] {
	malformed := func(context.Context, any) (any, error) {
		return `<a>>>>>>>`, nil
	}
	return ottl.StandardStringGetter[any]{Getter: malformed}
}

// invalidXPathGetter returns a string getter whose value is a minimal XML document.
// Despite its name, the document it yields is `<a></a>`; in this file it is paired
// with an invalid XPath expression in the invalid-XPath test.
func invalidXPathGetter() ottl.StandardStringGetter[any] {
	minimalDoc := func(context.Context, any) (any, error) {
		return `<a></a>`, nil
	}
	return ottl.StandardStringGetter[any]{Getter: minimalDoc}
}
1 change: 1 addition & 0 deletions pkg/ottl/ottlfuncs/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func converters[K any]() []ottl.Factory[K] {
NewDayFactory[K](),
NewDoubleFactory[K](),
NewDurationFactory[K](),
NewElementizeAttributesXMLFactory[K](),
NewExtractPatternsFactory[K](),
NewExtractGrokPatternsFactory[K](),
NewFnvFactory[K](),
Expand Down

0 comments on commit 67d21ae

Please sign in to comment.