Skip to content

Commit

Permalink
Merge pull request #40 from jf-tech/jf-tech/streamparser-fix
Browse files Browse the repository at this point in the history
Fix a bug in XML stream parsing where a previously unmatched node caused all subsequent valid matches to fail.
  • Loading branch information
zhengchun authored Sep 14, 2020
2 parents 5648b2f + e26cec5 commit 1871a20
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
6 changes: 5 additions & 1 deletion parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,11 @@ func (p *parser) parse() (*Node, error) {
if p.streamElementFilter == nil || QuerySelector(p.doc, p.streamElementFilter) != nil {
return p.streamNode, nil
}
// otherwise, this isn't our target node. clean things up.
// otherwise, this isn't our target node, clean things up.
// note we also remove the underlying *Node from the node tree, to prevent
// future stream node candidate selection error.
RemoveFromTree(p.streamNode)
p.prev = p.streamNodePrev
p.streamNode = nil
p.streamNodePrev = nil
}
Expand Down
40 changes: 23 additions & 17 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,18 +278,22 @@ func testOutputXML(t *testing.T, msg string, expectedXML string, n *Node) {

func TestStreamParser_Success1(t *testing.T) {
s := `
<AAA>
<CCC>c1</CCC>
<BBB>b1</BBB>
<DDD>d1</DDD>
<BBB>b2<ZZZ z="1">z1</ZZZ></BBB>
<BBB>b3</BBB>
<BBB>b4</BBB>
<BBB>b5</BBB>
<CCC>c3</CCC>
</AAA>`

sp, err := CreateStreamParser(strings.NewReader(s), "/AAA/BBB", "/AAA/BBB[. != 'b3']")
<ROOT>
<AAA>
<CCC>c1</CCC>
<BBB>b1</BBB>
<DDD>d1</DDD>
<BBB>b2<ZZZ z="1">z1</ZZZ></BBB>
<BBB>b3</BBB>
</AAA>
<ZZZ>
<BBB>b4</BBB>
<BBB>b5</BBB>
<CCC>c3</CCC>
</ZZZ>
</ROOT>`

sp, err := CreateStreamParser(strings.NewReader(s), "/ROOT/*/BBB", "/ROOT/*/BBB[. != 'b3']")
if err != nil {
t.Fatal(err.Error())
}
Expand All @@ -300,7 +304,8 @@ func TestStreamParser_Success1(t *testing.T) {
t.Fatal(err.Error())
}
testOutputXML(t, "first call result", `<BBB>b1</BBB>`, n)
testOutputXML(t, "doc after first call", `<><?xml?><AAA><CCC>c1</CCC><BBB>b1</BBB></AAA></>`, findRoot(n))
testOutputXML(t, "doc after first call",
`<><?xml?><ROOT><AAA><CCC>c1</CCC><BBB>b1</BBB></AAA></ROOT></>`, findRoot(n))

// Second `<BBB>` read
n, err = sp.Read()
Expand All @@ -309,7 +314,7 @@ func TestStreamParser_Success1(t *testing.T) {
}
testOutputXML(t, "second call result", `<BBB>b2<ZZZ z="1">z1</ZZZ></BBB>`, n)
testOutputXML(t, "doc after second call",
`<><?xml?><AAA><CCC>c1</CCC><DDD>d1</DDD><BBB>b2<ZZZ z="1">z1</ZZZ></BBB></AAA></>`, findRoot(n))
`<><?xml?><ROOT><AAA><CCC>c1</CCC><DDD>d1</DDD><BBB>b2<ZZZ z="1">z1</ZZZ></BBB></AAA></ROOT></>`, findRoot(n))

// Third `<BBB>` read (Note we will skip 'b3' since the streamElementFilter excludes it)
n, err = sp.Read()
Expand All @@ -321,17 +326,18 @@ func TestStreamParser_Success1(t *testing.T) {
// been filtered out and is not our target node, thus it is considered just like any other
// non-target nodes such as `<CCC>` or `<DDD>`
testOutputXML(t, "doc after third call",
`<><?xml?><AAA><CCC>c1</CCC><DDD>d1</DDD><BBB>b3</BBB><BBB>b4</BBB></AAA></>`, findRoot(n))
`<><?xml?><ROOT><AAA><CCC>c1</CCC><DDD>d1</DDD></AAA><ZZZ><BBB>b4</BBB></ZZZ></ROOT></>`,
findRoot(n))

// Fourth `<BBB>` read
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "fourth call result", `<BBB>b5</BBB>`, n)
// Note the inclusion of `<BBB>b3</BBB>` in the document.
testOutputXML(t, "doc after fourth call",
`<><?xml?><AAA><CCC>c1</CCC><DDD>d1</DDD><BBB>b3</BBB><BBB>b5</BBB></AAA></>`, findRoot(n))
`<><?xml?><ROOT><AAA><CCC>c1</CCC><DDD>d1</DDD></AAA><ZZZ><BBB>b5</BBB></ZZZ></ROOT></>`,
findRoot(n))

_, err = sp.Read()
if err != io.EOF {
Expand Down

0 comments on commit 1871a20

Please sign in to comment.