Skip to content

Commit

Permalink
when serializing back to html, use lxml html method
Browse files Browse the repository at this point in the history
instead of xml method which auto closes tags
and turns \r into &#13; entity.
  • Loading branch information
cekk authored and gotcha committed Mar 15, 2022
1 parent f36783f commit 47eee97
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 19 deletions.
4 changes: 2 additions & 2 deletions Products/PortalTransforms/tests/output/test_word.html
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<br/>
<br>



<p/><div name="Default" align="left" style=" padding: 0.00mm 0.00mm 0.00mm 0.00mm; ">
<p></p><div name="Default" align="left" style=" padding: 0.00mm 0.00mm 0.00mm 0.00mm; ">

<p style="text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; ">
how odd: blank named file in directory
Expand Down
15 changes: 11 additions & 4 deletions Products/PortalTransforms/tests/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def test_kill_nasty_tags_which_are_not_valid(self):
self.assertTrue('script' in self.settings.nasty_tags)
self.assertFalse('script' in self.settings.valid_tags)
orig = '<p><script>foo</script></p>'
data_out = '<p/>'
data_out = '<p></p>'
data = self.transforms.convertTo(target_mimetype='text/x-html-safe', orig=orig)
got = data.getData()
self.assertIsInstance(got, self.allowed_types)
Expand All @@ -207,15 +207,15 @@ def test_kill_nasty_tags_which_are_not_valid(self):
self.assertTrue('h1' in self.settings.nasty_tags)
self.assertFalse('h1' in self.settings.valid_tags)
orig = '<p><h1>foo</h1></p>'
data_out = '<p/>'
data_out = '<p></p>'
data = self.transforms.convertTo(target_mimetype='text/x-html-safe', orig=orig)
got = data.getData()
self.assertIsInstance(got, self.allowed_types)
self.assertEqual(got, data_out)

def test_entityiref_attributes(self):
orig = '<a href="&uuml;">foo</a>'
data_out = '<a href="&#xFC;">foo</a>'
data_out = '<a href="%C3%BC">foo</a>'
data = self.transforms.convertTo(target_mimetype='text/x-html-safe', orig=orig)
got = data.getData()
self.assertIsInstance(got, self.allowed_types)
Expand Down Expand Up @@ -245,6 +245,13 @@ def test_charref_data(self):
self.assertIsInstance(got, self.allowed_types)
self.assertEqual(got, data_out)

def test_do_not_autoclose_tags(self):
orig = '<p></p>'
data_out = '<p></p>'
data = self.transforms.convertTo(target_mimetype='text/x-html-safe', orig=orig)
got = data.getData()
self.assertEqual(got, data_out)


class SafeHtmlTransformsWithScriptTest(TransformTestCase):

Expand Down Expand Up @@ -391,7 +398,7 @@ def test_form_with_input_kept(self):
'<form>'
'<label>Hello</label> '
'<button name="but">Click here</button> '
'<input type="text" value="hi"/> '
'<input type="text" value="hi"> '
'<select name="sel"><option value="1">One</option></select> '
'<textarea name="text">Stuff</textarea>'
'</form>')
Expand Down
23 changes: 12 additions & 11 deletions Products/PortalTransforms/tests/test_xss.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,35 +31,35 @@ def doTest(self, data_in, data_out): # noqa

def test_1(self):
data_in = """<html><body><img src="javascript:Alert('XSS');" /></body></html>""" # noqa
data_out = '<img/>'
data_out = '<img>'
self.doTest(data_in, data_out)

def test_2(self):
data_in = """<img src="javascript:Alert('XSS');" />"""
data_out = '<img/>'
data_out = '<img>'
self.doTest(data_in, data_out)

def test_3(self):
data_in = """<html><body><IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;></body></html>""" # noqa
data_out = '<img/>'
data_out = '<img>'
self.doTest(data_in, data_out)

def test_4(self):
data_in = """<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>""" # noqa
data_out = '<img/>'
data_out = '<img>'

self.doTest(data_in, data_out)

def test_5(self):
data_in = """<img src="jav
asc
ript:Alert('XSS');" />"""
data_out = '<img/>'
data_out = '<img>'
self.doTest(data_in, data_out)

def test_6(self):
data_in = """<img src="jav asc ript:Alert('XSS');"/>"""
data_out = '<img/>'
data_out = '<img>'
self.doTest(data_in, data_out)

def test_7(self):
Expand Down Expand Up @@ -92,14 +92,14 @@ def test_11(self):

def test_12(self):
data_in = """<img src="vbscript:msgbox('XSS')"/>"""
data_out = '<img/>'
data_out = '<img>'
self.doTest(data_in, data_out)

def test_13(self):
data_in = """<img src="vb
sc
ript:msgbox('XSS')"/>"""
data_out = '<img/>'
data_out = '<img>'
self.doTest(data_in, data_out)

def test_14(self):
Expand Down Expand Up @@ -137,7 +137,7 @@ def test_19(self):
self.doTest(data_in, data_out)

def test_20(self):
data_in = '<img src="http://www.headnet.dk/log.jpg"/>'
data_in = '<img src="http://www.headnet.dk/log.jpg">'
data_out = data_in
self.doTest(data_in, data_out)

Expand Down Expand Up @@ -228,9 +228,10 @@ def test_37(self):

def test_38(self):
data_in = """<p><a href="http://T\\foo\\20111015\\bar.msg">FOO</a></p>""" # noqa
self.doTest(data_in, data_in)
data_out = """<p><a href="http://T%5Cfoo%5C20111015%5Cbar.msg">FOO</a></p>"""
self.doTest(data_in, data_out)

def test_39(self):
data_in = """<a href="&#42;&Ascr;\xa9"></a>"""
data_out = '<a href="*&amp;Ascr;&#xA9;"/>'
data_out = '<a href="*&amp;Ascr;%C2%A9"></a>'
self.doTest(data_in, data_out)
5 changes: 3 additions & 2 deletions Products/PortalTransforms/transforms/safe_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -2458,9 +2458,10 @@ def strip_outer(s):
return ''
# remove all except body or outer div
if six.PY2:
result = etree.tostring(tree, encoding='utf-8').strip()
result = etree.tostring(
tree, encoding='utf-8', method="html").strip()
else:
result = etree.tounicode(tree).strip()
result = etree.tounicode(tree, method="html").strip()
return strip_outer(result)


Expand Down
2 changes: 2 additions & 0 deletions news/43.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Prevent auto-closed empty tags in safe_html output.
[cekk]

0 comments on commit 47eee97

Please sign in to comment.