Skip to content

Escaping name and desc properties before converting to HTML #253

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/systemrdl/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.29.3"
__version__ = "1.29.4"
15 changes: 12 additions & 3 deletions src/systemrdl/core/rdlformatcode.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
import re
from typing import TYPE_CHECKING, Optional
import html

from . import helpers
from ..node import Node, AddressableNode

if TYPE_CHECKING:
from markdown import Markdown

def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']=None, is_desc: bool=True) -> str:
def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']=None,
is_desc: bool=True, escape_html: bool=False) -> str:
"""
Convert an RDLFormatCode string to HTML
"""

# --------------------------------------------------------------------------
# Escape any characters which may cause problems when HTML is interpreted
# --------------------------------------------------------------------------
if escape_html:
text = html.escape(text, quote=True)

# --------------------------------------------------------------------------
# Remove any common indentation
# --------------------------------------------------------------------------
text = helpers.dedent_text(text)


# --------------------------------------------------------------------------
# Parse and replace RDLFormatCode Tags
# --------------------------------------------------------------------------
Expand Down Expand Up @@ -160,9 +169,9 @@ def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']
list_end_tag.pop()

elif m.lastgroup == 'quote':
text_segs.append('"')
text_segs.append('"')
elif m.lastgroup == 'xquote':
text_segs.append('"')
text_segs.append('"')
elif m.lastgroup == 'br':
text_segs.append("<br>")
elif m.lastgroup == 'lb':
Expand Down
21 changes: 17 additions & 4 deletions src/systemrdl/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ def get_rel_path(self, ref: 'Node', uplevel: str="^", hier_separator: str=".", a
return hier_separator.join(self_segs_fmt)


def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[str]:
def get_html_desc(self, markdown_inst: Optional['Markdown']=None, escape_html: bool=False) -> Optional[str]:
"""
Translates the node's 'desc' property into HTML.

Expand All @@ -665,6 +665,11 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
Override the class instance of the Markdown processor.
See the `Markdown module <https://python-markdown.github.io/reference/#Markdown>`_
for more details.
escape_html:
The desc property from the SystemRDL is passed through the python `html.escape`
function before processing. This can help avoid cases where plain text inadvertently
contains syntax that will result in undesirable behaviour when rendered. This option
should not be turned on if html tags are intentionally included in the `desc` property

Returns
-------
Expand All @@ -679,15 +684,23 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
desc_str = self.get_property('desc')
if desc_str is None:
return None
return rdlformatcode.rdlfc_to_html(desc_str, self, md=markdown_inst)
return rdlformatcode.rdlfc_to_html(desc_str, self, md=markdown_inst, escape_html=escape_html)


def get_html_name(self) -> Optional[str]:
def get_html_name(self, escape_html: bool=False) -> Optional[str]:
"""
Translates the node's 'name' property into HTML.

Any RDLFormatCode tags used are converted to HTML.

Parameters
----------
escape_html:
The name property from the SystemRDL is passed through the python `html.escape`
function before processing. This can help avoid cases where plain text inadvertently
contains syntax that will result in undesirable behaviour when rendered. This option
should not be turned on if html tags are intentionally included in the `name` property

Returns
-------
str or None
Expand All @@ -700,7 +713,7 @@ def get_html_name(self) -> Optional[str]:
name_str = self.get_property('name', default=None)
if name_str is None:
return None
return rdlformatcode.rdlfc_to_html(name_str, self, is_desc=False)
return rdlformatcode.rdlfc_to_html(name_str, self, is_desc=False, escape_html=escape_html)


@property
Expand Down
21 changes: 17 additions & 4 deletions src/systemrdl/rdltypes/user_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def rdl_desc(self) -> Optional[str]:
"""
return self._rdl_desc

def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[str]:
def get_html_desc(self, markdown_inst: Optional['Markdown']=None, escape_html: bool=False) -> Optional[str]:
"""
Translates the enum's 'desc' property into HTML.

Expand All @@ -234,6 +234,11 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
Override the class instance of the Markdown processor.
See the `Markdown module <https://python-markdown.github.io/reference/#Markdown>`_
for more details.
escape_html:
The desc property from the SystemRDL is passed through the python `html.escape`
function before processing. This can help avoid cases where plain text inadvertently
contains syntax that will result in undesirable behaviour when rendered. This option
should not be turned on if html tags are intentionally included in the `desc` property

Returns
-------
Expand All @@ -248,14 +253,22 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
desc_str = self._rdl_desc
if desc_str is None:
return None
return rdlformatcode.rdlfc_to_html(desc_str, md=markdown_inst)
return rdlformatcode.rdlfc_to_html(desc_str, md=markdown_inst, escape_html=escape_html)

def get_html_name(self) -> Optional[str]:
def get_html_name(self, escape_html: bool=False) -> Optional[str]:
"""
Translates the enum's 'name' property into HTML.

Any RDLFormatCode tags used are converted to HTML.

Parameters
----------
escape_html:
The name property from the SystemRDL is passed through the python `html.escape`
function before processing. This can help avoid cases where plain text inadvertently
contains syntax that will result in undesirable behaviour when rendered. This option
should not be turned on if html tags are intentionally included in the `name` property

Returns
-------
str or None
Expand All @@ -268,7 +281,7 @@ def get_html_name(self) -> Optional[str]:
name_str = self._rdl_name
if name_str is None:
return None
return rdlformatcode.rdlfc_to_html(name_str, is_desc=False)
return rdlformatcode.rdlfc_to_html(name_str, is_desc=False, escape_html=escape_html)


# Tell pickle how to reduce dynamically generated UserEnum classes
Expand Down
8 changes: 8 additions & 0 deletions test/rdl_src/rdlformatcode.rdl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ addrmap rdlformatcode {
r20->desc = "[index]";
r21->desc = "[index_parent]";

reg_t r22,r23,r24;
r22->desc = "string with a \"quote\" in it";
r23->desc = "tag to be escaped <h1> h1";
r24->desc = "signal &lt";


r1->name = "[b]asdf[/b]";
r2->name = "[i]asdf[/i]";
Expand All @@ -52,4 +57,7 @@ addrmap rdlformatcode {
r16->name = "";
r17->name = "
";
r22->name = "string with a \"quote\" in it";
r23->name = "tag to be escaped <h1> h1";
r24->name = "signal &lt";
};
3 changes: 2 additions & 1 deletion test/test_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def test_enums(self):
self.assertIsNone(f_default_enum['four'].get_html_name())
self.assertIsNone(f_default_enum['four'].get_html_desc())

self.assertEqual(f_default_enum['five'].get_html_name(), "five's <b>name</b>")
self.assertEqual(f_default_enum['five'].get_html_name(escape_html=False), "five's <b>name</b>")
self.assertEqual(f_default_enum['five'].get_html_name(escape_html=True),"five&#x27;s <b>name</b>")
self.assertEqual(f_default_enum['five'].get_html_desc(), "<p>this is five</p>")

f0 = root.find_by_path("enum_test1.reg2.f0")
Expand Down
167 changes: 105 additions & 62 deletions test/test_rdlformatcode.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest_utils import RDLSourceTestCase


class TestRDLFormatCode(RDLSourceTestCase):

def test_desc_tags(self):
Expand All @@ -8,46 +9,67 @@ def test_desc_tags(self):
"rdlformatcode"
)

self.assertIs(root.top.get_html_desc(), None)

html = []
for i in range(0,22):
reg = root.find_by_path("rdlformatcode.r%d" % i)
html.append(reg.get_html_desc())

def p(s):
return "<p>%s</p>" % s

self.assertEqual(html[0], "<p>asdf</p>")
self.assertEqual(html[1], p("<b>asdf</b>"))
self.assertEqual(html[2], p("<i>asdf</i>"))
self.assertEqual(html[3], p("<u>asdf</u>"))
self.assertEqual(html[4], p('<span style="color:red">asdf</span>'))
self.assertEqual(html[5], p('<span style="font-size:12">asdf</span>'))
self.assertEqual(html[6], p('<a href="github.com">github.com</a>'))
self.assertEqual(html[7], p('<a href="github.com">asdf</a>'))
self.assertEqual(html[8], p('<a href="mailto:asdf@example.com">asdf@example.com</a>'))
self.assertEqual(html[9], p('<img src="image.png">'))
self.assertEqual(html[10], p('<code>asdf</code>'))
self.assertEqual(html[11], p('"asdf"'))
self.assertEqual(html[12], p('<br>[]&nbsp;'))
self.assertEqual(html[13], p("r13"))
self.assertEqual(html[14], p("r14"))

r15 = root.find_by_path("rdlformatcode.r15[1]")
self.assertEqual(r15.get_html_desc(), p("<span class='rdlfc-index'>[1]</span>"))

f = root.find_by_path("rdlformatcode.r15[2].f")
self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[2]</span>"))
f = root.find_by_path("rdlformatcode.r15.f")
self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[0:2]</span>"))

self.assertEqual(html[16], "")
self.assertEqual(html[17], "")
self.assertEqual(html[18], "<ul><li>a</li><li>b</li><li>c</li></ul>")
self.assertEqual(html[19], '<ol type="a"><li>a</li><li>b</li><li>c</li></ol>')
self.assertEqual(html[20], p("[index]"))
self.assertEqual(html[21], p("[index_parent]"))
for escape_html in [True, False, None]:
with self.subTest(escape_html=escape_html):

if escape_html is not None:
self.assertIs(root.top.get_html_desc(escape_html=escape_html), None)
else:
# escaping html must be off by default, so the argument is omitted here to exercise the default
self.assertIs(root.top.get_html_desc(), None)

html = []
for i in range(0,25):
reg = root.find_by_path("rdlformatcode.r%d" % i)
if escape_html is not None:
html.append(reg.get_html_desc(escape_html=escape_html))
else:
# escaping html must be off by default, so the argument is omitted here to exercise the default
html.append(reg.get_html_desc())


def p(s):
return "<p>%s</p>" % s

self.assertEqual(html[0], "<p>asdf</p>")
self.assertEqual(html[1], p("<b>asdf</b>"))
self.assertEqual(html[2], p("<i>asdf</i>"))
self.assertEqual(html[3], p("<u>asdf</u>"))
self.assertEqual(html[4], p('<span style="color:red">asdf</span>'))
self.assertEqual(html[5], p('<span style="font-size:12">asdf</span>'))
self.assertEqual(html[6], p('<a href="github.com">github.com</a>'))
self.assertEqual(html[7], p('<a href="github.com">asdf</a>'))
self.assertEqual(html[8], p('<a href="mailto:asdf@example.com">asdf@example.com</a>'))
self.assertEqual(html[9], p('<img src="image.png">'))
self.assertEqual(html[10], p('<code>asdf</code>'))
self.assertEqual(html[11], p('&quot;asdf&quot;'))
self.assertEqual(html[12], p('<br>[]&nbsp;'))
self.assertEqual(html[13], p("r13"))
self.assertEqual(html[14], p("r14"))

r15 = root.find_by_path("rdlformatcode.r15[1]")
self.assertEqual(r15.get_html_desc(), p("<span class='rdlfc-index'>[1]</span>"))

f = root.find_by_path("rdlformatcode.r15[2].f")
self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[2]</span>"))
f = root.find_by_path("rdlformatcode.r15.f")
self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[0:2]</span>"))

self.assertEqual(html[16], "")
self.assertEqual(html[17], "")
self.assertEqual(html[18], "<ul><li>a</li><li>b</li><li>c</li></ul>")
self.assertEqual(html[19], '<ol type="a"><li>a</li><li>b</li><li>c</li></ol>')
self.assertEqual(html[20], p("[index]"))
self.assertEqual(html[21], p("[index_parent]"))

if escape_html is True:
self.assertEqual(html[22], p("string with a &quot;quote&quot; in it"))
self.assertEqual(html[23], p("tag to be escaped &lt;h1&gt; h1"))
else:
self.assertEqual(html[22], p("string with a \"quote\" in it"))
self.assertEqual(html[23], p("tag to be escaped <h1> h1"))
# the & character is escaped using the default Markdown processing anyway
self.assertEqual(html[24], p("signal &amp;lt"))


def test_name_tags(self):
Expand All @@ -56,25 +78,46 @@ def test_name_tags(self):
"rdlformatcode"
)

self.assertIs(root.top.get_html_name(), None)

html = []
for i in range(0,20):
reg = root.find_by_path("rdlformatcode.r%d" % i)
html.append(reg.get_html_name())

self.assertEqual(html[1], "<b>asdf</b>")
self.assertEqual(html[2], "<i>asdf</i>")
self.assertEqual(html[3], "<u>asdf</u>")
self.assertEqual(html[4], '<span style="color:red">asdf</span>')
self.assertEqual(html[5], '<span style="font-size:12">asdf</span>')
self.assertEqual(html[6], '<a href="github.com">github.com</a>')
self.assertEqual(html[7], '<a href="github.com">asdf</a>')
self.assertEqual(html[8], '<a href="mailto:asdf@example.com">asdf@example.com</a>')
self.assertEqual(html[10], '<code>asdf</code>')
self.assertEqual(html[11], '"asdf"')
self.assertEqual(html[12], '[]&nbsp;')
self.assertEqual(html[14], "r14")

self.assertEqual(html[16], "")
self.assertEqual(html[17], "")
for escape_html in [True, False, None]:
with self.subTest(escape_html=escape_html):

if escape_html is not None:
self.assertIs(root.top.get_html_name(escape_html=escape_html), None)
else:
# escaping html must be off by default, so the argument is omitted here to exercise the default
self.assertIs(root.top.get_html_name(), None)

html = []
for i in range(0,25):
reg = root.find_by_path("rdlformatcode.r%d" % i)
if escape_html is not None:
html.append(reg.get_html_name(escape_html=escape_html))
else:
# escaping html must be off by default, so the argument is omitted here to exercise the default
html.append(reg.get_html_name())

self.assertEqual(html[1], "<b>asdf</b>")
self.assertEqual(html[2], "<i>asdf</i>")
self.assertEqual(html[3], "<u>asdf</u>")
self.assertEqual(html[4], '<span style="color:red">asdf</span>')
self.assertEqual(html[5], '<span style="font-size:12">asdf</span>')
self.assertEqual(html[6], '<a href="github.com">github.com</a>')
self.assertEqual(html[7], '<a href="github.com">asdf</a>')
self.assertEqual(html[8], '<a href="mailto:asdf@example.com">asdf@example.com</a>')
self.assertEqual(html[10], '<code>asdf</code>')
self.assertEqual(html[11], '&quot;asdf&quot;')
self.assertEqual(html[12], '[]&nbsp;')
self.assertEqual(html[14], "r14")

self.assertEqual(html[16], "")
self.assertEqual(html[17], "")

if escape_html is True:
self.assertEqual(html[22], "string with a &quot;quote&quot; in it")
self.assertEqual(html[23], "tag to be escaped &lt;h1&gt; h1")
self.assertEqual(html[24], "signal &amp;lt")
else:
self.assertEqual(html[22], "string with a \"quote\" in it")
self.assertEqual(html[23], "tag to be escaped <h1> h1")
self.assertEqual(html[24], "signal &lt")

Loading