SystemRDL · krcb197 · Feb 24, 2025 · Feb 24, 2025 · Mar 1, 2025
diff --git a/src/systemrdl/__about__.py b/src/systemrdl/__about__.py
@@ -1 +1 @@
-__version__ = "1.29.3"
+__version__ = "1.29.4"
diff --git a/src/systemrdl/core/rdlformatcode.py b/src/systemrdl/core/rdlformatcode.py
@@ -1,22 +1,31 @@
 import re
 from typing import TYPE_CHECKING, Optional
+import html
 
 from . import helpers
 from ..node import Node, AddressableNode
 
 if TYPE_CHECKING:
     from markdown import Markdown
 
-def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']=None, is_desc: bool=True) -> str:
+def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']=None,
+                  is_desc: bool=True, escape_html: bool=False) -> str:
     """
     Convert an RDLFormatCode string to HTML
     """
 
+    # --------------------------------------------------------------------------
+    # Escape any characters which may cause problems when HTML is interpreted
+    # --------------------------------------------------------------------------
+    if escape_html:
+        text = html.escape(text, quote=True)
+
     # --------------------------------------------------------------------------
     # Remove any common indentation
     # --------------------------------------------------------------------------
     text = helpers.dedent_text(text)
 
+
     # --------------------------------------------------------------------------
     # Parse and replace RDLFormatCode Tags
     # --------------------------------------------------------------------------
@@ -160,9 +169,9 @@ def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']
                 list_end_tag.pop()
 
         elif m.lastgroup == 'quote':
-            text_segs.append('"')
+            text_segs.append('&quot;')
         elif m.lastgroup == 'xquote':
-            text_segs.append('"')
+            text_segs.append('&quot;')
         elif m.lastgroup == 'br':
             text_segs.append("<br>")
         elif m.lastgroup == 'lb':

diff --git a/src/systemrdl/node.py b/src/systemrdl/node.py
@@ -648,7 +648,7 @@ def get_rel_path(self, ref: 'Node', uplevel: str="^", hier_separator: str=".", a
         return hier_separator.join(self_segs_fmt)
 
 
-    def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[str]:
+    def get_html_desc(self, markdown_inst: Optional['Markdown']=None, escape_html: bool=False) -> Optional[str]:
         """
         Translates the node's 'desc' property into HTML.
 
@@ -665,6 +665,11 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
             Override the class instance of the Markdown processor.
             See the `Markdown module <https://python-markdown.github.io/reference/#Markdown>`_
             for more details.
+        escape_html:
+            The desc property from the SystemRDL is passed through the python `html.escape`
+            function before processing. This can help avoid cases where plain text inadvertently
+            contains syntax that will result in undesirable behaviour when rendered. This option
+            should not be turned on if html tags are intentionally included in the `desc` property
 
         Returns
         -------
@@ -679,15 +684,23 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
         desc_str = self.get_property('desc')
         if desc_str is None:
             return None
-        return rdlformatcode.rdlfc_to_html(desc_str, self, md=markdown_inst)
+        return rdlformatcode.rdlfc_to_html(desc_str, self, md=markdown_inst, escape_html=escape_html)
 
 
-    def get_html_name(self) -> Optional[str]:
+    def get_html_name(self, escape_html: bool=False) -> Optional[str]:
         """
         Translates the node's 'name' property into HTML.
 
         Any RDLFormatCode tags used are converted to HTML.
 
+        Parameters
+        ----------
+        escape_html:
+            The name property from the SystemRDL is passed through the python `html.escape`
+            function before processing. This can help avoid cases where plain text inadvertently
+            contains syntax that will result in undesirable behaviour when rendered. This option
+            should not be turned on if html tags are intentionally included in the `name` property
+
         Returns
         -------
         str or None
@@ -700,7 +713,7 @@ def get_html_name(self) -> Optional[str]:
         name_str = self.get_property('name', default=None)
         if name_str is None:
             return None
-        return rdlformatcode.rdlfc_to_html(name_str, self, is_desc=False)
+        return rdlformatcode.rdlfc_to_html(name_str, self, is_desc=False, escape_html=escape_html)
 
 
     @property

diff --git a/src/systemrdl/rdltypes/user_enum.py b/src/systemrdl/rdltypes/user_enum.py
@@ -217,7 +217,7 @@ def rdl_desc(self) -> Optional[str]:
         """
         return self._rdl_desc
 
-    def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[str]:
+    def get_html_desc(self, markdown_inst: Optional['Markdown']=None, escape_html: bool=False) -> Optional[str]:
         """
         Translates the enum's 'desc' property into HTML.
 
@@ -234,6 +234,11 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
             Override the class instance of the Markdown processor.
             See the `Markdown module <https://python-markdown.github.io/reference/#Markdown>`_
             for more details.
+        escape_html:
+            The desc property from the SystemRDL is passed through the python `html.escape`
+            function before processing. This can help avoid cases where plain text inadvertently
+            contains syntax that will result in undesirable behaviour when rendered. This option
+            should not be turned on if html tags are intentionally included in the `desc` property
 
         Returns
         -------
@@ -248,14 +253,22 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
         desc_str = self._rdl_desc
         if desc_str is None:
             return None
-        return rdlformatcode.rdlfc_to_html(desc_str, md=markdown_inst)
+        return rdlformatcode.rdlfc_to_html(desc_str, md=markdown_inst, escape_html=escape_html)
 
-    def get_html_name(self) -> Optional[str]:
+    def get_html_name(self, escape_html: bool=False) -> Optional[str]:
         """
         Translates the enum's 'name' property into HTML.
 
         Any RDLFormatCode tags used are converted to HTML.
 
+        Parameters
+        ----------
+        escape_html:
+            The name property from the SystemRDL is passed through the python `html.escape`
+            function before processing. This can help avoid cases where plain text inadvertently
+            contains syntax that will result in undesirable behaviour when rendered. This option
+            should not be turned on if html tags are intentionally included in the `name` property
+
         Returns
         -------
         str or None
@@ -268,7 +281,7 @@ def get_html_name(self) -> Optional[str]:
         name_str = self._rdl_name
         if name_str is None:
             return None
-        return rdlformatcode.rdlfc_to_html(name_str, is_desc=False)
+        return rdlformatcode.rdlfc_to_html(name_str, is_desc=False, escape_html=escape_html)
 
 
 # Tell pickle how to reduce dynamically generated UserEnum classes

diff --git a/test/rdl_src/rdlformatcode.rdl b/test/rdl_src/rdlformatcode.rdl
@@ -36,6 +36,11 @@ addrmap rdlformatcode {
     r20->desc = "[index]";
     r21->desc = "[index_parent]";
 
+    reg_t r22,r23,r24;
+    r22->desc = "string with a \"quote\" in it";
+    r23->desc = "tag to be escaped <h1> h1";
+    r24->desc = "signal &lt";
+
 
     r1->name = "[b]asdf[/b]";
     r2->name = "[i]asdf[/i]";
@@ -52,4 +57,7 @@ addrmap rdlformatcode {
     r16->name = "";
     r17->name = "
     ";
+    r22->name = "string with a \"quote\" in it";
+    r23->name = "tag to be escaped <h1> h1";
+    r24->name = "signal &lt";
 };
diff --git a/test/test_enums.py b/test/test_enums.py
@@ -30,7 +30,8 @@ def test_enums(self):
         self.assertIsNone(f_default_enum['four'].get_html_name())
         self.assertIsNone(f_default_enum['four'].get_html_desc())
 
-        self.assertEqual(f_default_enum['five'].get_html_name(), "five's <b>name</b>")
+        self.assertEqual(f_default_enum['five'].get_html_name(escape_html=False), "five's <b>name</b>")
+        self.assertEqual(f_default_enum['five'].get_html_name(escape_html=True),"five&#x27;s <b>name</b>")
         self.assertEqual(f_default_enum['five'].get_html_desc(), "<p>this is five</p>")
 
         f0 = root.find_by_path("enum_test1.reg2.f0")

diff --git a/test/test_rdlformatcode.py b/test/test_rdlformatcode.py
@@ -1,5 +1,6 @@
 from unittest_utils import RDLSourceTestCase
 
+
 class TestRDLFormatCode(RDLSourceTestCase):
 
     def test_desc_tags(self):
@@ -8,46 +9,67 @@ def test_desc_tags(self):
             "rdlformatcode"
         )
 
-        self.assertIs(root.top.get_html_desc(), None)
-
-        html = []
-        for i in range(0,22):
-            reg = root.find_by_path("rdlformatcode.r%d" % i)
-            html.append(reg.get_html_desc())
-
-        def p(s):
-            return "<p>%s</p>" % s
-
-        self.assertEqual(html[0], "<p>asdf</p>")
-        self.assertEqual(html[1], p("<b>asdf</b>"))
-        self.assertEqual(html[2], p("<i>asdf</i>"))
-        self.assertEqual(html[3], p("<u>asdf</u>"))
-        self.assertEqual(html[4], p('<span style="color:red">asdf</span>'))
-        self.assertEqual(html[5], p('<span style="font-size:12">asdf</span>'))
-        self.assertEqual(html[6], p('<a href="github.com">github.com</a>'))
-        self.assertEqual(html[7], p('<a href="github.com">asdf</a>'))
-        self.assertEqual(html[8], p('<a href="mailto:asdf@example.com">asdf@example.com</a>'))
-        self.assertEqual(html[9], p('<img src="image.png">'))
-        self.assertEqual(html[10], p('<code>asdf</code>'))
-        self.assertEqual(html[11], p('"asdf"'))
-        self.assertEqual(html[12], p('<br>[]&nbsp;'))
-        self.assertEqual(html[13], p("r13"))
-        self.assertEqual(html[14], p("r14"))
-
-        r15 = root.find_by_path("rdlformatcode.r15[1]")
-        self.assertEqual(r15.get_html_desc(), p("<span class='rdlfc-index'>[1]</span>"))
-
-        f = root.find_by_path("rdlformatcode.r15[2].f")
-        self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[2]</span>"))
-        f = root.find_by_path("rdlformatcode.r15.f")
-        self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[0:2]</span>"))
-
-        self.assertEqual(html[16], "")
-        self.assertEqual(html[17], "")
-        self.assertEqual(html[18], "<ul><li>a</li><li>b</li><li>c</li></ul>")
-        self.assertEqual(html[19], '<ol type="a"><li>a</li><li>b</li><li>c</li></ol>')
-        self.assertEqual(html[20], p("[index]"))
-        self.assertEqual(html[21], p("[index_parent]"))
+        for escape_html in [True, False, None]:
+            with self.subTest(escape_html=escape_html):
+
+                if escape_html is not None:
+                    self.assertIs(root.top.get_html_desc(escape_html=escape_html), None)
+                else:
+                    # escape_html=None: omit the argument to check that escaping is off by default
+                    self.assertIs(root.top.get_html_desc(), None)
+
+                html = []
+                for i in range(0,25):
+                    reg = root.find_by_path("rdlformatcode.r%d" % i)
+                    if escape_html is not None:
+                        html.append(reg.get_html_desc(escape_html=escape_html))
+                    else:
+                        # escape_html=None: omit the argument to check that escaping is off by default
+                        html.append(reg.get_html_desc())
+
+
+                def p(s):
+                    return "<p>%s</p>" % s
+
+                self.assertEqual(html[0], "<p>asdf</p>")
+                self.assertEqual(html[1], p("<b>asdf</b>"))
+                self.assertEqual(html[2], p("<i>asdf</i>"))
+                self.assertEqual(html[3], p("<u>asdf</u>"))
+                self.assertEqual(html[4], p('<span style="color:red">asdf</span>'))
+                self.assertEqual(html[5], p('<span style="font-size:12">asdf</span>'))
+                self.assertEqual(html[6], p('<a href="github.com">github.com</a>'))
+                self.assertEqual(html[7], p('<a href="github.com">asdf</a>'))
+                self.assertEqual(html[8], p('<a href="mailto:asdf@example.com">asdf@example.com</a>'))
+                self.assertEqual(html[9], p('<img src="image.png">'))
+                self.assertEqual(html[10], p('<code>asdf</code>'))
+                self.assertEqual(html[11], p('&quot;asdf&quot;'))
+                self.assertEqual(html[12], p('<br>[]&nbsp;'))
+                self.assertEqual(html[13], p("r13"))
+                self.assertEqual(html[14], p("r14"))
+
+                r15 = root.find_by_path("rdlformatcode.r15[1]")
+                self.assertEqual(r15.get_html_desc(), p("<span class='rdlfc-index'>[1]</span>"))
+
+                f = root.find_by_path("rdlformatcode.r15[2].f")
+                self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[2]</span>"))
+                f = root.find_by_path("rdlformatcode.r15.f")
+                self.assertEqual(f.get_html_desc(), p("<span class='rdlfc-index_parent'>[0:2]</span>"))
+
+                self.assertEqual(html[16], "")
+                self.assertEqual(html[17], "")
+                self.assertEqual(html[18], "<ul><li>a</li><li>b</li><li>c</li></ul>")
+                self.assertEqual(html[19], '<ol type="a"><li>a</li><li>b</li><li>c</li></ol>')
+                self.assertEqual(html[20], p("[index]"))
+                self.assertEqual(html[21], p("[index_parent]"))
+
+                if escape_html is True:
+                    self.assertEqual(html[22], p("string with a &quot;quote&quot; in it"))
+                    self.assertEqual(html[23], p("tag to be escaped &lt;h1&gt; h1"))
+                else:
+                    self.assertEqual(html[22], p("string with a \"quote\" in it"))
+                    self.assertEqual(html[23], p("tag to be escaped <h1> h1"))
+                # the & character is escaped using the default Markdown processing anyway
+                self.assertEqual(html[24], p("signal &amp;lt"))
 
 
     def test_name_tags(self):
@@ -56,25 +78,46 @@ def test_name_tags(self):
             "rdlformatcode"
         )
 
-        self.assertIs(root.top.get_html_name(), None)
-
-        html = []
-        for i in range(0,20):
-            reg = root.find_by_path("rdlformatcode.r%d" % i)
-            html.append(reg.get_html_name())
-
-        self.assertEqual(html[1], "<b>asdf</b>")
-        self.assertEqual(html[2], "<i>asdf</i>")
-        self.assertEqual(html[3], "<u>asdf</u>")
-        self.assertEqual(html[4], '<span style="color:red">asdf</span>')
-        self.assertEqual(html[5], '<span style="font-size:12">asdf</span>')
-        self.assertEqual(html[6], '<a href="github.com">github.com</a>')
-        self.assertEqual(html[7], '<a href="github.com">asdf</a>')
-        self.assertEqual(html[8], '<a href="mailto:asdf@example.com">asdf@example.com</a>')
-        self.assertEqual(html[10], '<code>asdf</code>')
-        self.assertEqual(html[11], '"asdf"')
-        self.assertEqual(html[12], '[]&nbsp;')
-        self.assertEqual(html[14], "r14")
-
-        self.assertEqual(html[16], "")
-        self.assertEqual(html[17], "")
+        for escape_html in [True, False, None]:
+            with self.subTest(escape_html=escape_html):
+
+                if escape_html is not None:
+                    self.assertIs(root.top.get_html_name(escape_html=escape_html), None)
+                else:
+                    # escape_html=None: omit the argument to check that escaping is off by default
+                    self.assertIs(root.top.get_html_name(), None)
+
+                html = []
+                for i in range(0,25):
+                    reg = root.find_by_path("rdlformatcode.r%d" % i)
+                    if escape_html is not None:
+                        html.append(reg.get_html_name(escape_html=escape_html))
+                    else:
+                        # escape_html=None: omit the argument to check that escaping is off by default
+                        html.append(reg.get_html_name())
+
+                self.assertEqual(html[1], "<b>asdf</b>")
+                self.assertEqual(html[2], "<i>asdf</i>")
+                self.assertEqual(html[3], "<u>asdf</u>")
+                self.assertEqual(html[4], '<span style="color:red">asdf</span>')
+                self.assertEqual(html[5], '<span style="font-size:12">asdf</span>')
+                self.assertEqual(html[6], '<a href="github.com">github.com</a>')
+                self.assertEqual(html[7], '<a href="github.com">asdf</a>')
+                self.assertEqual(html[8], '<a href="mailto:asdf@example.com">asdf@example.com</a>')
+                self.assertEqual(html[10], '<code>asdf</code>')
+                self.assertEqual(html[11], '&quot;asdf&quot;')
+                self.assertEqual(html[12], '[]&nbsp;')
+                self.assertEqual(html[14], "r14")
+
+                self.assertEqual(html[16], "")
+                self.assertEqual(html[17], "")
+
+                if escape_html is True:
+                    self.assertEqual(html[22], "string with a &quot;quote&quot; in it")
+                    self.assertEqual(html[23], "tag to be escaped &lt;h1&gt; h1")
+                    self.assertEqual(html[24], "signal &amp;lt")
+                else:
+                    self.assertEqual(html[22], "string with a \"quote\" in it")
+                    self.assertEqual(html[23], "tag to be escaped <h1> h1")
+                    self.assertEqual(html[24], "signal &lt")
+