From 161b8fd8e506ea6aec734fae4bb8e96ce91dcf99 Mon Sep 17 00:00:00 2001
From: Baptiste Mispelon <baptiste.mispelon@torchbox.com>
Date: Thu, 17 Apr 2025 08:11:51 +0200
Subject: [PATCH] Fixed escaping of alt text in ContentFormat.img()

---
 blog/models.py | 23 ++++++++++++++++++++---
 blog/tests.py  | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 3 deletions(-)
diff --git a/blog/models.py b/blog/models.py
index 688803936..657a19851 100644
--- a/blog/models.py
+++ b/blog/models.py
@@ -1,3 +1,4 @@
+import re
 from urllib.parse import urlparse
 
 from django.conf import settings
@@ -6,6 +7,7 @@
 from django.test import RequestFactory
 from django.utils import timezone
 from django.utils.cache import _generate_cache_header_key
+from django.utils.html import format_html
 from django.utils.translation import gettext_lazy as _
 from django_hosts.resolvers import reverse
 from docutils.core import publish_parts
@@ -21,12 +23,27 @@
 }
 BLOG_DOCUTILS_SETTINGS.update(getattr(settings, "BLOG_DOCUTILS_SETTINGS", {}))
 
+# Characters that _md_escape() prefixes with a backslash. List copied from:
+# https://github.com/Python-Markdown/markdown/blob/3.8/markdown/core.py#L112
+_MD_ESCAPE_CHARS = "\\`*_{}[]>()#+-.!"
+_MD_ESCAPE_REGEX = re.compile(f"[{re.escape(_MD_ESCAPE_CHARS)}]")
+
 
 def _md_slugify(value, separator):
     # matches the `id_prefix` setting of BLOG_DOCUTILS_SETTINGS
     return "s" + separator + _md_title_slugify(value, separator)
 
 
+def _md_escape(s):
+    # Backslash-escape every character of `s` matched by _MD_ESCAPE_REGEX.
+    return re.sub(_MD_ESCAPE_REGEX, r"\\\g<0>", s)
+
+
+def _rst_escape(s):
+    # Any line break (\n, \r\n, \r, ...) messes up rst; use spaces instead.
+    return " ".join(s.splitlines())
+
+
 class EntryQuerySet(models.QuerySet):
     def published(self):
         return self.active().filter(pub_date__lte=timezone.now())
@@ -70,9 +87,9 @@ def img(self, url, alt_text):
         """
         CF = type(self)
         return {
-            CF.REST: f".. image:: {url}\n   :alt: {alt_text}",
-            CF.HTML: f'<img src="{url}" alt="{alt_text}">',
-            CF.MARKDOWN: f"![{alt_text}]({url})",
+            CF.REST: f".. image:: {url}\n   :alt: {_rst_escape(alt_text)}",
+            CF.HTML: format_html('<img src="{}" alt="{}">', url, alt_text),
+            CF.MARKDOWN: f"![{_md_escape(alt_text)}]({url})",
         }[self]
 
 
diff --git a/blog/tests.py b/blog/tests.py
index e63366a12..5d407cc46 100644
--- a/blog/tests.py
+++ b/blog/tests.py
@@ -274,3 +274,44 @@ def test_contentformat_image_tags(self):
                     cf.img(url="/test/image.png", alt_text="TEST"),
                     expected,
                 )
+
+    def test_alt_text_html_escape(self):
+        testdata = [
+            (ContentFormat.HTML, 'te"st', '<img src="." alt="te&quot;st">'),
+            (ContentFormat.HTML, "te<st>", '<img src="." alt="te&lt;st&gt;">'),
+            (ContentFormat.MARKDOWN, 'te"st', '<img src="." alt="te&quot;st">'),
+            (ContentFormat.MARKDOWN, "te[st]", '<img src="." alt="te[st]">'),
+            (ContentFormat.MARKDOWN, "te{st}", '<img src="." alt="te{st}">'),
+            (ContentFormat.MARKDOWN, "te<st>", '<img src="." alt="te&lt;st&gt;">'),
+            (ContentFormat.MARKDOWN, "test*", '<img src="." alt="test*">'),
+            (ContentFormat.MARKDOWN, "test_", '<img src="." alt="test_">'),
+            (ContentFormat.MARKDOWN, "test`", '<img src="." alt="test`">'),
+            (ContentFormat.MARKDOWN, "test+", '<img src="." alt="test+">'),
+            (ContentFormat.MARKDOWN, "test-", '<img src="." alt="test-">'),
+            (ContentFormat.MARKDOWN, "test.", '<img src="." alt="test.">'),
+            (ContentFormat.MARKDOWN, "test!", '<img src="." alt="test!">'),
+            (ContentFormat.MARKDOWN, "te\nst", '<img src="." alt="te\nst">'),
+            (ContentFormat.REST, 'te"st', '<img src="." alt="te&quot;st">'),
+            (ContentFormat.REST, "te[st]", '<img src="." alt="te[st]">'),
+            (ContentFormat.REST, "te{st}", '<img src="." alt="te{st}">'),
+            (ContentFormat.REST, "te<st>", '<img src="." alt="te&lt;st&gt;">'),
+            (ContentFormat.REST, "te:st", '<img src="." alt="te:st">'),
+            (ContentFormat.REST, "test*", '<img src="." alt="test*">'),
+            (ContentFormat.REST, "test_", '<img src="." alt="test_">'),
+            (ContentFormat.REST, "test`", '<img src="." alt="test`">'),
+            (ContentFormat.REST, "test+", '<img src="." alt="test+">'),
+            (ContentFormat.REST, "test-", '<img src="." alt="test-">'),
+            (ContentFormat.REST, "test.", '<img src="." alt="test.">'),
+            (ContentFormat.REST, "test!", '<img src="." alt="test!">'),
+            (ContentFormat.REST, "te\nst", '<img src="." alt="te st">'),
+        ]
+        for cf, alt_text, expected in testdata:
+            with self.subTest(cf=cf, alt_text=alt_text):
+                # RST doesn't like an empty src, so we use . instead
+                img_tag = cf.img(url=".", alt_text=alt_text)
+                if cf is ContentFormat.MARKDOWN:
+                    # The Markdown output is expected wrapped in a paragraph.
+                    expected = f"<p>{expected}</p>"
+                # Rendering happens inside subTest so that an error in one
+                # case is reported with its parameters and doesn't skip the rest.
+                self.assertHTMLEqual(ContentFormat.to_html(cf, img_tag), expected)