From 161b8fd8e506ea6aec734fae4bb8e96ce91dcf99 Mon Sep 17 00:00:00 2001 From: Baptiste Mispelon Date: Thu, 17 Apr 2025 08:11:51 +0200 Subject: [PATCH] Fixed escaping of alt text in ContentFormat.img() --- blog/models.py | 23 ++++++++++++++++++++--- blog/tests.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/blog/models.py b/blog/models.py index 688803936..657a19851 100644 --- a/blog/models.py +++ b/blog/models.py @@ -1,3 +1,4 @@ +import re from urllib.parse import urlparse from django.conf import settings @@ -6,6 +7,7 @@ from django.test import RequestFactory from django.utils import timezone from django.utils.cache import _generate_cache_header_key +from django.utils.html import format_html from django.utils.translation import gettext_lazy as _ from django_hosts.resolvers import reverse from docutils.core import publish_parts @@ -21,12 +23,27 @@ } BLOG_DOCUTILS_SETTINGS.update(getattr(settings, "BLOG_DOCUTILS_SETTINGS", {})) +# List copied from: +# https://github.com/Python-Markdown/markdown/blob/3.8/markdown/core.py#L112 +_MD_ESCAPE_CHARS = "\\`*_{}[]>()#+-.!" +_MD_ESCAPE_REGEX = re.compile(f"[{re.escape(_MD_ESCAPE_CHARS)}]") + def _md_slugify(value, separator): # matches the `id_prefix` setting of BLOG_DOCUTILS_SETTINGS return "s" + separator + _md_title_slugify(value, separator) +def _md_escape(s): + # Add a backslash \ before any reserved characters + return _MD_ESCAPE_REGEX.sub(r"\\\g<0>", s) + + +def _rst_escape(s): + # New lines mess up rst, it's easier to replace them with spaces. + return s.replace("\n", " ") + + class EntryQuerySet(models.QuerySet): def published(self): return self.active().filter(pub_date__lte=timezone.now()) @@ -70,9 +87,9 @@ def img(self, url, alt_text): """ CF = type(self) return { - CF.REST: f".. image:: {url}\n :alt: {alt_text}", - CF.HTML: f'{alt_text}', - CF.MARKDOWN: f"![{alt_text}]({url})", + CF.REST: f".. image:: {url}\n :alt: {_rst_escape(alt_text)}", + CF.HTML: format_html('{}', url, alt_text), + CF.MARKDOWN: f"![{_md_escape(alt_text)}]({url})", }[self] diff --git a/blog/tests.py b/blog/tests.py index e63366a12..5d407cc46 100644 --- a/blog/tests.py +++ b/blog/tests.py @@ -274,3 +274,44 @@ def test_contentformat_image_tags(self): cf.img(url="/test/image.png", alt_text="TEST"), expected, ) + + def test_alt_text_html_escape(self): + testdata = [ + (ContentFormat.HTML, 'te"st', 'te"st'), + (ContentFormat.HTML, "te", 'te<st>'), + (ContentFormat.MARKDOWN, 'te"st', 'te"st'), + (ContentFormat.MARKDOWN, "te[st]", 'te[st]'), + (ContentFormat.MARKDOWN, "te{st}", 'te{st}'), + (ContentFormat.MARKDOWN, "te", 'te<st>'), + (ContentFormat.MARKDOWN, "test*", 'test*'), + (ContentFormat.MARKDOWN, "test_", 'test_'), + (ContentFormat.MARKDOWN, "test`", 'test`'), + (ContentFormat.MARKDOWN, "test+", 'test+'), + (ContentFormat.MARKDOWN, "test-", 'test-'), + (ContentFormat.MARKDOWN, "test.", 'test.'), + (ContentFormat.MARKDOWN, "test!", 'test!'), + (ContentFormat.MARKDOWN, "te\nst", 'te\nst'), + (ContentFormat.REST, 'te"st', 'te"st'), + (ContentFormat.REST, "te[st]", 'te[st]'), + (ContentFormat.REST, "te{st}", 'te{st}'), + (ContentFormat.REST, "te", 'te<st>'), + (ContentFormat.REST, "te:st", 'te:st'), + (ContentFormat.REST, "test*", 'test*'), + (ContentFormat.REST, "test_", 'test_'), + (ContentFormat.REST, "test`", 'test`'), + (ContentFormat.REST, "test+", 'test+'), + (ContentFormat.REST, "test-", 'test-'), + (ContentFormat.REST, "test.", 'test.'), + (ContentFormat.REST, "test!", 'test!'), + (ContentFormat.REST, "te\nst", 'te st'), + ] + for cf, alt_text, expected in testdata: + # RST doesn't like an empty src, so we use . instead + img_tag = cf.img(url=".", alt_text=alt_text) + if cf is ContentFormat.MARKDOWN: + expected = f"

{expected}

" + with self.subTest(cf=cf, alt_text=alt_text): + self.assertHTMLEqual( + ContentFormat.to_html(cf, img_tag), + expected, + )