Skip to content

Fixed escaping of alt text in ContentFormat.img() #2036

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions blog/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from urllib.parse import urlparse

from django.conf import settings
Expand All @@ -6,6 +7,7 @@
from django.test import RequestFactory
from django.utils import timezone
from django.utils.cache import _generate_cache_header_key
from django.utils.html import format_html
from django.utils.translation import gettext_lazy as _
from django_hosts.resolvers import reverse
from docutils.core import publish_parts
Expand All @@ -21,12 +23,27 @@
}
# Merge optional overrides from the Django setting of the same name into the
# module-level dict; when the setting is absent, getattr() falls back to {}
# and the update is a no-op.
BLOG_DOCUTILS_SETTINGS.update(getattr(settings, "BLOG_DOCUTILS_SETTINGS", {}))

# Reserved Markdown characters. List copied from:
# https://github.com/Python-Markdown/markdown/blob/3.8/markdown/core.py#L112
_MD_ESCAPE_CHARS = "\\`*_{}[]>()#+-.!"
# Character class that matches any single one of the characters listed above.
_MD_ESCAPE_REGEX = re.compile("[" + re.escape(_MD_ESCAPE_CHARS) + "]")


def _md_slugify(value, separator):
    """Return the slug of ``value`` prefixed with ``"s"`` and ``separator``."""
    # The "s" prefix matches the `id_prefix` setting of BLOG_DOCUTILS_SETTINGS
    prefix = "s" + separator
    return prefix + _md_title_slugify(value, separator)


def _md_escape(s):
    """Return ``s`` with a backslash added before every reserved character."""

    def _prefix_backslash(match):
        # Keep the matched character and put a single backslash in front of it.
        return "\\" + match.group(0)

    return _MD_ESCAPE_REGEX.sub(_prefix_backslash, s)


def _rst_escape(s):
    r"""
    Return ``s`` flattened onto a single line so it is safe as an rst option.

    Line breaks mess up rst, so it's easier to replace them with spaces.
    docutils breaks its input into lines with ``str.splitlines()``, which
    treats ``\r``, ``\r\n`` and a few other characters as line boundaries in
    addition to ``\n``; using the same method here guarantees that no
    boundary survives (replacing only ``\n`` would leave e.g. the ``\r`` of a
    ``\r\n`` pair in place).

    Each boundary between two lines becomes one space. A line break at the
    very end of ``s`` is dropped instead of becoming a trailing space.
    """
    return " ".join(s.splitlines())


class EntryQuerySet(models.QuerySet):
def published(self):
    """Return the active entries whose ``pub_date`` is now or earlier."""
    active_entries = self.active()
    return active_entries.filter(pub_date__lte=timezone.now())
Expand Down Expand Up @@ -70,9 +87,9 @@ def img(self, url, alt_text):
"""
CF = type(self)
return {
CF.REST: f".. image:: {url}\n :alt: {alt_text}",
CF.HTML: f'<img src="{url}" alt="{alt_text}">',
CF.MARKDOWN: f"![{alt_text}]({url})",
CF.REST: f".. image:: {url}\n :alt: {_rst_escape(alt_text)}",
CF.HTML: format_html('<img src="{}" alt="{}">', url, alt_text),
CF.MARKDOWN: f"![{_md_escape(alt_text)}]({url})",
}[self]


Expand Down
41 changes: 41 additions & 0 deletions blog/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,44 @@ def test_contentformat_image_tags(self):
cf.img(url="/test/image.png", alt_text="TEST"),
expected,
)

def test_alt_text_html_escape(self):
testdata = [
(ContentFormat.HTML, 'te"st', '<img src="." alt="te&quot;st">'),
(ContentFormat.HTML, "te<st>", '<img src="." alt="te&lt;st&gt;">'),
(ContentFormat.MARKDOWN, 'te"st', '<img src="." alt="te&quot;st">'),
(ContentFormat.MARKDOWN, "te[st]", '<img src="." alt="te[st]">'),
(ContentFormat.MARKDOWN, "te{st}", '<img src="." alt="te{st}">'),
(ContentFormat.MARKDOWN, "te<st>", '<img src="." alt="te&lt;st&gt;">'),
(ContentFormat.MARKDOWN, "test*", '<img src="." alt="test*">'),
(ContentFormat.MARKDOWN, "test_", '<img src="." alt="test_">'),
(ContentFormat.MARKDOWN, "test`", '<img src="." alt="test`">'),
(ContentFormat.MARKDOWN, "test+", '<img src="." alt="test+">'),
(ContentFormat.MARKDOWN, "test-", '<img src="." alt="test-">'),
(ContentFormat.MARKDOWN, "test.", '<img src="." alt="test.">'),
(ContentFormat.MARKDOWN, "test!", '<img src="." alt="test!">'),
(ContentFormat.MARKDOWN, "te\nst", '<img src="." alt="te\nst">'),
(ContentFormat.REST, 'te"st', '<img src="." alt="te&quot;st">'),
(ContentFormat.REST, "te[st]", '<img src="." alt="te[st]">'),
(ContentFormat.REST, "te{st}", '<img src="." alt="te{st}">'),
(ContentFormat.REST, "te<st>", '<img src="." alt="te&lt;st&gt;">'),
(ContentFormat.REST, "te:st", '<img src="." alt="te:st">'),
(ContentFormat.REST, "test*", '<img src="." alt="test*">'),
(ContentFormat.REST, "test_", '<img src="." alt="test_">'),
(ContentFormat.REST, "test`", '<img src="." alt="test`">'),
(ContentFormat.REST, "test+", '<img src="." alt="test+">'),
(ContentFormat.REST, "test-", '<img src="." alt="test-">'),
(ContentFormat.REST, "test.", '<img src="." alt="test.">'),
(ContentFormat.REST, "test!", '<img src="." alt="test!">'),
(ContentFormat.REST, "te\nst", '<img src="." alt="te st">'),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to support newlines in alt text? I'm having trouble verifying that newlines would have any effect on rendering.

]
for cf, alt_text, expected in testdata:
# RST doesn't like an empty src, so we use . instead
img_tag = cf.img(url=".", alt_text=alt_text)
if cf is ContentFormat.MARKDOWN:
expected = f"<p>{expected}</p>"
with self.subTest(cf=cf, alt_text=alt_text):
self.assertHTMLEqual(
ContentFormat.to_html(cf, img_tag),
expected,
)