From 16ccc52296979bc6956ff450e92148f29965fa64 Mon Sep 17 00:00:00 2001
From: Waylan Limberg
Date: Wed, 6 Mar 2024 15:33:18 -0500
Subject: [PATCH 1/2] Explicitly omit caret and backslash from abbr
Fixes #1444.
---
docs/changelog.md | 1 +
docs/extensions/abbreviations.md | 8 +++
markdown/extensions/abbr.py | 15 +++---
tests/test_extensions.py | 29 -----------
tests/test_syntax/extensions/test_abbr.py | 61 +++++++++++++++++++++++
5 files changed, 76 insertions(+), 38 deletions(-)
diff --git a/docs/changelog.md b/docs/changelog.md
index 33f05ce8f..7676c7864 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Include `scripts/*.py` in the generated source tarballs (#1430).
* Ensure lines after heading in loose list are properly detabbed (#1443).
* Give smarty tree processor higher priority than toc (#1440).
+* Explicitly omit carot (`^`) and backslash (`\`) from abbreviations (#1444).
## [3.5.2] -- 2024-01-10
diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md
index d03651f01..a460f4212 100644
--- a/docs/extensions/abbreviations.md
+++ b/docs/extensions/abbreviations.md
@@ -36,6 +36,14 @@ will be rendered as:
is maintained by the W3C.
```
+The following three characters are not permitted in an abbreviation. Any
+abbreviation defninitiosn which include one will not be recognized as an
+abbreviation definition.
+
+1. carrot (`^`)
+2. backslash (`\`)
+3. right square bracket (`]`)
+
Usage
-----
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index 738368afe..46d3f35cd 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -41,7 +41,7 @@ def extendMarkdown(self, md):
class AbbrPreprocessor(BlockProcessor):
""" Abbreviation Preprocessor - parse text for abbr references. """
- RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
+ RE = re.compile(r'^[*]\[(?P<abbr>[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
def test(self, parent: etree.Element, block: str) -> bool:
return True
@@ -73,18 +73,15 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
def _generate_pattern(self, text: str) -> str:
"""
- Given a string, returns an regex pattern to match that string.
+ Given a string, returns a regex pattern to match that string.
- 'HTML' -> r'(?P<abbr>[H][T][M][L])'
+ 'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)'
- Note: we force each char as a literal match (in brackets) as we don't
- know what they will be beforehand.
+ Note: we force each char as a literal match via a character set (in brackets)
+ as we don't know what they will be beforehand.
"""
- chars = list(text)
- for i in range(len(chars)):
- chars[i] = r'[%s]' % chars[i]
- return r'(?P<abbr>\b%s\b)' % (r''.join(chars))
+ return f"(?P<abbr>\\b{ ''.join(f'[{ c }]' for c in text) }\\b)"
class AbbrInlineProcessor(InlineProcessor):
diff --git a/tests/test_extensions.py b/tests/test_extensions.py
index a9e789f19..c96772fff 100644
--- a/tests/test_extensions.py
+++ b/tests/test_extensions.py
@@ -85,35 +85,6 @@ def testConfigAsKwargsOnInit(self):
self.assertEqual(ext.getConfigs(), {'foo': 'baz', 'bar': 'blah'})
-class TestAbbr(unittest.TestCase):
- """ Test abbr extension. """
-
- def setUp(self):
- self.md = markdown.Markdown(extensions=['abbr'])
-
- def testSimpleAbbr(self):
- """ Test Abbreviations. """
- text = 'Some text with an ABBR and a REF. Ignore REFERENCE and ref.' + \
- '\n\n*[ABBR]: Abbreviation\n' + \
- '*[REF]: Abbreviation Reference'
- self.assertEqual(
- self.md.convert(text),
- '<p>Some text with an <abbr title="Abbreviation">ABBR</abbr> '
- 'and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore '
- 'REFERENCE and ref.</p>'
- )
-
- def testNestedAbbr(self):
- """ Test Nested Abbreviations. """
- text = '[ABBR](/foo) and _ABBR_\n\n' + \
- '*[ABBR]: Abbreviation'
- self.assertEqual(
- self.md.convert(text),
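- '<p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a> '
- 'and <em><abbr title="Abbreviation">ABBR</abbr></em></p>'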
- )
-
-
class TestMetaData(unittest.TestCase):
""" Test `MetaData` extension. """
diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py
index fbb25ffb2..708af51ba 100644
--- a/tests/test_syntax/extensions/test_abbr.py
+++ b/tests/test_syntax/extensions/test_abbr.py
@@ -95,6 +95,25 @@ def test_abbr_override(self):
)
)
+ def test_abbr_nested(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ [ABBR](/foo)
+
+ _ABBR_
+
+ *[ABBR]: Abbreviation
+ """
+ ),
+ self.dedent(
+ """
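+ <p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a></p>
+ <p><em><abbr title="Abbreviation">ABBR</abbr></em></p>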
+ """
+ )
+ )
+
def test_abbr_no_blank_Lines(self):
self.assertMarkdownRenders(
self.dedent(
@@ -240,3 +259,45 @@ def test_abbr_single_quoted(self):
"""
)
)
+
+ def test_abbr_ignore_special_chars(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ r"""
+ [^] [\\] [\]] []]
+
+ *[^]: Not an abbreviation
+
+ *[\\]: Not an abbreviation
+
+ *[\]]: Not an abbreviation
+
+ *[]]: Not an abbreviation
+ """
+ ),
+ self.dedent(
+ r"""
+ <p>[^] [\] []] []]</p>
+ <p>*[^]: Not an abbreviation</p>
+ <p>*[\]: Not an abbreviation</p>
+ <p>*[]]: Not an abbreviation</p>
+ <p>*[]]: Not an abbreviation</p>
+ """
+ )
+ )
+
+ def test_abbr_hyphen(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ ABBR-abbr
+
+ *[ABBR-abbr]: Abbreviation
+ """
+ ),
+ self.dedent(
+ """
+ <p><abbr title="Abbreviation">ABBR-abbr</abbr></p>
+ """
+ )
+ )
From 4decdfe2ce8eff81091d1f0186b7be12eb968140 Mon Sep 17 00:00:00 2001
From: Waylan Limberg
Date: Wed, 6 Mar 2024 15:39:48 -0500
Subject: [PATCH 2/2] cleanup
---
docs/changelog.md | 2 +-
docs/extensions/abbreviations.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/changelog.md b/docs/changelog.md
index 7676c7864..8deaefd28 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Include `scripts/*.py` in the generated source tarballs (#1430).
* Ensure lines after heading in loose list are properly detabbed (#1443).
* Give smarty tree processor higher priority than toc (#1440).
-* Explicitly omit carot (`^`) and backslash (`\`) from abbreviations (#1444).
+* Explicitly omit caret (`^`) and backslash (`\`) from abbreviations (#1444).
## [3.5.2] -- 2024-01-10
diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md
index a460f4212..9a98a91bd 100644
--- a/docs/extensions/abbreviations.md
+++ b/docs/extensions/abbreviations.md
@@ -37,7 +37,7 @@ is maintained by the W3C.
```
The following three characters are not permitted in an abbreviation. Any
-abbreviation defninitiosn which include one will not be recognized as an
+abbreviation definition which includes one will not be recognized as an
abbreviation definition.
1. carrot (`^`)