Skip to content

Commit

Permalink
Changed Abbreviation Extension's glossary option
Browse files Browse the repository at this point in the history
to accept a dictionary instead of a file.

Changed `load_glossary` to merge in a dictionary,
overwriting any existing records.

Added a `reset_glossary` method to clear the glossary.

Removed the `use_last_abbr` option, since `glossary`
can be used to resolve the use case `use_last_abbr`
was written for.
  • Loading branch information
nbanyan committed Jun 5, 2024
1 parent 2009c02 commit 831449b
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 74 deletions.
45 changes: 14 additions & 31 deletions markdown/extensions/abbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

from __future__ import annotations

import codecs
from . import Extension
from ..util import parseBoolValue
from ..blockprocessors import BlockProcessor
Expand All @@ -44,15 +43,10 @@ class AbbrExtension(Extension):
def __init__(self, **kwargs):
""" Initiate Extension and set up configs. """
self.config = {
'use_last_abbr': [
True,
'True to use the last instance of an abbreviation, rather than the first instance.'
'Default: `True`.'
],
'glossary': [
'',
'Path to the Markdown file containing abbreviations to be applied to every page.'
"Default: `''`"
{},
'A dictionary where the `key` is the abbreviation and the `value` is the definition.'
"Default: `{}`"
],
}
""" Default configuration options. """
Expand All @@ -66,28 +60,19 @@ def reset(self):
if (self.glossary):
self.abbrs.update(self.glossary)

def load_glossary(self, md: Markdown, filename: str):
    """Load abbreviation definitions from the file at `filename` into the glossary.

    The file is read as UTF-8, leading byte-order marks are stripped, and every
    line is handed on its own to an `AbbrBlockprocessor` that was created with
    `self.glossary` as its abbreviation store.

    Arguments:
        md: Markdown instance whose `parser` is passed to the block processor.
        filename: Path of the glossary file. Nothing is done when this is
            empty or is not a string.
    """
    if not filename or not isinstance(filename, str):
        return

    # The `with` block guarantees the handle is closed even if reading or
    # decoding raises, which the previous manual `close()` did not.
    with codecs.open(filename, mode="r", encoding='utf-8') as input_file:
        text = input_file.read()
    # `text` is already a `str` here, so no further conversion is needed.
    text = text.lstrip('\ufeff')  # remove the byte-order mark

    bp = AbbrBlockprocessor(md.parser, self.glossary, self.getConfigs())
    # Feed the file one line at a time, each as its own single-item block list.
    for line in text.split("\n"):
        bp.run(None, [line])
def reset_glossary(self):
    """Empty the glossary in place, dropping every abbreviation stored in it."""
    glossary = self.glossary
    glossary.clear()

def load_glossary(self, dictionary: dict[str, str]):
    """Merge `dictionary` into the glossary.

    Abbreviations that are not yet in the glossary are added. When an
    abbreviation is present in both, the definition already stored in the
    glossary is kept and the one from `dictionary` is discarded. An empty
    (or otherwise falsy) `dictionary` leaves the glossary untouched.

    The merged result is a new dict that is assigned to `self.glossary`;
    neither the previous glossary object nor `dictionary` is mutated.
    """
    if not dictionary:
        return
    merged = dict(dictionary)
    # Applied last so that entries already in the glossary take precedence.
    merged.update(self.glossary)
    self.glossary = merged

def extendMarkdown(self, md):
""" Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
if (self.config['glossary'][0]):
self.load_glossary(md, self.config['glossary'][0])
self.load_glossary(self.config['glossary'][0])
self.abbrs.update(self.glossary)
md.registerExtension(self)
md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
Expand Down Expand Up @@ -144,9 +129,8 @@ class AbbrBlockprocessor(BlockProcessor):

RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

def __init__(self, parser: BlockParser, abbrs: dict, config: dict[str, Any]):
def __init__(self, parser: BlockParser, abbrs: dict, config: dict):
self.abbrs: dict = abbrs
self.use_last_abbr: bool = parseBoolValue(config["use_last_abbr"])
super().__init__(parser)

def test(self, parent: etree.Element, block: str) -> bool:
Expand All @@ -163,8 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
if m:
abbr = m.group('abbr').strip()
title = m.group('title').strip()
if self.use_last_abbr or abbr not in self.abbrs:
self.abbrs[abbr] = title
self.abbrs[abbr] = title
if block[m.end():].strip():
# Add any content after match back to blocks as separate block
blocks.insert(0, block[m.end():].lstrip('\n'))
Expand Down
85 changes: 42 additions & 43 deletions tests/test_syntax/extensions/test_abbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@
"""

import os
from tempfile import mkstemp
import atexit
from markdown.test_tools import TestCase
from markdown import Markdown
from markdown.extensions.abbr import AbbrExtension


class TestAbbr(TestCase):
maxDiff = None

Expand Down Expand Up @@ -121,55 +120,51 @@ def test_abbr_multiple_nested(self):
)
)

def test_abbr_override(self):
self.assertMarkdownRenders(
self.dedent(
"""
ABBR
def test_abbr_glossary(self):

*[ABBR]: Ignored
*[ABBR]: The override
"""
),
self.dedent(
"""
<p><abbr title="The override">ABBR</abbr></p>
"""
),
extensions=[AbbrExtension(use_last_abbr=True)]
)
glossary = {
"ABBR" : "Abbreviation",
"abbr" : "Abbreviation",
"HTML" : "Hyper Text Markup Language",
"W3C" : "World Wide Web Consortium"
}

def test_abbr_override_Ignored(self):
self.assertMarkdownRenders(
self.dedent(
"""
ABBR
abbr
*[ABBR]: Abbreviation
*[ABBR]: Override Ignored
HTML
W3C
"""
),
self.dedent(
"""
<p><abbr title="Abbreviation">ABBR</abbr></p>
<p><abbr title="Abbreviation">ABBR</abbr>
<abbr title="Abbreviation">abbr</abbr></p>
<p><abbr title="Hyper Text Markup Language">HTML</abbr>
<abbr title="World Wide Web Consortium">W3C</abbr></p>
"""
),
extensions=[AbbrExtension(use_last_abbr=False)]
extensions=[AbbrExtension(glossary=glossary)]
)

def test_abbr_glossary(self):
# Create temporary glossary file and set a trigger to guarantee it is deleted even if this test fails
temp_file, glossary_file = mkstemp(suffix='.md')
os.close(temp_file)
cleanup_trigger = atexit.register(os.remove, glossary_file)

with open(glossary_file, 'w', encoding='utf-8') as temp_file:
temp_file.writelines([
"*[ABBR]: Abbreviation\n",
"*[abbr]: Abbreviation\n",
"*[HTML]: Hyper Text Markup Language\n",
"*[W3C]: World Wide Web Consortium\n"
])
def test_abbr_glossary_2(self):

glossary = {
"ABBR" : "Abbreviation",
"abbr" : "Abbreviation",
"HTML" : "Hyper Text Markup Language",
"W3C" : "World Wide Web Consortium"
}

glossary_2 = {
"ABBR" : "New Abbreviation"
}

abbr_ext = AbbrExtension(glossary=glossary)
abbr_ext.load_glossary(glossary_2)

self.assertMarkdownRenders(
self.dedent(
Expand All @@ -181,15 +176,12 @@ def test_abbr_glossary(self):
),
self.dedent(
"""
<p><abbr title="Abbreviation">ABBR</abbr> <abbr title="Abbreviation">abbr</abbr></p>
<p><abbr title="New Abbreviation">ABBR</abbr> <abbr title="Abbreviation">abbr</abbr></p>
<p><abbr title="Hyper Text Markup Language">HTML</abbr> <abbr title="World Wide Web Consortium">W3C</abbr></p>
"""
),
extensions=[AbbrExtension(glossary=glossary_file)]
extensions=[abbr_ext]
)
# cleanup
os.remove(glossary_file)
atexit.unregister(cleanup_trigger)

def test_abbr_nested(self):
self.assertMarkdownRenders(
Expand Down Expand Up @@ -443,15 +435,18 @@ def test_abbr_superset_vs_subset(self):
self.dedent(
"""
abbr, SS, and abbr-SS should have different definitions.
*[abbr]: Abbreviation Definition
*[abbr-SS]: Abbreviation Superset Definition
*[SS]: Superset Definition
"""
),
self.dedent(
"""
<p><abbr title="Abbreviation Definition">abbr</abbr>, <abbr title="Superset Definition">SS</abbr>, and <abbr title="Abbreviation Superset Definition">abbr-SS</abbr> should have different definitions.</p>
<p><abbr title="Abbreviation Definition">abbr</abbr>, """
+ """<abbr title="Superset Definition">SS</abbr>, """
+ """and <abbr title="Abbreviation Superset Definition">abbr-SS</abbr> """
+ """should have different definitions.</p>
"""
)
)
Expand Down Expand Up @@ -487,3 +482,7 @@ def test_abbr_reset(self):
self.assertEqual(ext.abbrs, {})
md.convert('*[foo]: Foo Definition')
self.assertEqual(ext.abbrs, {'foo': 'Foo Definition'})

# Allow this test module to be run directly as a script.
import unittest

if __name__ == '__main__':
    unittest.main()

0 comments on commit 831449b

Please sign in to comment.