Skip to content

Commit

Permalink
Added a glossary option for the abbr extension
Browse files Browse the repository at this point in the history
The glossary file also uses the Markdown
abbreviation syntax (`AbbrBlockprocessor` is
used to process the file) and keeps the glossary
definitions separate from the page definitions,
allowing the glossary to be applied to every page
while only being processed once.
  • Loading branch information
nbanyan committed Jun 3, 2024
1 parent 9fed7aa commit 2009c02
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 8 deletions.
12 changes: 7 additions & 5 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,18 @@ Abbreviations are now sorted by length before executing `AbbrTreeprocessor`
to ensure that multi-word abbreviations are implemented even if an abbreviation
exists for one of those component words. (#1465)

Added an optional `use_last_abbr` configuration option to the abbreviations
extension. Default (`True`) maintains the existing behavior. `False` causes
the extension to only use the first instance of an abbreviation, rather than
the last.

Empty abbreviations are now skipped by `AbbrTreeprocessor`. This avoids applying
abbr tags to text without a title value. This also allows disabling an
abbreviation, which may be useful for documents that use two terms with
identical abbreviations.

Added an optional `glossary` configuration option to the abbreviations extension.
This provides a simple and efficient way to apply abbreviations to every page.

Added an optional `use_last_abbr` configuration option to the abbreviations
extension. Default (`True`) maintains the existing behavior. `False` causes
the extension to only use the first instance of an abbreviation, rather than
the last.


### Fixed
Expand Down
15 changes: 14 additions & 1 deletion docs/extensions/abbreviations.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,20 @@ Usage
See [Extensions](index.md) for general extension usage. Use `abbr` as the name
of the extension.

This extension does not accept any special configuration options.
The following options are provided to configure the output:

* **`use_last_abbr`**:
`True` to use the last instance of an abbreviation, rather than the first instance.

This is useful when auto-appending glossary files to pages while still wanting the page's
abbreviations to take precedence. Not recommended for use with the `glossary` option.

* **`glossary`**:
Path to a Markdown file containing abbreviations to be applied to every page.

The abbreviations from this file will be the default abbreviations applied to every page, with
abbreviations defined on the page taking precedence (unless `use_last_abbr` is set to `False`). The
glossary file should use the same abbreviation syntax described on this page.

A trivial example:

Expand Down
32 changes: 31 additions & 1 deletion markdown/extensions/abbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from __future__ import annotations

import codecs
from . import Extension
from ..util import parseBoolValue
from ..blockprocessors import BlockProcessor
Expand All @@ -48,17 +49,46 @@ def __init__(self, **kwargs):
'True to use the last instance of an abbreviation, rather than the first instance.'
'Default: `True`.'
],
'glossary': [
'',
'Path to the Markdown file containing abbreviations to be applied to every page.'
"Default: `''`"
],
}
""" Default configuration options. """
super().__init__(**kwargs)
self.abbrs = {}
self.glossary = {}

def reset(self):
    """ Drop every stored abbreviation, then re-seed the store from the glossary (if any). """
    abbrs = self.abbrs
    abbrs.clear()
    glossary = self.glossary
    if not glossary:
        # No glossary entries: the store simply stays empty.
        return
    abbrs.update(glossary)

def load_glossary(self, md: Markdown, filename: str):
    """
    Populate `self.glossary` from the abbreviation definitions in a Markdown file.

    The file is read as UTF-8 and handed, one line at a time, to an
    `AbbrBlockprocessor` bound to `self.glossary`, so glossary entries are kept
    apart from the page-level entries in `self.abbrs`.

    Arguments:
        md: The `Markdown` instance whose `parser` is given to the block processor.
        filename: Path to the glossary file. Nothing is loaded when this is empty
            or is not a string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    if not filename or not isinstance(filename, str):
        return

    # The context manager guarantees the handle is closed even when reading or
    # decoding raises, which the previous manual `close()` did not.
    with codecs.open(filename, mode="r", encoding='utf-8') as input_file:
        text = input_file.read()
    text = text.lstrip('\ufeff')  # remove the byte-order mark

    # `text` is already a decoded `str` at this point, so no further conversion
    # (or `UnicodeDecodeError` handling) is needed; decoding errors surface
    # from `read()` above.
    bp = AbbrBlockprocessor(md.parser, self.glossary, self.getConfigs())
    for line in text.split("\n"):
        # Each line is passed as its own single-item block list with no parent
        # element; the processor is used only to record definitions.
        bp.run(None, [line])

def extendMarkdown(self, md):
    """ Load the optional glossary, then register this extension and its two processors on `md`. """
    glossary_path = self.config['glossary'][0]
    if glossary_path:
        # Parse the glossary file and seed the working abbreviation store with it.
        self.load_glossary(md, glossary_path)
        self.abbrs.update(self.glossary)

    md.registerExtension(self)

    tree_processor = AbbrTreeprocessor(md, self.abbrs)
    md.treeprocessors.register(tree_processor, 'abbr', 7)

    block_processor = AbbrBlockprocessor(md.parser, self.abbrs, self.getConfigs())
    md.parser.blockprocessors.register(block_processor, 'abbr', 16)
Expand All @@ -85,7 +115,7 @@ def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -
el.insert(0, abbr)
text = text[:m.start()]
el.text = text
if parent and el.tail:
if parent is not None and el.tail:
tail = el.tail
index = list(parent).index(el) + 1
for m in reversed(list(self.RE.finditer(tail))):
Expand Down
38 changes: 37 additions & 1 deletion tests/test_syntax/extensions/test_abbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
License: BSD (see LICENSE.md for details).
"""

import os
from tempfile import mkstemp
import atexit
from markdown.test_tools import TestCase
from markdown import Markdown
from markdown.extensions.abbr import AbbrExtension


class TestAbbr(TestCase):
maxDiff = None

Expand Down Expand Up @@ -155,6 +157,40 @@ def test_abbr_override_Ignored(self):
extensions=[AbbrExtension(use_last_abbr=False)]
)

def test_abbr_glossary(self):
    # Reserve a temporary glossary path; the atexit hook removes the file even
    # if an assertion below fails before the explicit cleanup runs.
    fd, glossary_path = mkstemp(suffix='.md')
    os.close(fd)
    cleanup_hook = atexit.register(os.remove, glossary_path)

    definitions = [
        "*[ABBR]: Abbreviation\n",
        "*[abbr]: Abbreviation\n",
        "*[HTML]: Hyper Text Markup Language\n",
        "*[W3C]: World Wide Web Consortium\n"
    ]
    with open(glossary_path, 'w', encoding='utf-8') as glossary:
        glossary.writelines(definitions)

    source = self.dedent(
        """
        ABBR abbr
        HTML W3C
        """
    )
    expected = self.dedent(
        """
        <p><abbr title="Abbreviation">ABBR</abbr> <abbr title="Abbreviation">abbr</abbr></p>
        <p><abbr title="Hyper Text Markup Language">HTML</abbr> <abbr title="World Wide Web Consortium">W3C</abbr></p>
        """
    )
    self.assertMarkdownRenders(
        source,
        expected,
        extensions=[AbbrExtension(glossary=glossary_path)]
    )

    # Normal-path cleanup: delete the file now and cancel the exit hook.
    os.remove(glossary_path)
    atexit.unregister(cleanup_hook)

def test_abbr_nested(self):
self.assertMarkdownRenders(
self.dedent(
Expand Down

0 comments on commit 2009c02

Please sign in to comment.