diff --git a/ChangeLog.rst b/ChangeLog.rst index b648fe4350..8956610209 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -15,6 +15,7 @@ UNRELEASED * Support for Python 3.10. * Fix #320 padding empty tables and tables with no tags. * Add ``ignore_mailto_links`` config option to ignore ``mailto:`` style links. +* Feature #407: Support the superscript and subscript tags. diff --git a/docs/usage.md b/docs/usage.md index 2a5b78cc39..726d3440cb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -136,6 +136,7 @@ Command line options | `--single-line-break` | Use a single line break after a block element rather than two. | `--reference-links` | Use reference links instead of inline links to create markdown | `--ignore-emphasis` | Ignore all emphasis formatting in the html. +| `--include-sup-sub` | Include `<sup>` and `<sub>` tags. | `-e`, `--asterisk-emphasis` | Use asterisk rather than underscore to emphasize text | `--unicode-snob` | Use unicode throughout instead of ASCII | `--no-automatic-links` | Do not use automatic links like diff --git a/html2text/__init__.py b/html2text/__init__.py index 9054388cfe..32bd365127 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -86,6 +86,7 @@ def __init__( self.tag_callback = None self.open_quote = config.OPEN_QUOTE # covered in cli self.close_quote = config.CLOSE_QUOTE # covered in cli + self.include_sup_sub = config.INCLUDE_SUP_SUB # covered in cli if out is None: self.out = self.outtextf @@ -716,6 +717,12 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: self.out("\n[/code]") self.p() + if tag in ["sup", "sub"] and self.include_sup_sub: + if start: + self.o("<{}>".format(tag)) + else: + self.o("</{}>".format(tag)) + # TODO: Add docstring for these one letter functions def pbr(self) -> None: "Pretty print has a line break" diff --git a/html2text/_typing.py b/html2text/_typing.py index 6e17fed2f6..eed83251cd 100644 --- a/html2text/_typing.py +++ b/html2text/_typing.py @@ -1,3 +1,2 @@ class OutCallback: - def 
__call__(self, s: str) -> None: - ... + def __call__(self, s: str) -> None: ... diff --git a/html2text/cli.py b/html2text/cli.py index d0c62c976c..015322743d 100644 --- a/html2text/cli.py +++ b/html2text/cli.py @@ -264,6 +264,13 @@ class bcolors: ) p.add_argument("filename", nargs="?") p.add_argument("encoding", nargs="?", default="utf-8") + p.add_argument( + "--include-sup-sub", + dest="include_sup_sub", + action="store_true", + default=config.INCLUDE_SUP_SUB, + help="Include the sup and sub tags", + ) args = p.parse_args() if args.filename and args.filename != "-": @@ -318,5 +325,6 @@ class bcolors: h.default_image_alt = args.default_image_alt h.open_quote = args.open_quote h.close_quote = args.close_quote + h.include_sup_sub = args.include_sup_sub sys.stdout.write(h.handle(html)) diff --git a/html2text/config.py b/html2text/config.py index 88d3f912f8..4069740772 100644 --- a/html2text/config.py +++ b/html2text/config.py @@ -163,3 +163,6 @@ # Use double quotation marks when converting the <q> tag. OPEN_QUOTE = '"' CLOSE_QUOTE = '"' + +# Include the <sup> and <sub> tags +INCLUDE_SUP_SUB = False diff --git a/test/sub_tag.html b/test/sub_tag.html new file mode 100644 index 0000000000..5914bf0723 --- /dev/null +++ b/test/sub_tag.html @@ -0,0 +1,4 @@ +<p>

+ According to the computations by Nakamura, Johnson, and Mason<sub>1</sub> this + will result in the complete annihilation of both particles. +</p>

diff --git a/test/sub_tag.md b/test/sub_tag.md new file mode 100644 index 0000000000..5502ac2461 --- /dev/null +++ b/test/sub_tag.md @@ -0,0 +1,2 @@ +According to the computations by Nakamura, Johnson, and Mason<sub>1</sub> this +will result in the complete annihilation of both particles. diff --git a/test/sup_tag.html b/test/sup_tag.html new file mode 100644 index 0000000000..d2424fd573 --- /dev/null +++ b/test/sup_tag.html @@ -0,0 +1,4 @@ +<p>

+ One of the most common equations in all of physics is E=mc<sup>2</sup>. +</p>

diff --git a/test/sup_tag.md b/test/sup_tag.md new file mode 100644 index 0000000000..cf1910c750 --- /dev/null +++ b/test/sup_tag.md @@ -0,0 +1 @@ +One of the most common equations in all of physics is E=mc<sup>2</sup>. diff --git a/test/test_html2text.py b/test/test_html2text.py index f57cb6b1d9..ea861445ee 100644 --- a/test/test_html2text.py +++ b/test/test_html2text.py @@ -140,6 +140,11 @@ def generate_testdata(): # CLI doesn't support baseurl. cmdline_args = skip + if base_fn in ["sup_tag.html", "sub_tag.html"]: + module_args["include_sup_sub"] = True + cmdline_args.append("--include-sup-sub") + func_args = skip + yield fn, module_args, cmdline_args, func_args @@ -201,7 +206,6 @@ def test_command(fn, cmdline_args): actual = out.decode() actual = cleanup_eol(actual) - assert actual.rstrip() == expected.rstrip()