Skip to content

Commit

Permalink
Support sup and sub html tags (#408)
Browse files Browse the repository at this point in the history
Co-authored-by: cowboysync <[email protected]>
  • Loading branch information
Alir3z4 and cowboysync authored Feb 2, 2024
1 parent e375689 commit 42278c6
Show file tree
Hide file tree
Showing 11 changed files with 37 additions and 3 deletions.
1 change: 1 addition & 0 deletions ChangeLog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ UNRELEASED
* Support for Python 3.10.
* Fix #320 padding empty tables and tables with no </tr> tags.
* Add ``ignore_mailto_links`` config option to ignore ``mailto:`` style links.
* Feature #407: Support the superscript and subscript tags.



Expand Down
1 change: 1 addition & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Command line options
| `--single-line-break` | Use a single line break after a block element rather than two.
| `--reference-links` | Use reference links instead of inline links to create markdown
| `--ignore-emphasis` | Ignore all emphasis formatting in the html.
| `--include-sup-sub` | Include `<sup>` and `<sub>` tags.
| `-e`, `--asterisk-emphasis` | Use asterisk rather than underscore to emphasize text
| `--unicode-snob` | Use unicode throughout instead of ASCII
| `--no-automatic-links` | Do not use automatic links like <https://www.google.com/>
Expand Down
7 changes: 7 additions & 0 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(
self.tag_callback = None
self.open_quote = config.OPEN_QUOTE # covered in cli
self.close_quote = config.CLOSE_QUOTE # covered in cli
self.include_sup_sub = config.INCLUDE_SUP_SUB # covered in cli

if out is None:
self.out = self.outtextf
Expand Down Expand Up @@ -716,6 +717,12 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.out("\n[/code]")
self.p()

if tag in ["sup", "sub"] and self.include_sup_sub:
if start:
self.o("<{}>".format(tag))
else:
self.o("</{}>".format(tag))

# TODO: Add docstring for these one letter functions
def pbr(self) -> None:
"Pretty print has a line break"
Expand Down
3 changes: 1 addition & 2 deletions html2text/_typing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
class OutCallback:
def __call__(self, s: str) -> None:
...
def __call__(self, s: str) -> None: ...
8 changes: 8 additions & 0 deletions html2text/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,13 @@ class bcolors:
)
p.add_argument("filename", nargs="?")
p.add_argument("encoding", nargs="?", default="utf-8")
p.add_argument(
"--include-sup-sub",
dest="include_sup_sub",
action="store_true",
default=config.INCLUDE_SUP_SUB,
help="Include the sup and sub tags",
)
args = p.parse_args()

if args.filename and args.filename != "-":
Expand Down Expand Up @@ -318,5 +325,6 @@ class bcolors:
h.default_image_alt = args.default_image_alt
h.open_quote = args.open_quote
h.close_quote = args.close_quote
h.include_sup_sub = args.include_sup_sub

sys.stdout.write(h.handle(html))
3 changes: 3 additions & 0 deletions html2text/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,6 @@
# Use double quotation marks when converting the <q> tag.
OPEN_QUOTE = '"'
CLOSE_QUOTE = '"'

# Pass <sup> and <sub> tags through to the output as literal HTML tags
INCLUDE_SUP_SUB = False
4 changes: 4 additions & 0 deletions test/sub_tag.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<p>
According to the computations by Nakamura, Johnson, and Mason<sub>1</sub> this
will result in the complete annihilation of both particles.
</p>
2 changes: 2 additions & 0 deletions test/sub_tag.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
According to the computations by Nakamura, Johnson, and Mason<sub>1</sub> this
will result in the complete annihilation of both particles.
4 changes: 4 additions & 0 deletions test/sup_tag.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<p>
One of the most common equations in all of physics is <var>E</var>=<var>m</var
><var>c</var><sup>2</sup>.
</p>
1 change: 1 addition & 0 deletions test/sup_tag.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
One of the most common equations in all of physics is E=mc<sup>2</sup>.
6 changes: 5 additions & 1 deletion test/test_html2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ def generate_testdata():
# CLI doesn't support baseurl.
cmdline_args = skip

if base_fn in ["sup_tag.html", "sub_tag.html"]:
module_args["include_sup_sub"] = True
cmdline_args.append("--include-sup-sub")
func_args = skip

yield fn, module_args, cmdline_args, func_args


Expand Down Expand Up @@ -201,7 +206,6 @@ def test_command(fn, cmdline_args):
actual = out.decode()

actual = cleanup_eol(actual)

assert actual.rstrip() == expected.rstrip()


Expand Down

0 comments on commit 42278c6

Please sign in to comment.