diff --git a/README.md b/README.md index 7a30882..aa2b710 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,19 @@ -# doxygentoasciidoc: A Doxygen > Asciidoc Converter +# doxygentoasciidoc: A Doxygen to AsciiDoc Converter -This project converts doxygen XML output to asciidoc. - -Allowed args: - -`-f`: the full path to the file to be converted +``` +usage: doxygentoasciidoc [-h] [-o OUTPUT] [-c] file -`-o`: the full path the the output file (will print to STDOUT if not specified) +Convert Doxygen XML to AsciiDoc -`-c`: process a node other than `doxygenindex` +positional arguments: + file The path of the Doxygen XML file to convert -The following attributes from the XML will be preserved in the generated asciidoc: role, tag, type. +optional arguments: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Write to file instead of stdout + -c, --child Is NOT the root index file +``` ## Development diff --git a/cli.py b/cli.py index 4ff4a42..48cd21a 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,4 @@ import os -import sys import argparse from bs4 import BeautifulSoup @@ -8,40 +7,42 @@ def main(): """Convert the given Doxygen index.xml to AsciiDoc and output the result.""" - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + prog="doxygentoasciidoc", description="Convert Doxygen XML to AsciiDoc" + ) parser.add_argument( - "-f", "--file", help="The path of the file to convert", default=None + "file", + type=argparse.FileType("r", encoding="utf-8"), + help="The path of the Doxygen XML file to convert", ) parser.add_argument( - "-o", "--output", help="The path of the output file", default=None + "-o", + "--output", + help="Write to file instead of stdout", ) parser.add_argument( "-c", "--child", help="Is NOT the root index file", - default=False, action="store_true", ) + args = parser.parse_args() - filename = args.file - output_filename = args.output - is_child = args.child - if filename: - xmldir = os.path.dirname(filename) - with open(filename, encoding="utf-8") as xml: - if is_child: - result = Node( - BeautifulSoup(xml, "xml").doxygen, xmldir=xmldir - ).to_asciidoc(depth=1) - else: - result = DoxygenindexNode( - BeautifulSoup(xml, "xml").doxygenindex, xmldir=xmldir - ).to_asciidoc(depth=2) - if output_filename is not None: - with open(output_filename, "w", encoding="utf-8") as output: + + with args.file as file: + xmldir = os.path.dirname(file.name) + + if args.child: + result = Node( + BeautifulSoup(file, "xml").doxygen, xmldir=xmldir + ).to_asciidoc(depth=1) + else: + result = DoxygenindexNode( + BeautifulSoup(file, "xml").doxygenindex, xmldir=xmldir + ).to_asciidoc(depth=2) + + if args.output: + with open(args.output, "w", encoding="utf-8") as output: output.write(result) else: print(result) - - else: - sys.exit(1) diff --git a/helpers.py b/helpers.py index e0ca580..e6f8651 100644 --- a/helpers.py +++ b/helpers.py @@ -19,19 +19,41 @@ def sanitize(identifier): def title(text, level, attributes=None): - """Return text formatted as a title with the given level.""" + """Return text formatted as a title with the given level and attributes.""" + if attributes is None: + attributes = {} + + attrlist = [] + + if "id" in attributes: + attrlist.append(f"#{sanitize(attributes.pop('id'))}") + + roles = [] + if level > 5: - if attributes is not None: - if re.search(r"([,\s]role=)", attributes) is not None: - attributes = re.sub( - r"([,\s]role=)(.*?[,\s]?$)", "\\1h6 \\2", attributes - ) - else: - attributes += ",role=h6" - return f"[{attributes}]\n*{escape_text(text)}*" - return f"[.h6]\n*{escape_text(text)}*" - - marker = "=" * (level + 1) - if attributes is not None: - return f"[{attributes}]\n{marker} {escape_text(text)}" - return f"{marker} {escape_text(text)}" + roles.append("h6") + if "role" in attributes: + roles.append(attributes.pop("role")) + + if roles: + attrlist.append(f"role={' '.join(roles)}") + + if "tag" in attributes: + attrlist.append(f"tag={escape_text(attributes.pop('tag'))}") + if "type" in attributes: + attrlist.append(f"type={escape_text(attributes.pop('type'))}") + for key, value in attributes.items(): + attrlist.append(f'{escape_text(key)}="{escape_text(value)}"') + + output = [] + + if attrlist: + output.append(f"[{','.join(attrlist)}]") + + if level > 5: + output.append(f"*{escape_text(text)}*") + else: + marker = "=" * (level + 1) + output.append(f"{marker} {escape_text(text)}") + + return "\n".join(output) diff --git a/nodes.py b/nodes.py index 74f3606..8422962 100644 --- a/nodes.py +++ b/nodes.py @@ -29,7 +29,6 @@ class Node: "sect3", "simplesect", "table", - "title", "verbatim", ) @@ -161,20 +160,13 @@ def children(self, selector=None, **kwargs): for position, child in enumerate(children) ] - def attributes(self, skip=None): - """Return an asciidoc string of any attributes specified, plus the node id.""" - if skip is None: - skip = [] - preserved_attributes = ["role", "tag", "type"] - atts = [] - if self.node.get("id", None) is not None and "id" not in skip: - atts.append(f"#{sanitize(self.node.get('id'))}") - for att in preserved_attributes: - if self.node.get(att) is not None and att not in skip: - atts.append(f"{att}={self.node.get(att)}") - if len(atts) > 0: - return ",".join(atts) - return None + def attributes(self): + """Return the attributes for this node.""" + return { + key: self.node.attrs.get(key) + for key in ("id", "role", "type", "tag") + if key in self.node.attrs + } def descendants(self, selector, **kwargs): """Return a list of descendant Nodes matching the given selector.""" @@ -185,14 +177,6 @@ def descendants(self, selector, **kwargs): ) ] - @staticmethod - def has_page_parent(element): - return any( - parent.get("kind", "") == "page" - for parent in element.parents - if parent.name == "compounddef" - ) - def text(self, selector=None): """Return the stripped text of the given child.""" if not selector: @@ -274,7 +258,6 @@ def nodefor(self, element): "simplesect": SimplesectNode, "sp": SpNode, "table": TableNode, - "title": TitleNode, "type": Node, "ulink": UlinkNode, "verbatim": VerbatimNode, @@ -289,14 +272,16 @@ def to_asciidoc(self, depth=0, **kwargs): output = [] for module in self.rootmodules(): title_ = module.node.text("title") - attributes = [ - f"#{sanitize(module.refid)}", - f'reftext="{escape_text(title_)}"', - ] - if self.attributes(skip=["id"]) is not None: - attributes.append(self.attributes(skip=["id"])) output.append( - title(title_, depth, ",".join(attributes)), + title( + title_, + depth, + attributes={ + **self.attributes(), + "id": module.refid, + "reftext": title_, + }, + ), ) briefdescription = module.node.child("briefdescription").to_asciidoc( **kwargs, depth=depth @@ -433,10 +418,7 @@ def to_asciidoc(self, depth=0, **kwargs): def __output_title(self, depth=0): title_ = self.text("title") - attributes = ["#" + self.id, f'reftext="{escape_text(title_)}"'] - if self.attributes(skip=["id"]) is not None: - attributes.append(self.attributes(skip=["id"])) - return title(title_, depth, ",".join(attributes)) + return title(title_, depth, attributes={**self.attributes(), "reftext": title_}) def __output_briefdescription(self, **kwargs): return self.child("briefdescription").to_asciidoc(**kwargs) @@ -539,9 +521,8 @@ def to_asciidoc(self, depth=0, **kwargs): def __output_title(self, depth=0): title_ = self.text("title") - attributes = ["#" + self.id] if title_: - return title(title_, depth, ",".join(attributes)) + return title(title_, depth, attributes=self.attributes()) return None def __output_detaileddescription(self, **kwargs): @@ -607,7 +588,7 @@ def to_asciidoc(self, **kwargs): class VerbatimNode(Node): def to_asciidoc(self, **kwargs): kwargs["programlisting"] = True - return "".join(("[source,c]\n----\n", super().to_asciidoc(**kwargs), "----")) + return f"[source,c]\n----\n{super().to_asciidoc(**kwargs)}----" class CodelineNode(Node): @@ -638,7 +619,7 @@ def to_asciidoc(self, **kwargs): class UlinkNode(Node): def to_asciidoc(self, **kwargs): - return "".join((f"{self.node['url']}[", super().to_asciidoc(**kwargs), "]")) + return f"{self.node['url']}[{super().to_asciidoc(**kwargs)}]" class NonbreakablespaceNode(Node): @@ -648,52 +629,57 @@ def to_asciidoc(self, **kwargs): class SectNode(Node): def to_asciidoc(self, depth=0, **kwargs): - attributes = f"[{self.attributes()}]" - return "\n".join((attributes, super().to_asciidoc(**kwargs, depth=depth + 1))) + output = [] + title_ = self.text("title") + if title_: + output.append(title(title_, depth + 1, attributes=self.attributes())) -class TitleNode(Node): - def to_asciidoc(self, depth=0, **kwargs): - return title(super().to_asciidoc(**kwargs), depth) + for child in self.children(["para", "sect2", "sect3"]): + output.append(child.to_asciidoc(**kwargs, depth=depth + 1)) + + return "\n\n".join(output) class SimplesectNode(Node): def to_asciidoc(self, **kwargs): previous_node = self.previous_node() next_node = self.next_node() - if self.node.get("kind", "") == "see": + kind = self.node.get("kind") + + if kind == "see": output = [] if not ( previous_node and previous_node.name == "simplesect" - and previous_node.get("kind", "") == "see" + and previous_node.get("kind") == "see" ): output.append("--\n*See also*\n\n") output.append(super().to_asciidoc(**kwargs)) if not ( next_node and next_node.name == "simplesect" - and next_node.get("kind", "") == "see" + and next_node.get("kind") == "see" ): output.append("\n--") return "".join(output) - if self.node.get("kind", "") == "return": - return "".join(("--\n*Returns*\n\n", super().to_asciidoc(**kwargs), "\n--")) + if kind == "return": + return f"--\n*Returns*\n\n{super().to_asciidoc(**kwargs)}\n--" - if self.node.get("kind", "") == "note": + if kind == "note": output = [] if not ( previous_node and previous_node.name == "simplesect" - and previous_node.get("kind", "") == "note" + and previous_node.get("kind") == "note" ): output.append("[NOTE]\n====\n") output.append(super().to_asciidoc(**kwargs)) if not ( next_node and next_node.name == "simplesect" - and next_node.get("kind", "") == "note" + and next_node.get("kind") == "note" ): output.append("\n====") @@ -747,12 +733,12 @@ def refid(self): class EmphasisNode(Node): def to_asciidoc(self, **kwargs): - return "".join(("_", super().to_asciidoc(**kwargs), "_")) + return f"_{super().to_asciidoc(**kwargs)}_" class BoldNode(Node): def to_asciidoc(self, **kwargs): - return "".join(("*", super().to_asciidoc(**kwargs), "*")) + return f"*{super().to_asciidoc(**kwargs)}*" class CopyrightNode(Node): @@ -762,7 +748,7 @@ def to_asciidoc(self, **kwargs): class ComputeroutputNode(Node): def to_asciidoc(self, **kwargs): - return "".join(("`", super().to_asciidoc(**kwargs).rstrip(), "`")) + return f"`{super().to_asciidoc(**kwargs)}`" class ItemizedlistNode(Node): @@ -797,7 +783,7 @@ def block_separator(self, **kwargs): class TableNode(Node): def to_asciidoc(self, **kwargs): - return "".join(("|===\n", super().to_asciidoc(**kwargs), "\n|===")) + return f"|===\n{super().to_asciidoc(**kwargs)}\n|===" class RowNode(Node): @@ -809,7 +795,7 @@ def block_separator(self, **_kwargs): class EntryNode(Node): def to_asciidoc(self, **kwargs): - return "".join(("|", super().to_asciidoc(**kwargs))) + return f"|{super().to_asciidoc(**kwargs)}" class DetaileddescriptionNode(Node): @@ -822,7 +808,7 @@ def to_asciidoc(self, depth=0, **kwargs): title( "Detailed Description", depth, - self.attributes(), + attributes=self.attributes(), ) ) output.append(contents) @@ -832,7 +818,7 @@ def to_asciidoc(self, depth=0, **kwargs): class FunctionMemberdefNode(Node): def to_asciidoc(self, depth=0, **kwargs): - output = [title(self.text("name"), depth, self.attributes())] + output = [title(self.text("name"), depth, attributes=self.attributes())] if self.node["static"] == "yes": definition = ["[.memname]`static "] else: @@ -858,8 +844,6 @@ def to_asciidoc(self, depth=0, **kwargs): if suffix: definition.append(" ") definition.append(", ".join(suffix)) - # make sure there are no trailing spaces - definition[-1] = definition[-1].rstrip() definition.append("`") output.append("".join(definition)) briefdescription = self.child("briefdescription").to_asciidoc( @@ -877,7 +861,7 @@ def to_asciidoc(self, depth=0, **kwargs): class TypedefMemberdefNode(Node): def to_asciidoc(self, depth=0, **kwargs): - output = [title(self.text("name"), depth, self.attributes())] + output = [title(self.text("name"), depth, attributes=self.attributes())] output.append(f"[.memname]`{escape_text(self.text('definition'))}`") briefdescription = self.child("briefdescription").to_asciidoc( **kwargs, depth=depth @@ -895,7 +879,7 @@ def to_asciidoc(self, depth=0, **kwargs): class EnumMemberdefNode(Node): def to_asciidoc(self, depth=0, **kwargs): name = self.text("name") - output = [title(name or "anonymous enum", depth, self.attributes())] + output = [title(name or "anonymous enum", depth, attributes=self.attributes())] if name: output.append(f"[.memname]`enum {escape_text(name)}`") else: @@ -937,7 +921,7 @@ class VariableMemberdefNode(Node): def to_asciidoc(self, depth=0, **kwargs): name = self.text("name") or self.text("qualifiedname") output = [ - title(name, depth, self.attributes()), + title(name, depth, attributes=self.attributes()), ] definition = self.text("definition") if self.text("initializer"): @@ -969,7 +953,7 @@ def to_asciidoc(self, depth=0, **kwargs): class DefineMemberdefNode(Node): def to_asciidoc(self, depth=0, **kwargs): - output = [title(self.text("name"), depth, self.attributes())] + output = [title(self.text("name"), depth, attributes=self.attributes())] name = self.text("name") params = [param.text() for param in self.children("param")] if params: diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 968d002..ab14712 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -58,11 +58,11 @@ def test_title_with_level_5(): def test_title_with_level_6(): - assert title("Level 6 Section Title", 6) == "[.h6]\n*Level 6 Section Title*" + assert title("Level 6 Section Title", 6) == "[role=h6]\n*Level 6 Section Title*" def test_title_with_level_7(): - assert title("Level 7 Section Title", 7) == "[.h6]\n*Level 7 Section Title*" + assert title("Level 7 Section Title", 7) == "[role=h6]\n*Level 7 Section Title*" def test_title_escapes_text(): @@ -70,4 +70,90 @@ def test_title_escapes_text(): def test_title_escapes_text_with_level_6(): - assert title("3 * 2 = 6", 6) == "[.h6]\n*3 ++*++ 2 = 6*" + assert title("3 * 2 = 6", 6) == "[role=h6]\n*3 ++*++ 2 = 6*" + + +def test_title_with_id(): + assert ( + title("Title", 1, attributes={"id": "group__foo"}) == "[#group_foo]\n== Title" + ) + + +def test_title_with_role(): + assert ( + title("Title", 1, attributes={"role": "contextspecific"}) + == "[role=contextspecific]\n== Title" + ) + + +def test_title_with_level_6_and_role(): + assert ( + title("Title", 6, attributes={"role": "contextspecific"}) + == "[role=h6 contextspecific]\n*Title*" + ) + + +def test_title_with_tag_and_type(): + assert ( + title("Title", 1, attributes={"type": "TYPE", "tag": "TAG"}) + == "[tag=TAG,type=TYPE]\n== Title" + ) + + +def test_title_with_all_attributes(): + assert ( + title( + "Title", + 1, + attributes={ + "id": "group__foo", + "role": "contextspecific", + "tag": "TAG", + "type": "TYPE", + }, + ) + == "[#group_foo,role=contextspecific,tag=TAG,type=TYPE]\n== Title" + ) + + +def test_title_with_level_6_and_all_attributes(): + assert ( + title( + "Title", + 6, + attributes={ + "id": "group__foo", + "role": "contextspecific", + "tag": "TAG", + "type": "TYPE", + }, + ) + == "[#group_foo,role=h6 contextspecific,tag=TAG,type=TYPE]\n*Title*" + ) + + +def test_title_with_arbitrary_attributes(): + assert ( + title( + "Title", + 1, + attributes={ + "foo": "bar", + "baz": "quux", + }, + ) + == '[foo="bar",baz="quux"]\n== Title' + ) + + +def test_title_with_attribute_with_space(): + assert ( + title( + "Title", + 1, + attributes={ + "foo": "bar baz", + }, + ) + == '[foo="bar baz"]\n== Title' + ) diff --git a/tests/test_node.py b/tests/test_node.py index 1a01ceb..e56bccb 100644 --- a/tests/test_node.py +++ b/tests/test_node.py @@ -277,7 +277,7 @@ def test_id_returns_sanitized_id(): assert node.id == "group_hardware_base" -def test_attributes_are_preserved(): +def test_attributes_returns_allowed_attributes(): xml = """ void void foo @@ -294,30 +294,12 @@ def test_attributes_are_preserved(): """ node = Node(BeautifulSoup(xml, "xml").memberdef) - assert ( - node.attributes() - == "#group_hardware_gpio_1ga5d7dbadb2233e2e6627e9101411beb27,role=contextspecific,tag=TAG,type=TYPE" - ) - - -def test_attributes_are_preserved_except_skipped(): - xml = """ - void - void foo - () - foo - - A function. - - - - - - - """ - node = Node(BeautifulSoup(xml, "xml").memberdef) - - assert node.attributes(skip=["id"]) == "role=contextspecific,tag=TAG,type=TYPE" + assert node.attributes() == { + "id": "group__hardware__gpio_1ga5d7dbadb2233e2e6627e9101411beb27", + "role": "contextspecific", + "tag": "TAG", + "type": "TYPE", + } def test_nodes_are_subscriptable(): diff --git a/tests/test_title_node.py b/tests/test_title_node.py deleted file mode 100644 index 66ef78e..0000000 --- a/tests/test_title_node.py +++ /dev/null @@ -1,20 +0,0 @@ -from bs4 import BeautifulSoup -from doxygentoasciidoc.nodes import TitleNode - - -def test_title_node(tmp_path): - xml = """Interrupt Numbers""" - - asciidoc = TitleNode(BeautifulSoup(xml, "xml").title, xmldir=tmp_path).to_asciidoc() - - assert asciidoc == "= Interrupt Numbers" - - -def test_title_node_with_depth(tmp_path): - xml = """Interrupt Numbers""" - - asciidoc = TitleNode(BeautifulSoup(xml, "xml").title, xmldir=tmp_path).to_asciidoc( - depth=2 - ) - - assert asciidoc == "=== Interrupt Numbers"