From bfc49a4bed56bd521d996e2ea5398950908f8604 Mon Sep 17 00:00:00 2001 From: Danilo Horta Date: Thu, 16 May 2024 21:46:14 +0100 Subject: [PATCH] Remove tabulate dependency and optimize table formatting --- snap/deciphon_snap/match.py | 2 +- snap/deciphon_snap/tabulate.py | 58 ++++++++++++++++++++++++++++++++++ snap/deciphon_snap/view.py | 12 ++----- snap/pyproject.toml | 3 +- snap/tests/test_alignments.py | 4 +-- 5 files changed, 65 insertions(+), 14 deletions(-) create mode 100644 snap/deciphon_snap/tabulate.py diff --git a/snap/deciphon_snap/match.py b/snap/deciphon_snap/match.py index 4b4edbf..cceccd3 100644 --- a/snap/deciphon_snap/match.py +++ b/snap/deciphon_snap/match.py @@ -20,7 +20,7 @@ class MatchElemName(Enum): AMINO = 4 -@dataclass(slots=True, frozen=True) +@dataclass(slots=True, frozen=True, match_args=False) class Match: raw: str start: int diff --git a/snap/deciphon_snap/tabulate.py b/snap/deciphon_snap/tabulate.py new file mode 100644 index 0000000..fe3f491 --- /dev/null +++ b/snap/deciphon_snap/tabulate.py @@ -0,0 +1,58 @@ +from itertools import zip_longest + +__all__ = ["tabulate"] + + +def tabulate(tabular_data, aligns): + cols = list(zip_longest(*tabular_data)) + cols = [["" if v is None else str(v) for v in c] for c in cols] + cols = [_align_column(c, a) for c, a in zip(cols, aligns)] + rows = list(zip(*cols)) + return _format_table(rows) + + +def _padleft(width, s): + fmt = "{0:>%ds}" % width + return fmt.format(s) + + +def _padright(width, s): + fmt = "{0:<%ds}" % width + return fmt.format(s) + + +def _align_column_choose_padfn(strings, alignment): + strings = [s.strip() for s in strings] + padfn = _padleft if alignment == "right" else _padright + return strings, padfn + + +def _align_column(strings, alignment): + """[string] -> [padded_string]""" + strings, padfn = _align_column_choose_padfn(strings, alignment) + + s_widths = list(map(len, strings)) + maxwidth = max(s_widths) + return [padfn(maxwidth, s) for s in strings] + + +def _build_row(padded_cells): + "Return a string which represents a row of data cells." + return " ".join(padded_cells).rstrip() + + +def _append_basic_row(lines, padded_cells): + lines.append(_build_row(padded_cells)) + return lines + + +def _format_table(rows): + """Produce a plain-text representation of the table.""" + lines = [] + + padded_rows = [[cell for cell in row] for row in rows] + + for row in padded_rows: + _append_basic_row(lines, row) + + return "\n".join(lines) diff --git a/snap/deciphon_snap/view.py b/snap/deciphon_snap/view.py index d86e5c4..8bd3ea5 100644 --- a/snap/deciphon_snap/view.py +++ b/snap/deciphon_snap/view.py @@ -5,11 +5,11 @@ from hmmer_tables.query import DomAnnot, read_query from pydantic import BaseModel -from tabulate import simple_separated_format, tabulate from deciphon_snap.match import Match, MatchList from deciphon_snap.prod import H3Result, Prod from deciphon_snap.snap_file import SnapFile +from deciphon_snap.tabulate import tabulate __all__ = ["view_alignments"] @@ -197,16 +197,10 @@ def grab_query(x, i): [None, None, "".join(score[sl]) + pad, "PP"], ] table += row + [[None, None, None, None]] - tablefmt = simple_separated_format(" ") - txt += tabulate( - table, tablefmt=tablefmt, colalign=("right", "right", "left", "left") - ) + txt += tabulate(table, ["right", "right", "left", "left"]) txt = txt.replace("&", "") + "\n" return txt def view_alignments(snap: SnapFile): - txt = [] - for prod in snap.products: - txt.append(view_alignment(prod)) - return "\n".join(txt) + return (view_alignment(prod) for prod in snap.products) diff --git a/snap/pyproject.toml b/snap/pyproject.toml index 17c6e2a..595d6b0 100644 --- a/snap/pyproject.toml +++ b/snap/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "deciphon-snap" -version = "0.11.3" +version = "0.12.0" description = "Reader for Deciphon snap files." authors = ["Danilo Horta "] license = "MIT" @@ -11,7 +11,6 @@ packages = [{ include = "deciphon_snap" }] python = "^3.9" fsspec = ">=2024.5.0" h3result = "^0.3" -tabulate = "^0.9" fasta-reader = "^3.0" deciphon-intervals = "^0.1" prettytable = "^3.10" diff --git a/snap/tests/test_alignments.py b/snap/tests/test_alignments.py index 0b3e8ef..43ec65c 100644 --- a/snap/tests/test_alignments.py +++ b/snap/tests/test_alignments.py @@ -3,7 +3,7 @@ from deciphon_snap.read_snap import read_snap from deciphon_snap.view import view_alignments + def test_alignments(files_path: Path): snap_file = read_snap(files_path / "consensus.dcs") - txt = view_alignments(snap_file) - print(txt) + print("\n".join(view_alignments(snap_file)))