Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Many 'o' fixes #71

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
4 changes: 4 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
branch=True
source=unasync

[paths]
source = src/unasync

[report]
precision = 1
exclude_lines =
pragma: no cover
abc.abstractmethod
\# PY2
13 changes: 13 additions & 0 deletions .coveragerc-py2
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[run]
branch=True
source=unasync
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be causing a problem for Python 2 coverage; it might need a `[paths] source = src/unasync` entry too.


[paths]
source = src/unasync

[report]
precision = 1
exclude_lines =
pragma: no cover
abc.abstractmethod
\# PY3
12 changes: 11 additions & 1 deletion ci/travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ if [ "$USE_PYPY_RELEASE_VERSION" != "" ]; then
source testenv/bin/activate
fi

# Select the coverage config matching this build's Python major version:
# Python 2 needs a separate rc file because its pragma excludes differ
# (`# PY2` vs `# PY3` exclusion markers).
# MACPYTHON is set on macOS builds, TRAVIS_PYTHON_VERSION on Linux builds;
# if neither is set, the default (Python 3) config is used.
case "${MACPYTHON:-${TRAVIS_PYTHON_VERSION:-}}" in
2*)
# Python 2.x interpreter
COVERAGE_FILE=.coveragerc-py2
;;

*)
# Python 3.x (or unspecified)
COVERAGE_FILE=.coveragerc
;;
esac

pip install -U pip setuptools wheel

if [ "$CHECK_FORMATTING" = "1" ]; then
Expand Down Expand Up @@ -91,7 +101,7 @@ else
mkdir empty
cd empty

pytest -ra -v --cov=unasync --cov-config=../.coveragerc --verbose ../tests
pytest -ra -v --cov=unasync --cov-config="../${COVERAGE_FILE}" --verbose ../tests

bash <(curl -s https://codecov.io/bash)
fi
154 changes: 144 additions & 10 deletions src/unasync/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# -*- encoding: utf8 -*-
"""Top-level package for unasync."""

from __future__ import print_function

import collections
import errno
import io
import os
import sys
import tokenize as std_tokenize
Expand Down Expand Up @@ -34,13 +36,34 @@
"StopAsyncIteration": "StopIteration",
}

_TYPE_COMMENT_PREFIX = "# type: "


if sys.version_info[0] == 2: # PY2

def isidentifier(s):
return all([c.isalnum() or c == "_" for c in s])

StringIO = io.BytesIO
else: # PY3

def isidentifier(s):
return s.isidentifier()

StringIO = io.StringIO

if hasattr(os, "fspath"): # PY3
fspath = os.fspath
else: # PY2
fspath = str


class Rule:
"""A single set of rules for 'unasync'ing file(s)"""

def __init__(self, fromdir, todir, additional_replacements=None):
self.fromdir = fromdir.replace("/", os.sep)
self.todir = todir.replace("/", os.sep)
self.fromdir = fspath(fromdir).replace("/", os.sep)
self.todir = fspath(todir).replace("/", os.sep)

# Add any additional user-defined token replacements to our list.
self.token_replacements = _ASYNC_TO_SYNC.copy()
Expand All @@ -51,6 +74,8 @@ def _match(self, filepath):
"""Determines if a Rule matches a given filepath and if so
returns a higher comparable value if the match is more specific.
"""
filepath = fspath(filepath)

file_segments = [x for x in filepath.split(os.sep) if x]
from_segments = [x for x in self.fromdir.split(os.sep) if x]
len_from_segments = len(from_segments)
Expand All @@ -65,9 +90,10 @@ def _match(self, filepath):
return False

def _unasync_file(self, filepath):
filepath = fspath(filepath)
with open(filepath, "rb") as f:
write_kwargs = {}
if sys.version_info[0] >= 3:
if sys.version_info[0] >= 3: # PY3 # pragma: no branch
encoding, _ = std_tokenize.detect_encoding(f.readline)
write_kwargs["encoding"] = encoding
f.seek(0)
Expand All @@ -82,7 +108,57 @@ def _unasync_file(self, filepath):
def _unasync_tokens(self, tokens):
# TODO __await__, ...?
used_space = None
context = None # Can be `None`, `"func_decl"`, `"func_name"`, `"arg_list"`, `"arg_list_end"`, `"return_type"`
brace_depth = 0
typing_ctx = False

for space, toknum, tokval in tokens:
# Update context state tracker
if context is None and toknum == std_tokenize.NAME and tokval == "def":
context = "func_decl"
elif context == "func_decl" and toknum == std_tokenize.NAME:
context = "func_name"
elif context == "func_name" and toknum == std_tokenize.OP and tokval == "(":
context = "arg_list"
elif context == "arg_list":
if toknum == std_tokenize.OP and tokval in ("(", "["):
brace_depth += 1
elif (
toknum == std_tokenize.OP
and tokval in (")", "]")
and brace_depth >= 1
):
brace_depth -= 1
elif toknum == std_tokenize.OP and tokval == ")":
context = "arg_list_end"
elif toknum == std_tokenize.OP and tokval == ":" and brace_depth < 1:
typing_ctx = True
elif toknum == std_tokenize.OP and tokval == "," and brace_depth < 1:
typing_ctx = False
elif (
context == "arg_list_end"
and toknum == std_tokenize.OP
and tokval == "->"
):
context = "return_type"
typing_ctx = True
elif context == "return_type":
if toknum == std_tokenize.OP and tokval in ("(", "["):
brace_depth += 1
elif (
toknum == std_tokenize.OP
and tokval in (")", "]")
and brace_depth >= 1
):
brace_depth -= 1
elif toknum == std_tokenize.OP and tokval == ":":
context = None
typing_ctx = False
else: # Something unexpected happend - reset state
context = None
brace_depth = 0
typing_ctx = False

if tokval in ["async", "await"]:
# When removing async or await, we want to use the whitespace that
# was before async/await before the next token so that
Expand All @@ -93,8 +169,59 @@ def _unasync_tokens(self, tokens):
if toknum == std_tokenize.NAME:
tokval = self._unasync_name(tokval)
elif toknum == std_tokenize.STRING:
left_quote, name, right_quote = tokval[0], tokval[1:-1], tokval[-1]
tokval = left_quote + self._unasync_name(name) + right_quote
# Strings in typing context are forward-references and should be unasyncified
quote = ""
prefix = ""
while ord(tokval[0]) in range(ord("a"), ord("z") + 1):
prefix += tokval[0]
tokval = tokval[1:]

if tokval.startswith('"""') and tokval.endswith('"""'):
quote = '"""' # Broken syntax highlighters workaround: """
elif tokval.startswith("'''") and tokval.endswith("'''"):
quote = "'''" # Broken syntax highlighters wokraround: '''
elif tokval.startswith('"') and tokval.endswith('"'):
quote = '"'
elif tokval.startswith( # pragma: no branch
"'"
) and tokval.endswith("'"):
quote = "'"
assert (
len(quote) > 0
), "Quoting style of string {0!r} unknown".format(tokval)
stringval = tokval[len(quote) : -len(quote)]
if typing_ctx:
stringval = _untokenize(
self._unasync_tokens(_tokenize(StringIO(stringval)))
)
else:
stringval = self._unasync_name(stringval)
tokval = prefix + quote + stringval + quote
elif toknum == std_tokenize.COMMENT and tokval.startswith(
_TYPE_COMMENT_PREFIX
):
type_decl, suffix = tokval[len(_TYPE_COMMENT_PREFIX) :], ""
if "#" in type_decl:
type_decl, suffix = type_decl.split("#", 1)
suffix = "#" + suffix
type_decl_stripped = type_decl.strip()

# Do not process `type: ignore` or `type: ignore[…]` as these aren't actual identifiers
is_type_ignore = type_decl_stripped == "ignore"
is_type_ignore |= type_decl_stripped.startswith(
"ignore"
) and not isidentifier(type_decl_stripped[0:7])
if not is_type_ignore:
# Preserve trailing whitespace since the tokenizer won't
trailing_space_len = len(type_decl) - len(type_decl.rstrip())
if trailing_space_len > 0:
suffix = type_decl[-trailing_space_len:] + suffix
type_decl = type_decl[:-trailing_space_len]
type_decl = _untokenize(
self._unasync_tokens(_tokenize(StringIO(type_decl)))
)

tokval = _TYPE_COMMENT_PREFIX + type_decl + suffix
if used_space is None:
used_space = space
yield (used_space, tokval)
Expand Down Expand Up @@ -128,12 +255,16 @@ def unasync_files(fpath_list, rules):


def _get_tokens(f):
if sys.version_info[0] == 2:
if sys.version_info[0] == 2: # PY2
for tok in std_tokenize.generate_tokens(f.readline):
type_, string, start, end, line = tok
yield Token(type_, string, start, end, line)
else:
for tok in std_tokenize.tokenize(f.readline):
else: # PY3
if isinstance(f, io.TextIOBase):
gen = std_tokenize.generate_tokens(f.readline)
else:
gen = std_tokenize.tokenize(f.readline)
for tok in gen:
if tok.type == std_tokenize.ENCODING:
continue
yield tok
Expand All @@ -143,13 +274,16 @@ def _tokenize(f):
last_end = (1, 0)
for tok in _get_tokens(f):
if last_end[0] < tok.start[0]:
yield ("", std_tokenize.STRING, " \\\n")
# Somehow Python 3.5 and below produce the ENDMARKER in a way that
# causes superfluous continuation lines to be generated
if tok.type != std_tokenize.ENDMARKER:
yield (" ", std_tokenize.NEWLINE, "\\\n")
last_end = (tok.start[0], 0)

space = ""
if tok.start > last_end:
assert tok.start[0] == last_end[0]
space = " " * (tok.start[1] - last_end[1])
space = tok.line[last_end[1] : tok.start[1]]
yield (space, tok.type, tok.string)

last_end = tok.end
Expand Down
3 changes: 2 additions & 1 deletion test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pytest>=4.3.0
pytest-cov
pytest-cov
pathlib2 ; python_version < '3.5'
8 changes: 8 additions & 0 deletions tests/data/async/tabs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# fmt: off
async def dummy():
await dummy2() # This line is indented with a tab that should be preserved
# fmt: on


async def dummy2():
await dummy() # This one uses 4 spaces and these should also be preserved
23 changes: 23 additions & 0 deletions tests/data/async/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,26 @@
typing.AsyncIterable[bytes]
typing.AsyncIterator[bytes]
typing.AsyncGenerator[bytes]

# A typed function that takes the first item of an (a)sync iterator and returns it
async def func1(a: typing.AsyncIterable[int]) -> str:
it: typing.AsyncIterator[int] = a.__aiter__()
b: int = await it.__anext__()
return str(b)


# Same as the above but using old-style typings (mainly for Python 2.7 – 3.5 compatibility)
async def func2(a): # type: (typing.AsyncIterable[int]) -> str
it = a.__aiter__() # type: typing.AsyncIterator[int]
b = await it.__anext__() # type: int
return str(b)


# And some funky edge cases to at least cover the relevant at all in this test
a: int = 5
b: str = a # type: ignore # This is the actual comment and the type declaration silences the warning that would otherwise happen
c: str = a # type: ignore2 # This is the actual comment and the declaration declares another type, both of which are wrong

# fmt: off
# And some genuine trailing whitespace (uww…)
z = a # type: int
13 changes: 13 additions & 0 deletions tests/data/async/typing_py3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# fmt: off
# A forward-reference typed function that returns an iterator for an (a)sync iterable
async def aiter1(a: "typing.AsyncIterable[int]") -> 'typing.AsyncIterable[int]':
return a.__aiter__()

# Same as the above but using tripple-quoted strings
async def aiter2(a: """typing.AsyncIterable[int]""") -> r'''typing.AsyncIterable[int]''':
return a.__aiter__()

# Same as the above but without forward-references
async def aiter3(a: typing.AsyncIterable[int]) -> typing.AsyncIterable[int]:
return a.__aiter__()
# fmt: on
8 changes: 8 additions & 0 deletions tests/data/sync/tabs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# fmt: off
def dummy():
dummy2() # This line is indented with a tab that should be preserved
# fmt: on


def dummy2():
dummy() # This one uses 4 spaces and these should also be preserved
23 changes: 23 additions & 0 deletions tests/data/sync/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,26 @@
typing.Iterable[bytes]
typing.Iterator[bytes]
typing.Generator[bytes]

# A typed function that takes the first item of an (a)sync iterator and returns it
def func1(a: typing.Iterable[int]) -> str:
it: typing.Iterator[int] = a.__iter__()
b: int = it.__next__()
return str(b)


# Same as the above but using old-style typings (mainly for Python 2.7 – 3.5 compatibility)
def func2(a): # type: (typing.Iterable[int]) -> str
it = a.__iter__() # type: typing.Iterator[int]
b = it.__next__() # type: int
return str(b)


# And some funky edge cases to at least cover the relevant at all in this test
a: int = 5
b: str = a # type: ignore # This is the actual comment and the type declaration silences the warning that would otherwise happen
c: str = a # type: ignore2 # This is the actual comment and the declaration declares another type, both of which are wrong

# fmt: off
# And some genuine trailing whitespace (uww…)
z = a # type: int
13 changes: 13 additions & 0 deletions tests/data/sync/typing_py3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# fmt: off
# A forward-reference typed function that returns an iterator for an (a)sync iterable
def aiter1(a: "typing.Iterable[int]") -> 'typing.Iterable[int]':
return a.__iter__()

# Same as the above but using tripple-quoted strings
def aiter2(a: """typing.Iterable[int]""") -> r'''typing.Iterable[int]''':
return a.__iter__()

# Same as the above but without forward-references
def aiter3(a: typing.Iterable[int]) -> typing.Iterable[int]:
return a.__iter__()
# fmt: on
Loading