diff --git a/.gitignore b/.gitignore index 032c3fff8b..905e4ee5a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .vscode .venv .idea +.lintbin *.env *.zip diff --git a/.lintrunner.toml b/.lintrunner.toml new file mode 100644 index 0000000000..10afb018cb --- /dev/null +++ b/.lintrunner.toml @@ -0,0 +1,310 @@ +merge_base_with = "origin/main" + +[[linter]] +code = 'FLAKE8' +include_patterns = ['**/*.py'] +exclude_patterns = [ + '.git/**', +] +command = [ + 'python3', + 'tools/linter/adapters/flake8_linter.py', + '--', + '@{{PATHSFILE}}' +] +init_command = [ + 'python3', + 'tools/linter/adapters/pip_init.py', + '--dry-run={{DRYRUN}}', + 'flake8==6.0.0', + 'flake8-bugbear==23.3.23', + 'flake8-comprehensions==3.12.0', + 'flake8-executable==2.1.3', + 'flake8-logging-format==0.9.0', + 'flake8-pyi==23.3.1', + 'flake8-simplify==0.19.3', + 'mccabe==0.7.0', + 'pycodestyle==2.10.0', + 'pyflakes==3.0.1', +] + +[[linter]] +code = 'MYPY' +include_patterns = [ + 'tools/**/*.py', + 'tools/**/*.pyi', + 'stats/**/*.py', + 'stats/**/*.pyi', + 'torchci/**/*.py', + 'torchci/**/*.pyi', +] +command = [ + 'python3', + 'tools/linter/adapters/mypy_linter.py', + '--config=mypy.ini', + '--', + '@{{PATHSFILE}}' +] +init_command = [ + 'python3', + 'tools/linter/adapters/pip_init.py', + '--dry-run={{DRYRUN}}', + 'numpy==1.24.3', + 'expecttest==0.1.3', + 'mypy==0.960', + 'types-requests==2.27.25', + 'types-PyYAML==6.0.7', + 'types-tabulate==0.8.8', + 'types-protobuf==3.19.18', + 'types-pkg-resources==0.1.3', + 'types-Jinja2==2.11.9', + 'junitparser==2.1.1', + 'rich==10.9.0', + 'pyyaml==6.0', +] + +[[linter]] +code = 'TYPEIGNORE' +include_patterns = ['**/*.py', '**/*.pyi'] +exclude_patterns = [ +] +command = [ + 'python3', + 'tools/linter/adapters/grep_linter.py', + '--pattern=# type:\s*ignore([^\[]|$)', + '--linter-name=TYPEIGNORE', + '--error-name=unqualified type: ignore', + """--error-description=\ + This line has an unqualified `type: ignore`; \ + please convert it to `type: ignore[xxxx]`\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'NOQA' +include_patterns = ['**/*.py', '**/*.pyi'] +command = [ + 'python3', + 'tools/linter/adapters/grep_linter.py', + '--pattern=# noqa([^:]|$)', + '--linter-name=NOQA', + '--error-name=unqualified noqa', + """--error-description=\ + This line has an unqualified `noqa`; \ + please convert it to `noqa: XXXX`\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'NEWLINE' +include_patterns=['**'] +exclude_patterns=[ + '**/*.bat', + '**/*.expect', + '**/*.ipynb', + '**/*.ps1', + '**/*.ptl', +] +command = [ + 'python3', + 'tools/linter/adapters/newlines_linter.py', + '--', + '@{{PATHSFILE}}', +] +is_formatter = true + + +[[linter]] +code = 'SPACES' +include_patterns = ['**'] +exclude_patterns = [ + '**/*.diff', + '**/*.patch', +] +command = [ + 'python3', + 'tools/linter/adapters/grep_linter.py', + '--pattern=[[:blank:]]$', + '--linter-name=SPACES', + '--error-name=trailing spaces', + '--replace-pattern=s/[[:blank:]]+$//', + """--error-description=\ + This line has trailing spaces; please remove them.\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'TABS' +include_patterns = ['**'] +exclude_patterns = [ + '**/*.svg', + '**/*Makefile', + '**/.gitattributes', + '**/.gitmodules', + '.lintrunner.toml', +] +command = [ + 'python3', + 'tools/linter/adapters/grep_linter.py', + # @lint-ignore TXT2 + '--pattern= ', + '--linter-name=TABS', + '--error-name=saw some tabs', + '--replace-pattern=s/\t/ /', + """--error-description=\ + This line has tabs; please 
replace them with spaces.\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'PYPIDEP' +include_patterns = ['.github/**'] +exclude_patterns = [ + '**/*.rst', + '**/*.py', + '**/*.md', + '**/*.diff', +] +command = [ + 'python3', + 'tools/linter/adapters/grep_linter.py', + """--pattern=\ + (pip|pip3|python -m pip|python3 -m pip|python3 -mpip|python -mpip) \ + install ([a-zA-Z0-9][A-Za-z0-9\\._\\-]+)([^/=<>~!]+)[A-Za-z0-9\\._\\-\\*\\+\\!]*$\ + """, + '--linter-name=PYPIDEP', + '--error-name=unpinned PyPI install', + """--error-description=\ + This line has unpinned PyPi installs; \ + please pin them to a specific version: e.g. 'thepackage==1.2'\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'EXEC' +include_patterns = ['**'] +exclude_patterns = [ + 'third_party/**', + 'torch/bin/**', + '**/*.so', + '**/*.py', + '**/*.sh', + '**/*.bash', + '**/git-pre-commit', + '**/git-clang-format', + '**/gradlew', +] +command = [ + 'python3', + 'tools/linter/adapters/exec_linter.py', + '--', + '@{{PATHSFILE}}', +] + +[[linter]] +code = 'ACTIONLINT' +include_patterns = [ + '.github/workflows/*.yml', + '.github/workflows/*.yaml', + # actionlint does not support composite actions yet + # '.github/actions/**/*.yml', + # '.github/actions/**/*.yaml', +] +command = [ + 'python3', + 'tools/linter/adapters/actionlint_linter.py', + '--binary=.lintbin/actionlint', + '--', + '@{{PATHSFILE}}', +] +init_command = [ + 'python3', + 'tools/linter/adapters/s3_init.py', + '--config-json=tools/linter/adapters/s3_init_config.json', + '--linter=actionlint', + '--dry-run={{DRYRUN}}', + '--output-dir=.lintbin', + '--output-name=actionlint', +] + +# Black + usort +[[linter]] +code = 'UFMT' +include_patterns = [ + '**/*.py', +] +command = [ + 'python3', + 'tools/linter/adapters/ufmt_linter.py', + '--', + '@{{PATHSFILE}}' +] +exclude_patterns = [ +] +init_command = [ + 'python3', + 'tools/linter/adapters/pip_init.py', + '--dry-run={{DRYRUN}}', + '--no-black-binary', + 'black==23.3.0', + 'ufmt==2.1.0', + 'usort==1.0.6', +] +is_formatter = true + +[[linter]] +code = 'COPYRIGHT' +include_patterns = ['**'] +exclude_patterns = ['.lintrunner.toml'] +command = [ + 'python3', + 'tools/linter/adapters/grep_linter.py', + '--pattern=Confidential and proprietary', + '--linter-name=COPYRIGHT', + '--error-name=Confidential Code', + """--error-description=\ + Proprietary and confidential source code\ + should not be contributed to PyTorch/test-infra codebase\ + """, + '--', + '@{{PATHSFILE}}' +] + +[[linter]] +code = 'LINTRUNNER_VERSION' +include_patterns = ['**'] +command = [ + 'python3', + 'tools/linter/adapters/lintrunner_version_linter.py' +] + +[[linter]] +code = 'RUFF' +include_patterns = ['**/*.py'] +exclude_patterns = [ +] +command = [ + 'python3', + 'tools/linter/adapters/ruff_linter.py', + '--config=pyproject.toml', + '--show-disable', + '--', + '@{{PATHSFILE}}' +] +init_command = [ + 'python3', + 'tools/linter/adapters/pip_init.py', + '--dry-run={{DRYRUN}}', + 'ruff==0.0.269', +] +is_formatter = true diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..6bd29c9920 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,70 @@ +[build-system] +requires = [ + "setuptools", + "wheel", + "astunparse", + "numpy", + "ninja", + "pyyaml", + "setuptools", + "cmake", + "typing-extensions", + "requests", +] +# Use legacy backend to import local packages in setup.py +build-backend = "setuptools.build_meta:__legacy__" + + +[tool.black] +# Uncomment if pyproject.toml worked fine to ensure consistency with flake8 
+# line-length = 120 +target-version = ["py38", "py39", "py310", "py311"] + + +[tool.ruff] +target-version = "py38" + +# NOTE: Synchoronize the ignores with .flake8 +ignore = [ + # these ignores are from flake8-bugbear; please fix! + "B007", "B008", "B017", + "B018", # Useless expression + "B019", "B020", + "B023", "B024", "B026", + "B028", # No explicit `stacklevel` keyword argument found + "B904", "B905", + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E731", # Assign lambda expression + "E741", + "EXE001", + "F405", + "F821", + "F841", + # these ignores are from flake8-logging-format; please fix! + "G101", "G201", "G202", + "SIM102", "SIM103", "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason + "SIM108", + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", +] +line-length = 120 +select = [ + "B", + "C4", + "G", + "E", + "F", + "SIM1", + "W", + # Not included in flake8 + "PLE", + "TRY302", +] diff --git a/tools/linter/adapters/actionlint_linter.py b/tools/linter/adapters/actionlint_linter.py new file mode 100644 index 0000000000..169451ca1c --- /dev/null +++ b/tools/linter/adapters/actionlint_linter.py @@ -0,0 +1,154 @@ +import argparse +import concurrent.futures +import json +import logging +import os +import re +import subprocess +import time +from enum import Enum +from typing import List, NamedTuple, Optional, Pattern + + +LINTER_CODE = "ACTIONLINT" + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +RESULTS_RE: Pattern[str] = re.compile( + r"""(?mx) + ^ + (?P.*?): + (?P\d+): + (?P\d+): + \s(?P.*) + \s(?P\[.*\]) + $ + """ +) + + +def run_command( + args: List[str], +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + capture_output=True, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def check_file( + binary: str, + file: str, +) -> List[LintMessage]: + try: + proc = run_command([binary, file]) + except OSError as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=(f"Failed due to {err.__class__.__name__}:\n{err}"), + ) + ] + stdout = str(proc.stdout, "utf-8").strip() + return [ + LintMessage( + path=match["file"], + name=match["code"], + description=match["message"], + line=int(match["line"]), + char=int(match["char"]), + code=LINTER_CODE, + severity=LintSeverity.ERROR, + original=None, + replacement=None, + ) + for match in RESULTS_RE.finditer(stdout) + ] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="actionlint runner", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--binary", + required=True, + help="actionlint binary path", + ) + parser.add_argument( + "filenames", + nargs="+", + 
help="paths to lint", + ) + + args = parser.parse_args() + + if not os.path.exists(args.binary): + err_msg = LintMessage( + path="", + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Could not find actionlint binary at {args.binary}," + " you may need to run `lintrunner init`." + ), + ) + print(json.dumps(err_msg._asdict()), flush=True) + exit(0) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count(), + thread_name_prefix="Thread", + ) as executor: + futures = { + executor.submit( + check_file, + args.binary, + filename, + ): filename + for filename in args.filenames + } + for future in concurrent.futures.as_completed(futures): + try: + for lint_message in future.result(): + print(json.dumps(lint_message._asdict()), flush=True) + except Exception: + logging.critical('Failed at "%s".', futures[future]) + raise diff --git a/tools/linter/adapters/exec_linter.py b/tools/linter/adapters/exec_linter.py new file mode 100644 index 0000000000..f00dc60afb --- /dev/null +++ b/tools/linter/adapters/exec_linter.py @@ -0,0 +1,86 @@ +""" +EXEC: Ensure that source files are not executable. +""" +import argparse +import json +import logging +import os +import sys + +from enum import Enum +from typing import NamedTuple, Optional + +LINTER_CODE = "EXEC" + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def check_file(filename: str) -> Optional[LintMessage]: + is_executable = os.access(filename, os.X_OK) + if is_executable: + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="executable-permissions", + original=None, + replacement=None, + description="This file has executable permission; please remove it by using `chmod -x`.", + ) + return None + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="exec linter", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--verbose", + action="store_true", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + lint_messages = [] + for filename in args.filenames: + lint_message = check_file(filename) + if lint_message is not None: + lint_messages.append(lint_message) + + for lint_message in lint_messages: + print(json.dumps(lint_message._asdict()), flush=True) diff --git a/tools/linter/adapters/flake8_linter.py b/tools/linter/adapters/flake8_linter.py new file mode 100644 index 0000000000..97b57d9c87 --- /dev/null +++ b/tools/linter/adapters/flake8_linter.py @@ -0,0 +1,372 @@ +import argparse +import json +import logging +import os +import re +import subprocess +import sys +import time +from enum import Enum +from typing import Any, Dict, List, NamedTuple, Optional, Pattern, Set + + +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, 
Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +# fmt: off +# https://www.flake8rules.com/ +DOCUMENTED_IN_FLAKE8RULES: Set[str] = { + "E101", "E111", "E112", "E113", "E114", "E115", "E116", "E117", + "E121", "E122", "E123", "E124", "E125", "E126", "E127", "E128", "E129", + "E131", "E133", + "E201", "E202", "E203", + "E211", + "E221", "E222", "E223", "E224", "E225", "E226", "E227", "E228", + "E231", + "E241", "E242", + "E251", + "E261", "E262", "E265", "E266", + "E271", "E272", "E273", "E274", "E275", + "E301", "E302", "E303", "E304", "E305", "E306", + "E401", "E402", + "E501", "E502", + "E701", "E702", "E703", "E704", + "E711", "E712", "E713", "E714", + "E721", "E722", + "E731", + "E741", "E742", "E743", + "E901", "E902", "E999", + "W191", + "W291", "W292", "W293", + "W391", + "W503", "W504", + "W601", "W602", "W603", "W604", "W605", + "F401", "F402", "F403", "F404", "F405", + "F811", "F812", + "F821", "F822", "F823", + "F831", + "F841", + "F901", + "C901", +} + +# https://pypi.org/project/flake8-comprehensions/#rules +DOCUMENTED_IN_FLAKE8COMPREHENSIONS: Set[str] = { + "C400", "C401", "C402", "C403", "C404", "C405", "C406", "C407", "C408", "C409", + "C410", + "C411", "C412", "C413", "C413", "C414", "C415", "C416", +} + +# https://github.com/PyCQA/flake8-bugbear#list-of-warnings +DOCUMENTED_IN_BUGBEAR: Set[str] = { + "B001", "B002", "B003", "B004", "B005", "B006", "B007", "B008", "B009", "B010", + "B011", "B012", "B013", "B014", "B015", + "B301", "B302", "B303", "B304", "B305", "B306", + "B901", "B902", "B903", "B950", +} +# fmt: on + + +# stdin:2: W802 undefined name 'foo' +# stdin:3:6: T484 Name 'foo' is not defined +# stdin:3:-100: W605 invalid escape sequence '\/' +# stdin:3:1: E302 expected 2 blank lines, found 1 +RESULTS_RE: Pattern[str] = re.compile( + r"""(?mx) + ^ + (?P.*?): + (?P\d+): + (?:(?P-?\d+):)? + \s(?P\S+?):? + \s(?P.*) + $ + """ +) + + +def _test_results_re() -> None: + """ + >>> def t(s): return RESULTS_RE.search(s).groupdict() + + >>> t(r"file.py:80:1: E302 expected 2 blank lines, found 1") + ... # doctest: +NORMALIZE_WHITESPACE + {'file': 'file.py', 'line': '80', 'column': '1', 'code': 'E302', + 'message': 'expected 2 blank lines, found 1'} + + >>> t(r"file.py:7:1: P201: Resource `stdout` is acquired but not always released.") + ... # doctest: +NORMALIZE_WHITESPACE + {'file': 'file.py', 'line': '7', 'column': '1', 'code': 'P201', + 'message': 'Resource `stdout` is acquired but not always released.'} + + >>> t(r"file.py:8:-10: W605 invalid escape sequence '/'") + ... 
# doctest: +NORMALIZE_WHITESPACE + {'file': 'file.py', 'line': '8', 'column': '-10', 'code': 'W605', + 'message': "invalid escape sequence '/'"} + """ + pass + + +def _run_command( + args: List[str], + *, + extra_env: Optional[Dict[str, str]], +) -> "subprocess.CompletedProcess[str]": + logging.debug( + "$ %s", + " ".join( + ([f"{k}={v}" for (k, v) in extra_env.items()] if extra_env else []) + args + ), + ) + start_time = time.monotonic() + try: + return subprocess.run( + args, + capture_output=True, + check=True, + encoding="utf-8", + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def run_command( + args: List[str], + *, + extra_env: Optional[Dict[str, str]], + retries: int, +) -> "subprocess.CompletedProcess[str]": + remaining_retries = retries + while True: + try: + return _run_command(args, extra_env=extra_env) + except subprocess.CalledProcessError as err: + if remaining_retries == 0 or not re.match( + r"^ERROR:1:1: X000 linting with .+ timed out after \d+ seconds", + err.stdout, + ): + raise err + remaining_retries -= 1 + logging.warning( + "(%s/%s) Retrying because command failed with: %r", + retries - remaining_retries, + retries, + err, + ) + time.sleep(1) + + +def get_issue_severity(code: str) -> LintSeverity: + # "B901": `return x` inside a generator + # "B902": Invalid first argument to a method + # "B903": __slots__ efficiency + # "B950": Line too long + # "C4": Flake8 Comprehensions + # "C9": Cyclomatic complexity + # "E2": PEP8 horizontal whitespace "errors" + # "E3": PEP8 blank line "errors" + # "E5": PEP8 line length "errors" + # "F401": Name imported but unused + # "F403": Star imports used + # "F405": Name possibly from star imports + # "T400": type checking Notes + # "T49": internal type checker errors or unmatched messages + if any( + code.startswith(x) + for x in [ + "B9", + "C4", + "C9", + "E2", + "E3", + "E5", + "F401", + "F403", + "F405", + "T400", + "T49", + ] + ): + return LintSeverity.ADVICE + + # "F821": Undefined name + # "E999": syntax error + if any(code.startswith(x) for x in ["F821", "E999"]): + return LintSeverity.ERROR + + # "F": PyFlakes Error + # "B": flake8-bugbear Error + # "E": PEP8 "Error" + # "W": PEP8 Warning + # possibly other plugins... 
+ return LintSeverity.WARNING + + +def get_issue_documentation_url(code: str) -> str: + if code in DOCUMENTED_IN_FLAKE8RULES: + return f"https://www.flake8rules.com/rules/{code}.html" + + if code in DOCUMENTED_IN_FLAKE8COMPREHENSIONS: + return "https://pypi.org/project/flake8-comprehensions/#rules" + + if code in DOCUMENTED_IN_BUGBEAR: + return "https://github.com/PyCQA/flake8-bugbear#list-of-warnings" + + return "" + + +def check_files( + filenames: List[str], + flake8_plugins_path: Optional[str], + severities: Dict[str, LintSeverity], + retries: int, +) -> List[LintMessage]: + try: + proc = run_command( + [sys.executable, "-mflake8", "--exit-zero"] + filenames, + extra_env={"FLAKE8_PLUGINS_PATH": flake8_plugins_path} + if flake8_plugins_path + else None, + retries=retries, + ) + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code="FLAKE8", + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.strip() or "(empty)", + stdout=err.stdout.strip() or "(empty)", + ) + ), + ) + ] + + return [ + LintMessage( + path=match["file"], + name=match["code"], + description="{}\nSee {}".format( + match["message"], + get_issue_documentation_url(match["code"]), + ), + line=int(match["line"]), + char=int(match["column"]) + if match["column"] is not None and not match["column"].startswith("-") + else None, + code="FLAKE8", + severity=severities.get(match["code"]) or get_issue_severity(match["code"]), + original=None, + replacement=None, + ) + for match in RESULTS_RE.finditer(proc.stdout) + ] + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Flake8 wrapper linter.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--flake8-plugins-path", + help="FLAKE8_PLUGINS_PATH env value", + ) + parser.add_argument( + "--severity", + action="append", + help="map code to severity (e.g. 
`B950:advice`)", + ) + parser.add_argument( + "--retries", + default=3, + type=int, + help="times to retry timed out flake8", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + flake8_plugins_path = ( + None + if args.flake8_plugins_path is None + else os.path.realpath(args.flake8_plugins_path) + ) + + severities: Dict[str, LintSeverity] = {} + if args.severity: + for severity in args.severity: + parts = severity.split(":", 1) + assert len(parts) == 2, f"invalid severity `{severity}`" + severities[parts[0]] = LintSeverity(parts[1]) + + lint_messages = check_files( + args.filenames, flake8_plugins_path, severities, args.retries + ) + for lint_message in lint_messages: + print(json.dumps(lint_message._asdict()), flush=True) + + +if __name__ == "__main__": + main() diff --git a/tools/linter/adapters/grep_linter.py b/tools/linter/adapters/grep_linter.py new file mode 100644 index 0000000000..21c8a210b2 --- /dev/null +++ b/tools/linter/adapters/grep_linter.py @@ -0,0 +1,272 @@ +""" +Generic linter that greps for a pattern and optionally suggests replacements. +""" + +import argparse +import json +import logging +import os +import subprocess +import sys +import time +from enum import Enum +from typing import Any, List, NamedTuple, Optional + + +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +def run_command( + args: List[str], +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + capture_output=True, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def lint_file( + matching_line: str, + allowlist_pattern: str, + replace_pattern: str, + linter_name: str, + error_name: str, + error_description: str, +) -> Optional[LintMessage]: + # matching_line looks like: + # tools/linter/clangtidy_linter.py:13:import foo.bar.baz + split = matching_line.split(":") + filename = split[0] + + if allowlist_pattern: + try: + proc = run_command(["grep", "-nEHI", allowlist_pattern, filename]) + except Exception as err: + return LintMessage( + path=None, + line=None, + char=None, + code=linter_name, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + 
stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + + # allowlist pattern was found, abort lint + if proc.returncode == 0: + return None + + original = None + replacement = None + if replace_pattern: + with open(filename, "r") as f: + original = f.read() + + try: + proc = run_command(["sed", "-r", replace_pattern, filename]) + replacement = proc.stdout.decode("utf-8") + except Exception as err: + return LintMessage( + path=None, + line=None, + char=None, + code=linter_name, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + + return LintMessage( + path=split[0], + line=int(split[1]) if len(split) > 1 else None, + char=None, + code=linter_name, + severity=LintSeverity.ERROR, + name=error_name, + original=original, + replacement=replacement, + description=error_description, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="grep wrapper linter.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--pattern", + required=True, + help="pattern to grep for", + ) + parser.add_argument( + "--allowlist-pattern", + help="if this pattern is true in the file, we don't grep for pattern", + ) + parser.add_argument( + "--linter-name", + required=True, + help="name of the linter", + ) + parser.add_argument( + "--match-first-only", + action="store_true", + help="only match the first hit in the file", + ) + parser.add_argument( + "--error-name", + required=True, + help="human-readable description of what the error is", + ) + parser.add_argument( + "--error-description", + required=True, + help="message to display when the pattern is found", + ) + parser.add_argument( + "--replace-pattern", + help=( + "the form of a pattern passed to `sed -r`. " + "If specified, this will become proposed replacement text." 
+ ), + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + files_with_matches = [] + if args.match_first_only: + files_with_matches = ["--files-with-matches"] + + try: + proc = run_command( + ["grep", "-nEHI", *files_with_matches, args.pattern, *args.filenames] + ) + except Exception as err: + err_msg = LintMessage( + path=None, + line=None, + char=None, + code=args.linter_name, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + print(json.dumps(err_msg._asdict()), flush=True) + exit(0) + + lines = proc.stdout.decode().splitlines() + for line in lines: + lint_message = lint_file( + line, + args.allowlist_pattern, + args.replace_pattern, + args.linter_name, + args.error_name, + args.error_description, + ) + if lint_message is not None: + print(json.dumps(lint_message._asdict()), flush=True) + + +if __name__ == "__main__": + main() diff --git a/tools/linter/adapters/lintrunner_version_linter.py b/tools/linter/adapters/lintrunner_version_linter.py new file mode 100644 index 0000000000..dc9828e8d7 --- /dev/null +++ b/tools/linter/adapters/lintrunner_version_linter.py @@ -0,0 +1,79 @@ +import json +import subprocess +from enum import Enum +from typing import NamedTuple, Optional, Tuple + + +LINTER_CODE = "LINTRUNNER_VERSION" + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def toVersionString(version_tuple: Tuple[int, int, int]) -> str: + return ".".join(str(x) for x in version_tuple) + + +if __name__ == "__main__": + version_str = ( + subprocess.run(["lintrunner", "-V"], stdout=subprocess.PIPE) + .stdout.decode("utf-8") + .strip() + ) + + import re + + version_match = re.compile(r"lintrunner (\d+)\.(\d+)\.(\d+)").match(version_str) + + if not version_match: + err_msg = LintMessage( + path="", + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description="Lintrunner is not installed, did you forget to run `make setup_lint && make lint`?", + ) + exit(0) + + curr_version = int(version_match[1]), int(version_match[2]), int(version_match[3]) + min_version = (0, 10, 7) + + if curr_version < min_version: + err_msg = LintMessage( + path="", + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ADVICE, + name="command-failed", + original=None, + replacement=None, + description="".join( + ( + f"Lintrunner is out of date (you have 
v{toVersionString(curr_version)} ", + f"instead of v{toVersionString(min_version)}). ", + "Please run `pip install lintrunner -U` to update it", + ) + ), + ) + print(json.dumps(err_msg._asdict()), flush=True) diff --git a/tools/linter/adapters/mypy_linter.py b/tools/linter/adapters/mypy_linter.py new file mode 100644 index 0000000000..0cd0c62df3 --- /dev/null +++ b/tools/linter/adapters/mypy_linter.py @@ -0,0 +1,196 @@ +import argparse +import json +import logging +import os +import re +import subprocess +import sys +import time +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List, NamedTuple, Optional, Pattern + + +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +# tools/linter/flake8_linter.py:15:13: error: Incompatibl...int") [assignment] +RESULTS_RE: Pattern[str] = re.compile( + r"""(?mx) + ^ + (?P.*?): + (?P\d+): + (?:(?P-?\d+):)? + \s(?P\S+?):? + \s(?P.*) + \s(?P\[.*\]) + $ + """ +) + + +def run_command( + args: List[str], + *, + extra_env: Optional[Dict[str, str]], + retries: int, +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + capture_output=True, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +# Severity is either "error" or "note": +# https://github.com/python/mypy/blob/8b47a032e1317fb8e3f9a818005a6b63e9bf0311/mypy/errors.py#L46-L47 +severities = { + "error": LintSeverity.ERROR, + "note": LintSeverity.ADVICE, +} + + +def check_files( + filenames: List[str], + config: str, + retries: int, + code: str, +) -> List[LintMessage]: + try: + proc = run_command( + [sys.executable, "-mmypy", f"--config={config}"] + filenames, + extra_env={}, + retries=retries, + ) + except OSError as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code=code, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=(f"Failed due to {err.__class__.__name__}:\n{err}"), + ) + ] + stdout = str(proc.stdout, "utf-8").strip() + return [ + LintMessage( + path=match["file"], + name=match["code"], + description=match["message"], + line=int(match["line"]), + char=int(match["column"]) + if match["column"] is not None and not match["column"].startswith("-") + else None, + code=code, + severity=severities.get(match["severity"], LintSeverity.ERROR), + original=None, + replacement=None, + ) + for match in RESULTS_RE.finditer(stdout) + ] + + +def main() -> None: + parser = argparse.ArgumentParser( + description="mypy wrapper linter.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--retries", + default=3, + type=int, + help="times to retry timed out mypy", + ) + parser.add_argument( + "--config", + required=True, + help="path to an mypy .ini config file", + ) + parser.add_argument( + "--code", + default="MYPY", + help="the code this lint should report as", + ) + parser.add_argument( + "--verbose", + 
action="store_true",
+        help="verbose logging",
+    )
+    parser.add_argument(
+        "filenames",
+        nargs="+",
+        help="paths to lint",
+    )
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        format="<%(threadName)s:%(levelname)s> %(message)s",
+        level=logging.NOTSET
+        if args.verbose
+        else logging.DEBUG
+        if len(args.filenames) < 1000
+        else logging.INFO,
+        stream=sys.stderr,
+    )
+
+    # Use a dictionary here to preserve order. mypy cares about order,
+    # tragically, e.g. https://github.com/python/mypy/issues/2015
+    filenames: Dict[str, bool] = {}
+
+    # If a stub file exists, have mypy check it instead of the original file, in
+    # accordance with PEP-484 (see https://www.python.org/dev/peps/pep-0484/#stub-files)
+    for filename in args.filenames:
+        if filename.endswith(".pyi"):
+            filenames[filename] = True
+            continue
+
+        stub_filename = filename.replace(".py", ".pyi")
+        if Path(stub_filename).exists():
+            filenames[stub_filename] = True
+        else:
+            filenames[filename] = True
+
+    lint_messages = check_files(list(filenames), args.config, args.retries, args.code)
+    for lint_message in lint_messages:
+        print(json.dumps(lint_message._asdict()), flush=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/linter/adapters/newlines_linter.py b/tools/linter/adapters/newlines_linter.py
new file mode 100644
index 0000000000..a2cb1c5ccd
--- /dev/null
+++ b/tools/linter/adapters/newlines_linter.py
@@ -0,0 +1,163 @@
+"""
+NEWLINE: Checks files to make sure there are no trailing newlines.
+"""
+import argparse
+import json
+import logging
+import sys
+
+from enum import Enum
+from typing import List, NamedTuple, Optional
+
+NEWLINE = 10  # ASCII "\n"
+CARRIAGE_RETURN = 13  # ASCII "\r"
+LINTER_CODE = "NEWLINE"
+
+
+class LintSeverity(str, Enum):
+    ERROR = "error"
+    WARNING = "warning"
+    ADVICE = "advice"
+    DISABLED = "disabled"
+
+
+class LintMessage(NamedTuple):
+    path: Optional[str]
+    line: Optional[int]
+    char: Optional[int]
+    code: str
+    severity: LintSeverity
+    name: str
+    original: Optional[str]
+    replacement: Optional[str]
+    description: Optional[str]
+
+
+def check_file(filename: str) -> Optional[LintMessage]:
+    logging.debug("Checking file %s", filename)
+
+    with open(filename, "rb") as f:
+        lines = f.readlines()
+
+    if len(lines) == 0:
+        # File is empty, just leave it alone.
+        return None
+
+    if len(lines) == 1 and len(lines[0]) == 1:
+        # file is wrong whether or not the only byte is a newline
+        return LintMessage(
+            path=filename,
+            line=None,
+            char=None,
+            code=LINTER_CODE,
+            severity=LintSeverity.ERROR,
+            name="Trailing newline",
+            original=None,
+            replacement=None,
+            description="Trailing newline found. Run `lintrunner --take NEWLINE -a` to apply changes.",
+        )
+
+    if len(lines[-1]) == 1 and lines[-1][0] == NEWLINE:
+        try:
+            original = b"".join(lines).decode("utf-8")
+        except Exception as err:
+            return LintMessage(
+                path=filename,
+                line=None,
+                char=None,
+                code=LINTER_CODE,
+                severity=LintSeverity.ERROR,
+                name="Decoding failure",
+                original=None,
+                replacement=None,
+                description=f"utf-8 decoding failed due to {err.__class__.__name__}:\n{err}",
+            )
+
+        return LintMessage(
+            path=filename,
+            line=None,
+            char=None,
+            code=LINTER_CODE,
+            severity=LintSeverity.ERROR,
+            name="Trailing newline",
+            original=original,
+            replacement=original.rstrip("\n") + "\n",
+            description="Trailing newline found. 
Run `lintrunner --take NEWLINE -a` to apply changes.", + ) + has_changes = False + original_lines: Optional[List[bytes]] = None + for idx, line in enumerate(lines): + if len(line) >= 2 and line[-1] == NEWLINE and line[-2] == CARRIAGE_RETURN: + if not has_changes: + original_lines = list(lines) + has_changes = True + lines[idx] = line[:-2] + b"\n" + + if has_changes: + try: + assert original_lines is not None + original = b"".join(original_lines).decode("utf-8") + replacement = b"".join(lines).decode("utf-8") + except Exception as err: + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="Decoding failure", + original=None, + replacement=None, + description=f"utf-8 decoding failed due to {err.__class__.__name__}:\n{err}", + ) + return LintMessage( + path=filename, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="DOS newline", + original=original, + replacement=replacement, + description="DOS newline found. Run `lintrunner --take NEWLINE -a` to apply changes.", + ) + + return None + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="native functions linter", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="location of native_functions.yaml", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + lint_messages = [] + for filename in args.filenames: + lint_message = check_file(filename) + if lint_message is not None: + lint_messages.append(lint_message) + + for lint_message in lint_messages: + print(json.dumps(lint_message._asdict()), flush=True) diff --git a/tools/linter/adapters/pip_init.py b/tools/linter/adapters/pip_init.py new file mode 100644 index 0000000000..f177a920d0 --- /dev/null +++ b/tools/linter/adapters/pip_init.py @@ -0,0 +1,83 @@ +""" +Initializer script that installs stuff to pip. +""" +import argparse +import logging +import os +import subprocess +import sys +import time + +from typing import List + + +def run_command(args: List[str]) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run(args, check=True) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="pip initializer") + parser.add_argument( + "packages", + nargs="+", + help="pip packages to install", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "--dry-run", help="do not install anything, just print what would be done." + ) + parser.add_argument( + "--no-black-binary", + help="do not use pre-compiled binaries from pip for black.", + action="store_true", + ) + + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET if args.verbose else logging.DEBUG, + stream=sys.stderr, + ) + + pip_args = ["pip3", "install"] + + # If we are in a global install, use `--user` to install so that you do not + # need root access in order to initialize linters. 
+    #
+    # However, `pip install --user` interacts poorly with virtualenvs (see:
+    # https://bit.ly/3vD4kvl) and conda (see: https://bit.ly/3KG7ZfU). So in
+    # these cases perform a regular installation.
+    in_conda = os.environ.get("CONDA_PREFIX") is not None
+    in_virtualenv = os.environ.get("VIRTUAL_ENV") is not None
+    if not in_conda and not in_virtualenv:
+        pip_args.append("--user")
+
+    pip_args.extend(args.packages)
+
+    for package in args.packages:
+        package_name, _, version = package.partition("=")
+        if version == "":
+            raise RuntimeError(
+                f"Package {package_name} did not have a version specified. "
+                "Please specify a version to produce a consistent linting experience."
+            )
+        if args.no_black_binary and "black" in package_name:
+            pip_args.append(f"--no-binary={package_name}")
+
+    dry_run = args.dry_run == "1"
+    if dry_run:
+        print(f"Would have run: {pip_args}")
+        sys.exit(0)
+
+    run_command(pip_args)
diff --git a/tools/linter/adapters/ruff_linter.py b/tools/linter/adapters/ruff_linter.py
new file mode 100644
index 0000000000..451834aa7c
--- /dev/null
+++ b/tools/linter/adapters/ruff_linter.py
@@ -0,0 +1,462 @@
+"""Adapter for https://github.com/charliermarsh/ruff."""
+
+from __future__ import annotations
+
+import argparse
+import concurrent.futures
+import dataclasses
+import enum
+import json
+import logging
+import os
+import subprocess
+import sys
+import time
+from typing import Any, BinaryIO
+
+LINTER_CODE = "RUFF"
+IS_WINDOWS: bool = os.name == "nt"
+
+
+def eprint(*args: Any, **kwargs: Any) -> None:
+    """Print to stderr."""
+    print(*args, file=sys.stderr, flush=True, **kwargs)
+
+
+class LintSeverity(str, enum.Enum):
+    """Severity of a lint message."""
+
+    ERROR = "error"
+    WARNING = "warning"
+    ADVICE = "advice"
+    DISABLED = "disabled"
+
+
+@dataclasses.dataclass(frozen=True)
+class LintMessage:
+    """A lint message defined by https://docs.rs/lintrunner/latest/lintrunner/lint_message/struct.LintMessage.html."""
+
+    path: str | None
+    line: int | None
+    char: int | None
+    code: str
+    severity: LintSeverity
+    name: str
+    original: str | None
+    replacement: str | None
+    description: str | None
+
+    def asdict(self) -> dict[str, Any]:
+        return dataclasses.asdict(self)
+
+    def display(self) -> None:
+        """Print to stdout for lintrunner to consume."""
+        print(json.dumps(self.asdict()), flush=True)
+
+
+def as_posix(name: str) -> str:
+    return name.replace("\\", "/") if IS_WINDOWS else name
+
+
+def _run_command(
+    args: list[str],
+    *,
+    timeout: int | None,
+    stdin: BinaryIO | None,
+    input: bytes | None,
+    check: bool,
+    cwd: os.PathLike[Any] | None,
+) -> subprocess.CompletedProcess[bytes]:
+    logging.debug("$ %s", " ".join(args))
+    start_time = time.monotonic()
+    try:
+        if input is not None:
+            return subprocess.run(
+                args,
+                capture_output=True,
+                shell=False,
+                input=input,
+                timeout=timeout,
+                check=check,
+                cwd=cwd,
+            )
+
+        return subprocess.run(
+            args,
+            stdin=stdin,
+            capture_output=True,
+            shell=False,
+            timeout=timeout,
+            check=check,
+            cwd=cwd,
+        )
+    finally:
+        end_time = time.monotonic()
+        logging.debug("took %dms", (end_time - start_time) * 1000)
+
+
+def run_command(
+    args: list[str],
+    *,
+    retries: int = 0,
+    timeout: int | None = None,
+    stdin: BinaryIO | None = None,
+    input: bytes | None = None,
+    check: bool = False,
+    cwd: os.PathLike[Any] | None = None,
+) -> subprocess.CompletedProcess[bytes]:
+    remaining_retries = retries
+    while True:
+        try:
+            return _run_command(
+                args, timeout=timeout, stdin=stdin, input=input, check=check, cwd=cwd
+            )
+        except 
subprocess.TimeoutExpired as err: + if remaining_retries == 0: + raise err + remaining_retries -= 1 + logging.warning( + "(%s/%s) Retrying because command failed with: %r", + retries - remaining_retries, + retries, + err, + ) + time.sleep(1) + + +def add_default_options(parser: argparse.ArgumentParser) -> None: + """Add default options to a parser. + + This should be called the last in the chain of add_argument calls. + """ + parser.add_argument( + "--retries", + type=int, + default=3, + help="number of times to retry if the linter times out.", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + + +def explain_rule(code: str) -> str: + proc = run_command( + ["ruff", "rule", "--format=json", code], + check=True, + ) + rule = json.loads(str(proc.stdout, "utf-8").strip()) + return f"\n{rule['linter']}: {rule['summary']}" + + +def get_issue_severity(code: str) -> LintSeverity: + # "B901": `return x` inside a generator + # "B902": Invalid first argument to a method + # "B903": __slots__ efficiency + # "B950": Line too long + # "C4": Flake8 Comprehensions + # "C9": Cyclomatic complexity + # "E2": PEP8 horizontal whitespace "errors" + # "E3": PEP8 blank line "errors" + # "E5": PEP8 line length "errors" + # "T400": type checking Notes + # "T49": internal type checker errors or unmatched messages + if any( + code.startswith(x) + for x in ( + "B9", + "C4", + "C9", + "E2", + "E3", + "E5", + "T400", + "T49", + "PLC", + "PLR", + ) + ): + return LintSeverity.ADVICE + + # "F821": Undefined name + # "E999": syntax error + if any(code.startswith(x) for x in ("F821", "E999", "PLE")): + return LintSeverity.ERROR + + # "F": PyFlakes Error + # "B": flake8-bugbear Error + # "E": PEP8 "Error" + # "W": PEP8 Warning + # possibly other plugins... 
+ return LintSeverity.WARNING + + +def format_lint_message( + message: str, code: str, rules: dict[str, str], show_disable: bool +) -> str: + if rules: + message += f".\n{rules.get(code) or ''}" + message += ".\nSee https://beta.ruff.rs/docs/rules/" + if show_disable: + message += f".\n\nTo disable, use ` # noqa: {code}`" + return message + + +def check_files( + filenames: list[str], + severities: dict[str, LintSeverity], + *, + config: str | None, + retries: int, + timeout: int, + explain: bool, + show_disable: bool, +) -> list[LintMessage]: + try: + proc = run_command( + [ + sys.executable, + "-m", + "ruff", + "--exit-zero", + "--quiet", + "--format=json", + *([f"--config={config}"] if config else []), + *filenames, + ], + retries=retries, + timeout=timeout, + check=True, + ) + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + f"COMMAND (exit code {err.returncode})\n" + f"{' '.join(as_posix(x) for x in err.cmd)}\n\n" + f"STDERR\n{err.stderr.decode('utf-8').strip() or '(empty)'}\n\n" + f"STDOUT\n{err.stdout.decode('utf-8').strip() or '(empty)'}" + ) + ), + ) + ] + + stdout = str(proc.stdout, "utf-8").strip() + vulnerabilities = json.loads(stdout) + + if explain: + all_codes = {v["code"] for v in vulnerabilities} + rules = {code: explain_rule(code) for code in all_codes} + else: + rules = {} + + return [ + LintMessage( + path=vuln["filename"], + name=vuln["code"], + description=( + format_lint_message( + vuln["message"], + vuln["code"], + rules, + show_disable, + ) + ), + line=int(vuln["location"]["row"]), + char=int(vuln["location"]["column"]), + code=LINTER_CODE, + severity=severities.get(vuln["code"], get_issue_severity(vuln["code"])), + original=None, + replacement=None, + ) + for vuln in vulnerabilities + ] + + +def check_file_for_fixes( + filename: str, + *, + config: str | None, + retries: int, + timeout: int, +) -> list[LintMessage]: + try: + with open(filename, "rb") as f: + original = f.read() + with open(filename, "rb") as f: + proc_fix = run_command( + [ + sys.executable, + "-m", + "ruff", + "--fix-only", + "--exit-zero", + *([f"--config={config}"] if config else []), + "--stdin-filename", + filename, + "-", + ], + stdin=f, + retries=retries, + timeout=timeout, + check=True, + ) + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + f"COMMAND (exit code {err.returncode})\n" + f"{' '.join(as_posix(x) for x in err.cmd)}\n\n" + f"STDERR\n{err.stderr.decode('utf-8').strip() or '(empty)'}\n\n" + f"STDOUT\n{err.stdout.decode('utf-8').strip() or '(empty)'}" + ) + ), + ) + ] + + replacement = proc_fix.stdout + if original == replacement: + return [] + + return [ + LintMessage( + path=filename, + name="format", + description="Run `lintrunner -a` to apply this patch.", + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.WARNING, + original=original.decode("utf-8"), + replacement=replacement.decode("utf-8"), + ) + ] + + +def main() -> None: + 
parser = argparse.ArgumentParser( + description=f"Ruff linter. Linter code: {LINTER_CODE}. Use with RUFF-FIX to auto-fix issues.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--config", + default=None, + help="Path to the `pyproject.toml` or `ruff.toml` file to use for configuration", + ) + parser.add_argument( + "--explain", + action="store_true", + help="Explain a rule", + ) + parser.add_argument( + "--show-disable", + action="store_true", + help="Show how to disable a lint message", + ) + parser.add_argument( + "--timeout", + default=90, + type=int, + help="Seconds to wait for ruff", + ) + parser.add_argument( + "--severity", + action="append", + help="map code to severity (e.g. `F401:advice`). This option can be used multiple times.", + ) + parser.add_argument( + "--no-fix", + action="store_true", + help="Do not suggest fixes", + ) + add_default_options(parser) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + severities: dict[str, LintSeverity] = {} + if args.severity: + for severity in args.severity: + parts = severity.split(":", 1) + assert len(parts) == 2, f"invalid severity `{severity}`" + severities[parts[0]] = LintSeverity(parts[1]) + + lint_messages = check_files( + args.filenames, + severities=severities, + config=args.config, + retries=args.retries, + timeout=args.timeout, + explain=args.explain, + show_disable=args.show_disable, + ) + for lint_message in lint_messages: + lint_message.display() + + if args.no_fix or not lint_messages: + # If we're not fixing, we can exit early + return + + files_with_lints = {lint.path for lint in lint_messages if lint.path is not None} + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count(), + thread_name_prefix="Thread", + ) as executor: + futures = { + executor.submit( + check_file_for_fixes, + path, + config=args.config, + retries=args.retries, + timeout=args.timeout, + ): path + for path in files_with_lints + } + for future in concurrent.futures.as_completed(futures): + try: + for lint_message in future.result(): + lint_message.display() + except Exception: # Catch all exceptions for lintrunner + logging.critical('Failed at "%s".', futures[future]) + raise + + +if __name__ == "__main__": + main() diff --git a/tools/linter/adapters/s3_init.py b/tools/linter/adapters/s3_init.py new file mode 100644 index 0000000000..c3d6e8e03c --- /dev/null +++ b/tools/linter/adapters/s3_init.py @@ -0,0 +1,216 @@ +import argparse +import hashlib +import json +import logging +import os +import platform +import stat +import subprocess +import sys +import urllib.error +import urllib.request +from pathlib import Path + +# String representing the host platform (e.g. Linux, Darwin). 
+HOST_PLATFORM = platform.system() +HOST_PLATFORM_ARCH = platform.system() + "-" + platform.processor() + +# PyTorch directory root +try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + stdout=subprocess.PIPE, + check=True, + ) + PYTORCH_ROOT = result.stdout.decode("utf-8").strip() +except subprocess.CalledProcessError: + # If git is not installed, compute repo root as 3 folders up from this file + path_ = os.path.abspath(__file__) + for _ in range(4): + path_ = os.path.dirname(path_) + PYTORCH_ROOT = path_ + +DRY_RUN = False + + +def compute_file_sha256(path: str) -> str: + """Compute the SHA256 hash of a file and return it as a hex string.""" + # If the file doesn't exist, return an empty string. + if not os.path.exists(path): + return "" + + hash = hashlib.sha256() + + # Open the file in binary mode and hash it. + with open(path, "rb") as f: + for b in f: + hash.update(b) + + # Return the hash as a hexadecimal string. + return hash.hexdigest() + + +def report_download_progress( + chunk_number: int, chunk_size: int, file_size: int +) -> None: + """ + Pretty printer for file download progress. + """ + if file_size != -1: + percent = min(1, (chunk_number * chunk_size) / file_size) + bar = "#" * int(64 * percent) + sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100))) + + +def check(binary_path: Path, reference_hash: str) -> bool: + """Check whether the binary exists and is the right one. + + If there is hash difference, delete the actual binary. + """ + if not binary_path.exists(): + logging.info("%s does not exist.", binary_path) + return False + + existing_binary_hash = compute_file_sha256(str(binary_path)) + if existing_binary_hash == reference_hash: + return True + + logging.warning( + """\ +Found binary hash does not match reference! + +Found hash: %s +Reference hash: %s + +Deleting %s just to be safe. +""", + existing_binary_hash, + reference_hash, + binary_path, + ) + if DRY_RUN: + logging.critical( + "In dry run mode, so not actually deleting the binary. But consider deleting it ASAP!" + ) + return False + + try: + binary_path.unlink() + except OSError as e: + logging.critical("Failed to delete binary: %s", e) + logging.critical( + "Delete this binary as soon as possible and do not execute it!" + ) + + return False + + +def download( + name: str, + output_dir: str, + url: str, + reference_bin_hash: str, +) -> bool: + """ + Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies + that it is the right binary by checking its SHA256 hash against the expected hash. + """ + # First check if we need to do anything + binary_path = Path(output_dir, name) + if check(binary_path, reference_bin_hash): + logging.info("Correct binary already exists at %s. 
Exiting.", binary_path) + return True + + # Create the output folder + binary_path.parent.mkdir(parents=True, exist_ok=True) + + # Download the binary + logging.info("Downloading %s to %s", url, binary_path) + + if DRY_RUN: + logging.info("Exiting as there is nothing left to do in dry run mode") + return True + + urllib.request.urlretrieve( + url, + binary_path, + reporthook=report_download_progress if sys.stdout.isatty() else None, + ) + + logging.info("Downloaded %s successfully.", name) + + # Check the downloaded binary + if not check(binary_path, reference_bin_hash): + logging.critical("Downloaded binary %s failed its hash check", name) + return False + + # Ensure that executable bits are set + mode = os.stat(binary_path).st_mode + mode |= stat.S_IXUSR + os.chmod(binary_path, mode) + + logging.info("Using %s located at %s", name, binary_path) + return True + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="downloads and checks binaries from s3", + ) + parser.add_argument( + "--config-json", + required=True, + help="Path to config json that describes where to find binaries and hashes", + ) + parser.add_argument( + "--linter", + required=True, + help="Which linter to initialize from the config json", + ) + parser.add_argument( + "--output-dir", + required=True, + help="place to put the binary", + ) + parser.add_argument( + "--output-name", + required=True, + help="name of binary", + ) + parser.add_argument( + "--dry-run", + default=False, + help="do not download, just print what would be done", + ) + + args = parser.parse_args() + if args.dry_run == "0": + DRY_RUN = False + else: + DRY_RUN = True + + logging.basicConfig( + format="[DRY_RUN] %(levelname)s: %(message)s" + if DRY_RUN + else "%(levelname)s: %(message)s", + level=logging.INFO, + stream=sys.stderr, + ) + + config = json.load(open(args.config_json)) + config = config[args.linter] + + # Allow processor specific binaries for platform (namely Intel and M1 binaries for MacOS) + host_platform = HOST_PLATFORM if HOST_PLATFORM in config else HOST_PLATFORM_ARCH + # If the host platform is not in platform_to_hash, it is unsupported. 
diff --git a/tools/linter/adapters/s3_init_config.json b/tools/linter/adapters/s3_init_config.json
new file mode 100644
index 0000000000..85e2402061
--- /dev/null
+++ b/tools/linter/adapters/s3_init_config.json
@@ -0,0 +1,53 @@
+{
+  "HOW TO UPDATE THE BINARIES": [
+    "Upload the new file to S3 under a new folder with the version number embedded in it (see actionlint for an example).",
+    "(Don't overwrite the old files, otherwise you'll break `lintrunner install` for anyone using an older commit of pytorch.)",
+    "'Hash' is the sha256 of the uploaded file.",
+    "Validate the new download url and hash by running 'lintrunner init' to pull the new binaries and then running 'lintrunner' to lint the files.",
+    "Some binaries have custom builds; see https://github.com/pytorch/test-infra/blob/main/.github/workflows/clang-tidy-linux.yml and https://github.com/pytorch/test-infra/blob/main/.github/workflows/clang-tidy-macos.yml"
+  ],
+  "clang-format": {
+    "Darwin": {
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/mac/clang-format-mojave",
+      "hash": "1485a242a96c737ba7cdd9f259114f2201accdb46d87ac7a8650b1a814cd4d4d"
+    },
+    "Linux": {
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-format-linux64",
+      "hash": "e1c8b97b919541a99e0a355df5c3f9e8abebc64259dbee6f8c68e1ef90582856"
+    }
+  },
+  "clang-tidy": {
+    "Darwin-i386": {
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-i386/15.0.6/clang-tidy",
+      "hash": "11c9234155dd5b7aec8cf46ea9629401c4432576615b6eff2a5a4c5e3f9e6504"
+    },
+    "Darwin-arm": {
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-arm/15.0.6/clang-tidy",
+      "hash": "4ed664cf50bb9fddec2d4170b3d7bbe0135dc5648acbd620b61c8d25a5a2fdb7"
+    },
+    "Linux": {
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/15.0.6/clang-tidy",
+      "hash": "8defeb3a2698caca60251f9d682bc08374f1a37eec77d515533affdd03f93add"
+    }
+  },
+  "actionlint": {
+    "Darwin": {
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/actionlint/1.6.21/Darwin_amd64/actionlint",
+      "hash": "b354db83815384d3c3a07f68f44b30cb0a70899757a0d185d7322de9952e8813"
+    },
+    "Linux": {
+      "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/actionlint/1.6.21/Linux_arm64/actionlint",
+      "hash": "025ac157db121b33971ef24af72d73d71cda3cb1e3a94795bb2708ef4032ca76"
+    }
+  },
+  "bazel": {
+    "Darwin": {
+      "download_url": "https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.16.0/bazelisk.py",
+      "hash": "1f6d76d023ddd5f1625f34d934418e7334a267318d084f31be09df8a8835ed16"
+    },
+    "Linux": {
+      "download_url": "https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.16.0/bazelisk.py",
+      "hash": "1f6d76d023ddd5f1625f34d934418e7334a267318d084f31be09df8a8835ed16"
+    }
+  }
+}
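Editor's note: each linter's entries above are keyed either by the bare platform.system() value ("Darwin", "Linux") or, for linters such as clang-tidy that ship per-architecture builds, by the system-processor form (e.g. "Darwin-arm"). A standalone sketch of the lookup that s3_init.py performs, assuming it is run from the repository root:

    import json
    import platform

    # Load the per-linter section of the config added in this patch.
    with open("tools/linter/adapters/s3_init_config.json") as f:
        config = json.load(f)["clang-tidy"]

    # Prefer the plain OS key; fall back to "<OS>-<processor>" (e.g. "Darwin-arm").
    host = platform.system()
    if host not in config:
        host = platform.system() + "-" + platform.processor()

    entry = config[host]
    print(entry["download_url"], entry["hash"])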
diff --git a/tools/linter/adapters/ufmt_linter.py b/tools/linter/adapters/ufmt_linter.py
new file mode 100644
index 0000000000..2064582e57
--- /dev/null
+++ b/tools/linter/adapters/ufmt_linter.py
@@ -0,0 +1,142 @@
+import argparse
+import concurrent.futures
+import json
+import logging
+import os
+import sys
+from enum import Enum
+from pathlib import Path
+from typing import Any, List, NamedTuple, Optional
+
+from ufmt.core import ufmt_string
+from ufmt.util import make_black_config
+from usort import Config as UsortConfig
+
+
+IS_WINDOWS: bool = os.name == "nt"
+
+
+def eprint(*args: Any, **kwargs: Any) -> None:
+    print(*args, file=sys.stderr, flush=True, **kwargs)
+
+
+class LintSeverity(str, Enum):
+    ERROR = "error"
+    WARNING = "warning"
+    ADVICE = "advice"
+    DISABLED = "disabled"
+
+
+class LintMessage(NamedTuple):
+    path: Optional[str]
+    line: Optional[int]
+    char: Optional[int]
+    code: str
+    severity: LintSeverity
+    name: str
+    original: Optional[str]
+    replacement: Optional[str]
+    description: Optional[str]
+
+
+def as_posix(name: str) -> str:
+    return name.replace("\\", "/") if IS_WINDOWS else name
+
+
+def format_error_message(filename: str, err: Exception) -> LintMessage:
+    return LintMessage(
+        path=filename,
+        line=None,
+        char=None,
+        code="UFMT",
+        severity=LintSeverity.ADVICE,
+        name="command-failed",
+        original=None,
+        replacement=None,
+        description=(f"Failed due to {err.__class__.__name__}:\n{err}"),
+    )
+
+
+def check_file(
+    filename: str,
+) -> List[LintMessage]:
+    with open(filename, "rb") as f:
+        original = f.read().decode("utf-8")
+
+    try:
+        path = Path(filename)
+
+        usort_config = UsortConfig.find(path)
+        black_config = make_black_config(path)
+
+        # Use UFMT API to call both usort and black
+        replacement = ufmt_string(
+            path=path,
+            content=original,
+            usort_config=usort_config,
+            black_config=black_config,
+        )
+
+        if original == replacement:
+            return []
+
+        return [
+            LintMessage(
+                path=filename,
+                line=None,
+                char=None,
+                code="UFMT",
+                severity=LintSeverity.WARNING,
+                name="format",
+                original=original,
+                replacement=replacement,
+                description="Run `lintrunner -a` to apply this patch.",
+            )
+        ]
+    except Exception as err:
+        return [format_error_message(filename, err)]
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Format files with ufmt (black + usort).",
+        fromfile_prefix_chars="@",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="verbose logging",
+    )
+    parser.add_argument(
+        "filenames",
+        nargs="+",
+        help="paths to lint",
+    )
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        format="<%(threadName)s:%(levelname)s> %(message)s",
+        level=logging.NOTSET
+        if args.verbose
+        else logging.DEBUG
+        if len(args.filenames) < 1000
+        else logging.INFO,
+        stream=sys.stderr,
+    )
+
+    with concurrent.futures.ThreadPoolExecutor(
+        max_workers=os.cpu_count(),
+        thread_name_prefix="Thread",
+    ) as executor:
+        futures = {executor.submit(check_file, x): x for x in args.filenames}
+        for future in concurrent.futures.as_completed(futures):
+            try:
+                for lint_message in future.result():
+                    print(json.dumps(lint_message._asdict()), flush=True)
+            except Exception:
+                logging.critical('Failed at "%s".', futures[future])
+                raise
+
+
+if __name__ == "__main__":
+    main()
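Editor's note: ufmt_linter.py follows the same adapter protocol as the other adapters in this patch: it writes one JSON-encoded LintMessage per line to stdout, and the original/replacement pair is what `lintrunner -a` applies as a patch. A rough sketch of driving it by hand outside lintrunner, assuming it is run from the repository root and using a file from this patch as the target:

    import json
    import subprocess
    import sys

    # Manual invocation of the adapter; the linted path is illustrative.
    proc = subprocess.run(
        [
            sys.executable,
            "tools/linter/adapters/ufmt_linter.py",
            "tools/linter/adapters/s3_init.py",
        ],
        stdout=subprocess.PIPE,
        check=True,
    )
    # Each non-empty stdout line is one LintMessage serialized with json.dumps;
    # a cleanly formatted file produces no output at all.
    for line in proc.stdout.decode("utf-8").splitlines():
        msg = json.loads(line)
        print(msg["code"], msg["name"], msg["path"])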