From f0c31c27aff8307e1feaa9a3cf4059851bc8ed83 Mon Sep 17 00:00:00 2001
From: Laurie <55149902+lauriemerrell@users.noreply.github.com>
Date: Thu, 26 Oct 2023 16:47:05 -0500
Subject: [PATCH] Address a bunch of linter failures (#3054)

* address a bunch of linter failures

* more linter failures and fix some from before

* more linter failures

* one last one

* Force python version for lint CI run

---------

Co-authored-by: Soren Spicknall
---
 .github/workflows/lint.yml                    |  2 ++
 .../generate_gtfs_download_configs.py         |  2 +-
 airflow/dags/dags.py                          |  2 +-
 .../download_schedule_feeds.py                |  2 +-
 .../email_download_failures.py                |  2 +-
 .../unzip_gtfs_schedule.py                    |  2 +-
 .../plugins/operators/gtfs_csv_to_jsonl.py    |  2 +-
 .../operators/gtfs_csv_to_jsonl_hourly.py     |  2 +-
 .../plugins/operators/littlepay_raw_sync.py   |  5 +++--
 .../gtfs_aggregator_scraper.py                |  2 +-
 jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py      |  4 ++--
 .../gtfs_schedule_validator_hourly.py         |  2 +-
 .../calitp_data_analysis/geography_utils.py   |  2 +-
 .../calitp_data_analysis/sql.py               |  2 +-
 .../calitp_data_analysis/tables.py            |  2 +-
 .../calitp-data-analysis/tests/test_tables.py |  2 +-
 .../calitp_data_infra/storage.py              | 19 ++++++++++---------
 warehouse/scripts/dbt_artifacts/__init__.py   |  2 +-
 warehouse/scripts/json_to_docblocks.py        |  2 +-
 warehouse/scripts/publish.py                  |  2 +-
 warehouse/scripts/visualize.py                |  2 +-
 21 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index bc9ae3e534..bbbae19e32 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -12,6 +12,8 @@ jobs:
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
+        with:
+          python-version: 3.11.6
       - uses: pre-commit/action@v3.0.0
       - uses: crate-ci/typos@master
         with:
diff --git a/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py b/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
index e2ad47d878..071f7286e6 100644
--- a/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
+++ b/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
@@ -104,7 +104,7 @@ def convert_gtfs_datasets_to_download_configs(task_instance, execution_date, **k

     valid, invalid, skipped = gtfs_datasets_to_extract_configs(extract)

-    msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}"
+    msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}"  # noqa: E225
     print(msg)

     print("Invalid records:")
diff --git a/airflow/dags/dags.py b/airflow/dags/dags.py
index 17832dc299..3956c277a3 100644
--- a/airflow/dags/dags.py
+++ b/airflow/dags/dags.py
@@ -39,7 +39,7 @@ def log_failure_to_slack(context):
     Try {ti.try_number} of {ti.max_tries}
     <{ti.log_url}| Check Log >
-    """
+    """  # noqa: E221, E222

     requests.post(slack_url, json={"text": message})

     # This is very broad but we want to try to log _any_ exception to slack
diff --git a/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py b/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py
index b84801befc..38aad85c8e 100644
--- a/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py
+++ b/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py
@@ -172,7 +172,7 @@ def download_all(task_instance, execution_date, **kwargs):
     success_rate = len(result.successes) / len(configs)
     if success_rate < GTFS_FEED_LIST_ERROR_THRESHOLD:
         raise RuntimeError(
-            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"
+            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"  # noqa: E231
         )
diff --git a/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py b/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py
index 0f53ef3202..3bb6fa0ecb 100644
--- a/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py
+++ b/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py
@@ -30,7 +30,7 @@ def email_failures(task_instance: TaskInstance, execution_date, **kwargs):
         The following agency GTFS feeds could not be extracted on {execution_date}:

         {html_report}
-        """
+        """  # noqa: E231,E241

         if os.environ["AIRFLOW_ENV"] == "development":
             print(
diff --git a/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py b/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py
index 74d552cef2..324d51c784 100644
--- a/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py
+++ b/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py
@@ -258,7 +258,7 @@ def unzip_extracts(
             for failure in result.failures
         ]
         exc_str = "\n".join(str(tup) for tup in exceptions)
-        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
+        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"  # noqa: E231
         if exceptions:
             typer.secho(msg, err=True, fg=typer.colors.RED)
         if success_rate < GTFS_UNZIP_LIST_ERROR_THRESHOLD:
diff --git a/airflow/plugins/operators/gtfs_csv_to_jsonl.py b/airflow/plugins/operators/gtfs_csv_to_jsonl.py
index dc6519da59..5365e2e681 100644
--- a/airflow/plugins/operators/gtfs_csv_to_jsonl.py
+++ b/airflow/plugins/operators/gtfs_csv_to_jsonl.py
@@ -165,7 +165,7 @@ def parse_files(day: pendulum.datetime, input_table_name: str, gtfs_filename: st
     success_rate = len(result.successes) / len(files)
     if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
         raise RuntimeError(
-            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
+            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"  # noqa: E231
         )
diff --git a/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py b/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
index 2f14ebab3f..2c68ae7e27 100644
--- a/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
+++ b/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
@@ -194,7 +194,7 @@ def parse_files(period: pendulum.Period, input_table_name: str, gtfs_filename: s
     success_rate = len(result.successes) / len(files)
     if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
         raise RuntimeError(
-            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
+            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"  # noqa: E231
         )
diff --git a/airflow/plugins/operators/littlepay_raw_sync.py b/airflow/plugins/operators/littlepay_raw_sync.py
index 3a7bf0a2ab..b5bd1c0984 100644
--- a/airflow/plugins/operators/littlepay_raw_sync.py
+++ b/airflow/plugins/operators/littlepay_raw_sync.py
@@ -117,7 +117,8 @@ def sync_file(
     )
     try:
         metadata_str = fs.getxattr(
-            path=f"gs://{fileinfo.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
+            path=f"gs://{fileinfo.name}",  # noqa: E231
+            attr=PARTITIONED_ARTIFACT_METADATA_KEY,
         )
     except KeyError:
         print(f"metadata missing on {fileinfo.name}")
@@ -207,7 +208,7 @@ def execute(self, context):
             raise RuntimeError("failed to page fully through bucket")

         print(
-            f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}."
+            f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}."  # noqa: E702
         )

         fs = get_fs()
diff --git a/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py b/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
index 621d5b1948..b64902fb6d 100644
--- a/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
+++ b/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
@@ -183,7 +183,7 @@ def main(aggregator: GTFSFeedAggregator, dry_run: bool = False, progress: bool =
         )
         if dry_run:
             typer.secho(
-                f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}"
+                f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}"  # noqa: E702
             )
         else:
             typer.secho(
diff --git a/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py b/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
index d16d4b91db..fc59b0bc74 100644
--- a/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
+++ b/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
@@ -769,7 +769,7 @@ def main(
             typer.secho(f"missing: {files_missing_metadata}")
             typer.secho(f"invalid: {files_invalid_metadata}")
             raise RuntimeError(
-                f"too many files have missing/invalid metadata; {total - len(files)} of {total}"
+                f"too many files have missing/invalid metadata; {total - len(files)} of {total}"  # noqa: E702
             )

     if not files:
@@ -893,7 +893,7 @@ def main(
     if exceptions:
         exc_str = "\n".join(str(tup) for tup in exceptions)
-        msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}"
+        msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}"  # noqa: E231
         typer.secho(msg, err=True, fg=typer.colors.RED)
         raise RuntimeError(msg)
diff --git a/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py b/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py
index 8ff55c4eca..aedfbf7949 100644
--- a/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py
+++ b/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py
@@ -435,7 +435,7 @@ def validate_hour(
     success_rate = len(result.successes) / len(extracts)
     if success_rate < GTFS_VALIDATE_LIST_ERROR_THRESHOLD:
         exc_str = "\n".join(str(tup) for tup in exceptions)
-        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
+        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"  # noqa: E231
         if exceptions:
             typer.secho(msg, err=True, fg=typer.colors.RED)
         raise RuntimeError(msg)
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py b/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
index b8523346a0..e820b92df2 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
@@ -168,7 +168,7 @@ def cut_segments(
         .drop(columns="temp_index")
     )

-    segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}")
+    segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}")  # noqa: E231

     return segmented2
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/sql.py b/packages/calitp-data-analysis/calitp_data_analysis/sql.py
index 8157f829dc..89bfbf4ffd 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/sql.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/sql.py
@@ -27,7 +27,7 @@ def get_engine(max_bytes=None, project="cal-itp-data-infra"):
     # Note that we should be able to add location as a uri parameter, but
     # it is not being picked up, so passing as a separate argument for now.
     return create_engine(
-        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
+        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",  # noqa: E231
         location=CALITP_BQ_LOCATION,
         credentials_path=cred_path,
     )
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/tables.py b/packages/calitp-data-analysis/calitp_data_analysis/tables.py
index 4833c274ba..9f7ce5223a 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/tables.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/tables.py
@@ -109,7 +109,7 @@ def _repr_html_(self):
             {table_body_html}


-        """
+        """  # noqa: E221,E222


 tbls = AutoTable(
diff --git a/packages/calitp-data-analysis/tests/test_tables.py b/packages/calitp-data-analysis/tests/test_tables.py
index 0ede69d8f8..292724903a 100644
--- a/packages/calitp-data-analysis/tests/test_tables.py
+++ b/packages/calitp-data-analysis/tests/test_tables.py
@@ -33,7 +33,7 @@ def test_auto_table_comments(engine, tmp_name):
         OPTIONS(
             description="the table comment"
         )
-        """
+        """  # noqa: E231,E241,E202
     )

     # TODO: rather than using AutoTable, let's just use CalitpTable directly
diff --git a/packages/calitp-data-infra/calitp_data_infra/storage.py b/packages/calitp-data-infra/calitp_data_infra/storage.py
index 4f1b302680..d06f4277ce 100644
--- a/packages/calitp-data-infra/calitp_data_infra/storage.py
+++ b/packages/calitp-data-infra/calitp_data_infra/storage.py
@@ -223,7 +223,7 @@ def check_partitions(cls, values):
         ]
         if missing:
             raise ValueError(
-                f"all partition names must exist as fields or properties; missing {missing}"
+                f"all partition names must exist as fields or properties; missing {missing}"  # noqa: E702
             )
         return values

@@ -315,7 +315,7 @@ def fetch_all_in_partition(

     if not isinstance(bucket, str):
         raise TypeError(
-            f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
+            f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"  # noqa: E702
         )

     if not table:
@@ -323,7 +323,7 @@ def fetch_all_in_partition(

     if not isinstance(table, str):
         raise TypeError(
-            f"must either pass table, or the table must resolve to a string; got {type(table)}"
+            f"must either pass table, or the table must resolve to a string; got {type(table)}"  # noqa: E702
         )

     prefix = "/".join(
@@ -396,7 +396,7 @@ def filename(self) -> str:

     @property
     def path(self) -> str:
-        return f"gs://{self.bucket}/{self.name}"
+        return f"gs://{self.bucket}/{self.name}"  # noqa: E231

     @property
     def partition(self) -> Dict[str, str]:
@@ -486,7 +486,7 @@ def get_latest(

         if not isinstance(bucket, str):
             raise TypeError(
-                f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
+                f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"  # noqa: E702
             )

         if not table:
@@ -494,7 +494,7 @@ def get_latest(

         if not isinstance(table, str):
             raise TypeError(
-                f"must either pass table, or the table must resolve to a string; got {type(table)}"
+                f"must either pass table, or the table must resolve to a string; got {type(table)}"  # noqa: E702
             )

         if not partition_names:
@@ -502,7 +502,7 @@ def get_latest(

         if not isinstance(partition_names, list):
             raise TypeError(
-                f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}"
+                f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}"  # noqa: E702
             )

         latest = get_latest_file(
@@ -520,7 +520,8 @@ def get_latest(
         return cls(
             **json.loads(
                 get_fs().getxattr(
-                    path=f"gs://{latest.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
+                    path=f"gs://{latest.name}",  # noqa: E231
+                    attr=PARTITIONED_ARTIFACT_METADATA_KEY,
                 )
             )
         )
@@ -755,7 +756,7 @@ def download_feed(
     if disposition_header:
         if disposition_header.startswith("filename="):
             # sorry; cgi won't parse unless it's prefixed with the disposition type
-            disposition_header = f"attachment; {disposition_header}"
+            disposition_header = f"attachment; {disposition_header}"  # noqa: E702
         _, params = cgi.parse_header(disposition_header)
         disposition_filename = params.get("filename")
     else:
diff --git a/warehouse/scripts/dbt_artifacts/__init__.py b/warehouse/scripts/dbt_artifacts/__init__.py
index becf77f597..c8cacc4105 100644
--- a/warehouse/scripts/dbt_artifacts/__init__.py
+++ b/warehouse/scripts/dbt_artifacts/__init__.py
@@ -40,7 +40,7 @@ def get_engine(project, max_bytes=None):
     # Note that we should be able to add location as a uri parameter, but
     # it is not being picked up, so passing as a separate argument for now.
     return create_engine(
-        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
+        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",  # noqa: E231
         location="us-west2",
         credentials_path=os.environ.get("BIGQUERY_KEYFILE_LOCATION"),
     )
diff --git a/warehouse/scripts/json_to_docblocks.py b/warehouse/scripts/json_to_docblocks.py
index 5de79ce114..b18ae14ef5 100755
--- a/warehouse/scripts/json_to_docblocks.py
+++ b/warehouse/scripts/json_to_docblocks.py
@@ -27,7 +27,7 @@
         prefix = f"gtfs_{model}__"
         df.write(col.docblock(prefix=prefix))

-        doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}'
+        doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}'  # noqa: E201,E202

         extras: Dict[str, Any] = {
             "description": f"'{doc_ref}'",
diff --git a/warehouse/scripts/publish.py b/warehouse/scripts/publish.py
index 6cff2f3636..08345b507c 100755
--- a/warehouse/scripts/publish.py
+++ b/warehouse/scripts/publish.py
@@ -546,7 +546,7 @@ def _publish_exposure(
             "-o",
             mbtiles_path,
             *[
-                f"--named-layer={layer}:{path}"
+                f"--named-layer={layer}:{path}"  # noqa: E231
                 for layer, path in layer_geojson_paths.items()
             ],
         ]
diff --git a/warehouse/scripts/visualize.py b/warehouse/scripts/visualize.py
index 98de3f4a30..5c415de583 100755
--- a/warehouse/scripts/visualize.py
+++ b/warehouse/scripts/visualize.py
@@ -240,7 +240,7 @@ def viz(
     print(f"Writing DAG to {output}")
     A.draw(output, args=f"-Gratio={ratio}", prog="dot")
    if display:
-        url = f"file://{output.resolve()}"
+        url = f"file://{output.resolve()}"  # noqa: E231
         webbrowser.open(url, new=2)  # open in new tab
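
Reviewer note (not part of the patch itself): nearly every hunk above follows one pattern. Codes such as E225, E231, and E702 appear to be flake8 complaints about the contents of Black-formatted f-strings (the "=" in self-documenting expressions, format-spec colons like ":.3f", and semicolons inside message text), so each affected line gets a narrow inline "# noqa" rather than a reworded string, and the lint workflow pins the Python version so pre-commit runs against a known interpreter. A minimal Python sketch of that pattern, using hypothetical values rather than anything taken from this repo:

    # Hypothetical values for illustration only; the real thresholds and
    # messages live in the DAGs and jobs touched by this patch.
    success_rate = 0.912
    GTFS_FEED_LIST_ERROR_THRESHOLD = 0.95

    if success_rate < GTFS_FEED_LIST_ERROR_THRESHOLD:
        raise RuntimeError(
            # The format-spec colon in ":.3f" is what some flake8 setups flag
            # as E231 ("missing whitespace after ':'"); the inline noqa
            # suppresses only that code on this line, leaving other checks on.
            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"  # noqa: E231
        )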