From f0c31c27aff8307e1feaa9a3cf4059851bc8ed83 Mon Sep 17 00:00:00 2001
From: Laurie <55149902+lauriemerrell@users.noreply.github.com>
Date: Thu, 26 Oct 2023 16:47:05 -0500
Subject: [PATCH] Address a bunch of linter failures (#3054)

* address a bunch of linter failures

* more linter failures and fix some from before

* more linter failures

* one last one

* Force python version for lint CI run

---------

Co-authored-by: Soren Spicknall
---
 .github/workflows/lint.yml                    |  2 ++
 .../generate_gtfs_download_configs.py         |  2 +-
 airflow/dags/dags.py                          |  2 +-
 .../download_schedule_feeds.py                |  2 +-
 .../email_download_failures.py                |  2 +-
 .../unzip_gtfs_schedule.py                    |  2 +-
 .../plugins/operators/gtfs_csv_to_jsonl.py    |  2 +-
 .../operators/gtfs_csv_to_jsonl_hourly.py     |  2 +-
 .../plugins/operators/littlepay_raw_sync.py   |  5 +++--
 .../gtfs_aggregator_scraper.py                |  2 +-
 jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py      |  4 ++--
 .../gtfs_schedule_validator_hourly.py         |  2 +-
 .../calitp_data_analysis/geography_utils.py   |  2 +-
 .../calitp_data_analysis/sql.py               |  2 +-
 .../calitp_data_analysis/tables.py            |  2 +-
 .../calitp-data-analysis/tests/test_tables.py |  2 +-
 .../calitp_data_infra/storage.py              | 19 ++++++++++---------
 warehouse/scripts/dbt_artifacts/__init__.py   |  2 +-
 warehouse/scripts/json_to_docblocks.py        |  2 +-
 warehouse/scripts/publish.py                  |  2 +-
 warehouse/scripts/visualize.py                |  2 +-
 21 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index bc9ae3e534..bbbae19e32 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -12,6 +12,8 @@ jobs:
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
+        with:
+          python-version: 3.11.6
       - uses: pre-commit/action@v3.0.0
       - uses: crate-ci/typos@master
         with:
diff --git a/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py b/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
index e2ad47d878..071f7286e6 100644
--- a/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
+++ b/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
@@ -104,7 +104,7 @@ def convert_gtfs_datasets_to_download_configs(task_instance, execution_date, **k

     valid, invalid, skipped = gtfs_datasets_to_extract_configs(extract)

-    msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}"
+    msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}"  # noqa: E225
     print(msg)

     print("Invalid records:")
diff --git a/airflow/dags/dags.py b/airflow/dags/dags.py
index 17832dc299..3956c277a3 100644
--- a/airflow/dags/dags.py
+++ b/airflow/dags/dags.py
@@ -39,7 +39,7 @@ def log_failure_to_slack(context):
     Try {ti.try_number} of {ti.max_tries}
     <{ti.log_url}| Check Log >
-    """
+    """  # noqa: E221, E222

     requests.post(slack_url, json={"text": message})

     # This is very broad but we want to try to log _any_ exception to slack
diff --git a/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py b/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py
index b84801befc..38aad85c8e 100644
--- a/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py
+++ b/airflow/dags/download_gtfs_schedule_v2/download_schedule_feeds.py
@@ -172,7 +172,7 @@ def download_all(task_instance, execution_date, **kwargs):
     success_rate = len(result.successes) / len(configs)
     if success_rate < GTFS_FEED_LIST_ERROR_THRESHOLD:
         raise RuntimeError(
-            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"
+            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"  # noqa: E231
         )
diff --git a/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py b/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py
index 0f53ef3202..3bb6fa0ecb 100644
--- a/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py
+++ b/airflow/dags/download_gtfs_schedule_v2/email_download_failures.py
@@ -30,7 +30,7 @@ def email_failures(task_instance: TaskInstance, execution_date, **kwargs):
         The following agency GTFS feeds could not be extracted on {execution_date}:

         {html_report}
-        """
+        """  # noqa: E231,E241

         if os.environ["AIRFLOW_ENV"] == "development":
             print(
diff --git a/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py b/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py
index 74d552cef2..324d51c784 100644
--- a/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py
+++ b/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/unzip_gtfs_schedule.py
@@ -258,7 +258,7 @@ def unzip_extracts(
             for failure in result.failures
         ]
         exc_str = "\n".join(str(tup) for tup in exceptions)
-        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
+        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"  # noqa: E231
         if exceptions:
             typer.secho(msg, err=True, fg=typer.colors.RED)
         if success_rate < GTFS_UNZIP_LIST_ERROR_THRESHOLD:
diff --git a/airflow/plugins/operators/gtfs_csv_to_jsonl.py b/airflow/plugins/operators/gtfs_csv_to_jsonl.py
index dc6519da59..5365e2e681 100644
--- a/airflow/plugins/operators/gtfs_csv_to_jsonl.py
+++ b/airflow/plugins/operators/gtfs_csv_to_jsonl.py
@@ -165,7 +165,7 @@ def parse_files(day: pendulum.datetime, input_table_name: str, gtfs_filename: st
     success_rate = len(result.successes) / len(files)
     if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
         raise RuntimeError(
-            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
+            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"  # noqa: E231
         )
diff --git a/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py b/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
index 2f14ebab3f..2c68ae7e27 100644
--- a/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
+++ b/airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
@@ -194,7 +194,7 @@ def parse_files(period: pendulum.Period, input_table_name: str, gtfs_filename: s
     success_rate = len(result.successes) / len(files)
     if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
         raise RuntimeError(
-            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
+            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"  # noqa: E231
         )
diff --git a/airflow/plugins/operators/littlepay_raw_sync.py b/airflow/plugins/operators/littlepay_raw_sync.py
index 3a7bf0a2ab..b5bd1c0984 100644
--- a/airflow/plugins/operators/littlepay_raw_sync.py
+++ b/airflow/plugins/operators/littlepay_raw_sync.py
@@ -117,7 +117,8 @@ def sync_file(
     )
     try:
         metadata_str = fs.getxattr(
-            path=f"gs://{fileinfo.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
+            path=f"gs://{fileinfo.name}",  # noqa: E231
+            attr=PARTITIONED_ARTIFACT_METADATA_KEY,
         )
     except KeyError:
         print(f"metadata missing on {fileinfo.name}")
@@ -207,7 +208,7 @@ def execute(self, context):
             raise RuntimeError("failed to page fully through bucket")

         print(
-            f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}."
+            f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}."  # noqa: E702
         )

         fs = get_fs()
diff --git a/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py b/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
index 621d5b1948..b64902fb6d 100644
--- a/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
+++ b/jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
@@ -183,7 +183,7 @@ def main(aggregator: GTFSFeedAggregator, dry_run: bool = False, progress: bool =
         )
         if dry_run:
             typer.secho(
-                f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}"
+                f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}"  # noqa: E702
             )
         else:
             typer.secho(
diff --git a/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py b/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
index d16d4b91db..fc59b0bc74 100644
--- a/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
+++ b/jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
@@ -769,7 +769,7 @@ def main(
             typer.secho(f"missing: {files_missing_metadata}")
             typer.secho(f"invalid: {files_invalid_metadata}")
             raise RuntimeError(
-                f"too many files have missing/invalid metadata; {total - len(files)} of {total}"
+                f"too many files have missing/invalid metadata; {total - len(files)} of {total}"  # noqa: E702
             )

     if not files:
@@ -893,7 +893,7 @@ def main(
     if exceptions:
         exc_str = "\n".join(str(tup) for tup in exceptions)
-        msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}"
+        msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}"  # noqa: E231
         typer.secho(msg, err=True, fg=typer.colors.RED)
         raise RuntimeError(msg)
diff --git a/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py b/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py
index 8ff55c4eca..aedfbf7949 100644
--- a/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py
+++ b/jobs/gtfs-schedule-validator/gtfs_schedule_validator_hourly.py
@@ -435,7 +435,7 @@ def validate_hour(
     success_rate = len(result.successes) / len(extracts)
     if success_rate < GTFS_VALIDATE_LIST_ERROR_THRESHOLD:
         exc_str = "\n".join(str(tup) for tup in exceptions)
-        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
+        msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"  # noqa: E231
         if exceptions:
             typer.secho(msg, err=True, fg=typer.colors.RED)
         raise RuntimeError(msg)
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py b/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
index b8523346a0..e820b92df2 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
@@ -168,7 +168,7 @@ def cut_segments(
         .drop(columns="temp_index")
     )

-    segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}")
+    segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}")  # noqa: E231

     return segmented2
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/sql.py b/packages/calitp-data-analysis/calitp_data_analysis/sql.py
index 8157f829dc..89bfbf4ffd 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/sql.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/sql.py
@@ -27,7 +27,7 @@ def get_engine(max_bytes=None, project="cal-itp-data-infra"):
     # Note that we should be able to add location as a uri parameter, but
     # it is not being picked up, so passing as a separate argument for now.
     return create_engine(
-        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
+        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",  # noqa: E231
         location=CALITP_BQ_LOCATION,
         credentials_path=cred_path,
     )
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/tables.py b/packages/calitp-data-analysis/calitp_data_analysis/tables.py
index 4833c274ba..9f7ce5223a 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/tables.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/tables.py
@@ -109,7 +109,7 @@ def _repr_html_(self):
             {table_body_html}


-        """
+        """  # noqa: E221,E222


 tbls = AutoTable(
diff --git a/packages/calitp-data-analysis/tests/test_tables.py b/packages/calitp-data-analysis/tests/test_tables.py
index 0ede69d8f8..292724903a 100644
--- a/packages/calitp-data-analysis/tests/test_tables.py
+++ b/packages/calitp-data-analysis/tests/test_tables.py
@@ -33,7 +33,7 @@ def test_auto_table_comments(engine, tmp_name):
         OPTIONS(
             description="the table comment"
         )
-        """
+        """  # noqa: E231,E241,E202
     )

     # TODO: rather than using AutoTable, let's just use CalitpTable directly
diff --git a/packages/calitp-data-infra/calitp_data_infra/storage.py b/packages/calitp-data-infra/calitp_data_infra/storage.py
index 4f1b302680..d06f4277ce 100644
--- a/packages/calitp-data-infra/calitp_data_infra/storage.py
+++ b/packages/calitp-data-infra/calitp_data_infra/storage.py
@@ -223,7 +223,7 @@ def check_partitions(cls, values):
         ]
         if missing:
             raise ValueError(
-                f"all partition names must exist as fields or properties; missing {missing}"
+                f"all partition names must exist as fields or properties; missing {missing}"  # noqa: E702
             )
         return values

@@ -315,7 +315,7 @@ def fetch_all_in_partition(

     if not isinstance(bucket, str):
         raise TypeError(
-            f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
+            f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"  # noqa: E702
         )

     if not table:
@@ -323,7 +323,7 @@ def fetch_all_in_partition(

     if not isinstance(table, str):
         raise TypeError(
-            f"must either pass table, or the table must resolve to a string; got {type(table)}"
+            f"must either pass table, or the table must resolve to a string; got {type(table)}"  # noqa: E702
         )

     prefix = "/".join(
@@ -396,7 +396,7 @@ def filename(self) -> str:

     @property
     def path(self) -> str:
-        return f"gs://{self.bucket}/{self.name}"
+        return f"gs://{self.bucket}/{self.name}"  # noqa: E231

     @property
     def partition(self) -> Dict[str, str]:
@@ -486,7 +486,7 @@ def get_latest(

         if not isinstance(bucket, str):
             raise TypeError(
-                f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
+                f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"  # noqa: E702
             )

         if not table:
@@ -494,7 +494,7 @@ def get_latest(

         if not isinstance(table, str):
             raise TypeError(
-                f"must either pass table, or the table must resolve to a string; got {type(table)}"
+                f"must either pass table, or the table must resolve to a string; got {type(table)}"  # noqa: E702
             )

         if not partition_names:
@@ -502,7 +502,7 @@ def get_latest(

         if not isinstance(partition_names, list):
             raise TypeError(
-                f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}"
+                f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}"  # noqa: E702
             )

         latest = get_latest_file(
@@ -520,7 +520,8 @@ def get_latest(
         return cls(
             **json.loads(
                 get_fs().getxattr(
-                    path=f"gs://{latest.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
+                    path=f"gs://{latest.name}",  # noqa: E231
+                    attr=PARTITIONED_ARTIFACT_METADATA_KEY,
                 )
             )
         )
@@ -755,7 +756,7 @@ def download_feed(
     if disposition_header:
         if disposition_header.startswith("filename="):
             # sorry; cgi won't parse unless it's prefixed with the disposition type
-            disposition_header = f"attachment; {disposition_header}"
+            disposition_header = f"attachment; {disposition_header}"  # noqa: E702
         _, params = cgi.parse_header(disposition_header)
         disposition_filename = params.get("filename")
     else:
diff --git a/warehouse/scripts/dbt_artifacts/__init__.py b/warehouse/scripts/dbt_artifacts/__init__.py
index becf77f597..c8cacc4105 100644
--- a/warehouse/scripts/dbt_artifacts/__init__.py
+++ b/warehouse/scripts/dbt_artifacts/__init__.py
@@ -40,7 +40,7 @@ def get_engine(project, max_bytes=None):
     # Note that we should be able to add location as a uri parameter, but
     # it is not being picked up, so passing as a separate argument for now.
     return create_engine(
-        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
+        f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",  # noqa: E231
         location="us-west2",
         credentials_path=os.environ.get("BIGQUERY_KEYFILE_LOCATION"),
     )
diff --git a/warehouse/scripts/json_to_docblocks.py b/warehouse/scripts/json_to_docblocks.py
index 5de79ce114..b18ae14ef5 100755
--- a/warehouse/scripts/json_to_docblocks.py
+++ b/warehouse/scripts/json_to_docblocks.py
@@ -27,7 +27,7 @@
         prefix = f"gtfs_{model}__"
         df.write(col.docblock(prefix=prefix))

-        doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}'
+        doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}'  # noqa: E201,E202

         extras: Dict[str, Any] = {
             "description": f"'{doc_ref}'",
diff --git a/warehouse/scripts/publish.py b/warehouse/scripts/publish.py
index 6cff2f3636..08345b507c 100755
--- a/warehouse/scripts/publish.py
+++ b/warehouse/scripts/publish.py
@@ -546,7 +546,7 @@ def _publish_exposure(
             "-o",
             mbtiles_path,
             *[
-                f"--named-layer={layer}:{path}"
+                f"--named-layer={layer}:{path}"  # noqa: E231
                 for layer, path in layer_geojson_paths.items()
             ],
         ]
diff --git a/warehouse/scripts/visualize.py b/warehouse/scripts/visualize.py
index 98de3f4a30..5c415de583 100755
--- a/warehouse/scripts/visualize.py
+++ b/warehouse/scripts/visualize.py
@@ -240,7 +240,7 @@ def viz(
     print(f"Writing DAG to {output}")
     A.draw(output, args=f"-Gratio={ratio}", prog="dot")
    if display:
-        url = f"file://{output.resolve()}"
+        url = f"file://{output.resolve()}"  # noqa: E231
         webbrowser.open(url, new=2)  # open in new tab
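
Reviewer note (not part of the patch itself): nearly every hunk above follows one pattern. Codes such as E225, E231, and E702 appear to be flake8 complaints about the contents of Black-formatted f-strings (the "=" in self-documenting expressions, format-spec colons like ":.3f", and semicolons inside message text), so each affected line gets a narrow inline "# noqa" rather than a reworded string, and the lint workflow pins the Python version so pre-commit runs against a known interpreter. A minimal Python sketch of that pattern, using hypothetical values rather than anything taken from this repo:

    # Hypothetical values for illustration only; the real thresholds and
    # messages live in the DAGs and jobs touched by this patch.
    success_rate = 0.912
    GTFS_FEED_LIST_ERROR_THRESHOLD = 0.95

    if success_rate < GTFS_FEED_LIST_ERROR_THRESHOLD:
        raise RuntimeError(
            # The format-spec colon in ":.3f" is what some flake8 setups flag
            # as E231 ("missing whitespace after ':'"); the inline noqa
            # suppresses only that code on this line, leaving other checks on.
            f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"  # noqa: E231
        )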