Address a bunch of linter failures (#3054)
* address a bunch of linter failures

* more linter failures and fix some from before

* more linter failures

* one last one

* Force python version for lint CI run

---------

Co-authored-by: Soren Spicknall <[email protected]>
lauriemerrell and SorenSpicknall authored Oct 26, 2023
1 parent eddae00 commit f0c31c2
Showing 21 changed files with 34 additions and 30 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/lint.yml
@@ -12,6 +12,8 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
+with:
+python-version: 3.11.6
- uses: pre-commit/[email protected]
- uses: crate-ci/typos@master
with:
@@ -104,7 +104,7 @@ def convert_gtfs_datasets_to_download_configs(task_instance, execution_date, **k

valid, invalid, skipped = gtfs_datasets_to_extract_configs(extract)

msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}"
msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}" # noqa: E225
print(msg)

print("Invalid records:")
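For readers skimming the suppressions above: the "=" specifier inside an f-string (Python 3.8+) echoes the expression text alongside its value, and a trailing "# noqa: <code>" silences one flake8 code on that line only. A minimal, self-contained sketch (the variable names are illustrative, not taken from the DAG):

records = ["a", "b", "c"]
print(f"{len(records)=}")  # prints: len(records)=3

# Depending on the Python/pycodestyle combination running the check, the "="
# inside the f-string can be flagged as E225 ("missing whitespace around
# operator"); a line-scoped suppression keeps the rest of the file linted.
msg = f"{len(records)=}"  # noqa: E225
print(msg)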
2 changes: 1 addition & 1 deletion airflow/dags/dags.py
@@ -39,7 +39,7 @@ def log_failure_to_slack(context):
Try {ti.try_number} of {ti.max_tries}
<{ti.log_url}| Check Log >
"""
""" # noqa: E221, E222
requests.post(slack_url, json={"text": message})

# This is very broad but we want to try to log _any_ exception to slack
@@ -172,7 +172,7 @@ def download_all(task_instance, execution_date, **kwargs):
success_rate = len(result.successes) / len(configs)
if success_rate < GTFS_FEED_LIST_ERROR_THRESHOLD:
raise RuntimeError(
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}" # noqa: E231
)


@@ -30,7 +30,7 @@ def email_failures(task_instance: TaskInstance, execution_date, **kwargs):
The following agency GTFS feeds could not be extracted on {execution_date}:
{html_report}
"""
""" # noqa: E231,E241

if os.environ["AIRFLOW_ENV"] == "development":
print(
@@ -258,7 +258,7 @@ def unzip_extracts(
for failure in result.failures
]
exc_str = "\n".join(str(tup) for tup in exceptions)
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}" # noqa: E231
if exceptions:
typer.secho(msg, err=True, fg=typer.colors.RED)
if success_rate < GTFS_UNZIP_LIST_ERROR_THRESHOLD:
2 changes: 1 addition & 1 deletion airflow/plugins/operators/gtfs_csv_to_jsonl.py
@@ -165,7 +165,7 @@ def parse_files(day: pendulum.datetime, input_table_name: str, gtfs_filename: st
success_rate = len(result.successes) / len(files)
if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
raise RuntimeError(
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}" # noqa: E231
)


2 changes: 1 addition & 1 deletion airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
@@ -194,7 +194,7 @@ def parse_files(period: pendulum.Period, input_table_name: str, gtfs_filename: s
success_rate = len(result.successes) / len(files)
if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
raise RuntimeError(
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}" # noqa: E231
)


5 changes: 3 additions & 2 deletions airflow/plugins/operators/littlepay_raw_sync.py
@@ -117,7 +117,8 @@ def sync_file(
)
try:
metadata_str = fs.getxattr(
path=f"gs://{fileinfo.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
path=f"gs://{fileinfo.name}", # noqa: E231
attr=PARTITIONED_ARTIFACT_METADATA_KEY,
)
except KeyError:
print(f"metadata missing on {fileinfo.name}")
@@ -207,7 +208,7 @@ def execute(self, context):
raise RuntimeError("failed to page fully through bucket")

print(
f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}."
f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}." # noqa: E702
)

fs = get_fs()
2 changes: 1 addition & 1 deletion jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
@@ -183,7 +183,7 @@ def main(aggregator: GTFSFeedAggregator, dry_run: bool = False, progress: bool =
)
if dry_run:
typer.secho(
f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}"
f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}" # noqa: E702
)
else:
typer.secho(
4 changes: 2 additions & 2 deletions jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
@@ -769,7 +769,7 @@ def main(
typer.secho(f"missing: {files_missing_metadata}")
typer.secho(f"invalid: {files_invalid_metadata}")
raise RuntimeError(
f"too many files have missing/invalid metadata; {total - len(files)} of {total}"
f"too many files have missing/invalid metadata; {total - len(files)} of {total}" # noqa: E702
)

if not files:
@@ -893,7 +893,7 @@ def main(

if exceptions:
exc_str = "\n".join(str(tup) for tup in exceptions)
msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}"
msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}" # noqa: E231
typer.secho(msg, err=True, fg=typer.colors.RED)
raise RuntimeError(msg)

@@ -435,7 +435,7 @@ def validate_hour(
success_rate = len(result.successes) / len(extracts)
if success_rate < GTFS_VALIDATE_LIST_ERROR_THRESHOLD:
exc_str = "\n".join(str(tup) for tup in exceptions)
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}" # noqa: E231
if exceptions:
typer.secho(msg, err=True, fg=typer.colors.RED)
raise RuntimeError(msg)
@@ -168,7 +168,7 @@ def cut_segments(
.drop(columns="temp_index")
)

-segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}")
+segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}") # noqa: E231

return segmented2

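The E231 suppressed above is a related false positive: the colon inside the f-string is literal text, but some Python/pycodestyle combinations tokenize f-string contents and read it as a slice or annotation colon. A small standalone illustration (the EPSG code value is hypothetical):

EPSG_CODE = 3310  # hypothetical value, for illustration only
crs = f"EPSG:{EPSG_CODE}"  # noqa: E231
print(crs)  # EPSG:3310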
2 changes: 1 addition & 1 deletion packages/calitp-data-analysis/calitp_data_analysis/sql.py
@@ -27,7 +27,7 @@ def get_engine(max_bytes=None, project="cal-itp-data-infra"):
# Note that we should be able to add location as a uri parameter, but
# it is not being picked up, so passing as a separate argument for now.
return create_engine(
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}", # noqa: E231
location=CALITP_BQ_LOCATION,
credentials_path=cred_path,
)
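A hedged usage sketch of the helper touched in this hunk; the import path follows the package layout shown in the file header, and the byte cap is an illustrative value, not a project default:

from calitp_data_analysis.sql import get_engine
from sqlalchemy import text

# The cap travels inside the SQLAlchemy BigQuery URI as maximum_bytes_billed;
# location and credentials are passed as separate keyword arguments.
engine = get_engine(max_bytes=5_000_000_000)
with engine.connect() as connection:
    connection.execute(text("SELECT 1"))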
@@ -109,7 +109,7 @@ def _repr_html_(self):
</tr>
{table_body_html}
</table>
"""
""" # noqa: E221,E222


tbls = AutoTable(
2 changes: 1 addition & 1 deletion packages/calitp-data-analysis/tests/test_tables.py
@@ -33,7 +33,7 @@ def test_auto_table_comments(engine, tmp_name):
OPTIONS(
description="the table comment"
)
"""
""" # noqa: E231,E241,E202
)

# TODO: rather than using AutoTable, let's just use CalitpTable directly
19 changes: 10 additions & 9 deletions packages/calitp-data-infra/calitp_data_infra/storage.py
@@ -223,7 +223,7 @@ def check_partitions(cls, values):
]
if missing:
raise ValueError(
f"all partition names must exist as fields or properties; missing {missing}"
f"all partition names must exist as fields or properties; missing {missing}" # noqa: E702
)
return values

@@ -315,15 +315,15 @@ def fetch_all_in_partition(

if not isinstance(bucket, str):
raise TypeError(
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}" # noqa: E702
)

if not table:
table = cls.table # type: ignore[assignment]

if not isinstance(table, str):
raise TypeError(
f"must either pass table, or the table must resolve to a string; got {type(table)}"
f"must either pass table, or the table must resolve to a string; got {type(table)}" # noqa: E702
)

prefix = "/".join(
@@ -396,7 +396,7 @@ def filename(self) -> str:

@property
def path(self) -> str:
return f"gs://{self.bucket}/{self.name}"
return f"gs://{self.bucket}/{self.name}" # noqa: E231

@property
def partition(self) -> Dict[str, str]:
@@ -486,23 +486,23 @@ def get_latest(

if not isinstance(bucket, str):
raise TypeError(
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}" # noqa: E702
)

if not table:
table = cls.table # type: ignore[assignment]

if not isinstance(table, str):
raise TypeError(
f"must either pass table, or the table must resolve to a string; got {type(table)}"
f"must either pass table, or the table must resolve to a string; got {type(table)}" # noqa: E702
)

if not partition_names:
partition_names = cls.partition_names # type: ignore[assignment]

if not isinstance(partition_names, list):
raise TypeError(
f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}"
f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}" # noqa: E702
)

latest = get_latest_file(
@@ -520,7 +520,8 @@ def get_latest(
return cls(
**json.loads(
get_fs().getxattr(
path=f"gs://{latest.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
path=f"gs://{latest.name}", # noqa: E231
attr=PARTITIONED_ARTIFACT_METADATA_KEY,
)
)
)
@@ -755,7 +756,7 @@ def download_feed(
if disposition_header:
if disposition_header.startswith("filename="):
# sorry; cgi won't parse unless it's prefixed with the disposition type
disposition_header = f"attachment; {disposition_header}"
disposition_header = f"attachment; {disposition_header}" # noqa: E702
_, params = cgi.parse_header(disposition_header)
disposition_filename = params.get("filename")
else:
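The cgi comment in the download_feed hunk refers to how header parsing behaves; a minimal sketch with a made-up filename (note that the cgi module is deprecated since Python 3.11 and removed in 3.13):

import cgi

# parse_header() treats everything before the first ";" as the main value,
# so a bare 'filename="..."' string would not yield a filename parameter;
# prefixing "attachment; " makes it parse as a proper Content-Disposition.
disposition, params = cgi.parse_header('attachment; filename="feed.zip"')
print(disposition)             # attachment
print(params.get("filename"))  # feed.zip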
2 changes: 1 addition & 1 deletion warehouse/scripts/dbt_artifacts/__init__.py
@@ -40,7 +40,7 @@ def get_engine(project, max_bytes=None):
# Note that we should be able to add location as a uri parameter, but
# it is not being picked up, so passing as a separate argument for now.
return create_engine(
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}", # noqa: E231
location="us-west2",
credentials_path=os.environ.get("BIGQUERY_KEYFILE_LOCATION"),
)
2 changes: 1 addition & 1 deletion warehouse/scripts/json_to_docblocks.py
@@ -27,7 +27,7 @@
prefix = f"gtfs_{model}__"
df.write(col.docblock(prefix=prefix))

-doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}'
+doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}' # noqa: E201,E202

extras: Dict[str, Any] = {
"description": f"'{doc_ref}'",
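The doubled braces in the json_to_docblocks hunk are f-string escapes: each {{ or }} renders as a single literal brace, which is what produces the dbt jinja doc() reference. A standalone illustration with hypothetical prefix and column names:

prefix = "gtfs_trips__"  # hypothetical prefix
col_name = "route_id"    # hypothetical column name
doc_ref = f'{{{{ doc("{prefix}{col_name}") }}}}'
print(doc_ref)  # {{ doc("gtfs_trips__route_id") }}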
2 changes: 1 addition & 1 deletion warehouse/scripts/publish.py
@@ -546,7 +546,7 @@ def _publish_exposure(
"-o",
mbtiles_path,
*[
f"--named-layer={layer}:{path}"
f"--named-layer={layer}:{path}" # noqa: E231
for layer, path in layer_geojson_paths.items()
],
]
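For context on the suppressed line in publish.py: the comprehension expands a layer-to-path mapping into one --named-layer flag per GeoJSON layer handed to tippecanoe. A sketch with hypothetical layers:

layer_geojson_paths = {
    "stops": "stops.geojson",
    "routes": "routes.geojson",
}
args = [
    f"--named-layer={layer}:{path}"  # noqa: E231
    for layer, path in layer_geojson_paths.items()
]
print(args)
# ['--named-layer=stops:stops.geojson', '--named-layer=routes:routes.geojson']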
2 changes: 1 addition & 1 deletion warehouse/scripts/visualize.py
@@ -240,7 +240,7 @@ def viz(
print(f"Writing DAG to {output}")
A.draw(output, args=f"-Gratio={ratio}", prog="dot")
if display:
url = f"file://{output.resolve()}"
url = f"file://{output.resolve()}" # noqa: E231
webbrowser.open(url, new=2) # open in new tab


