Address a bunch of linter failures (#3054)
* address a bunch of linter failures

* more linter failures and fix some from before

* more linter failures

* one last one

* Force python version for lint CI run

---------

Co-authored-by: Soren Spicknall <[email protected]>
lauriemerrell and SorenSpicknall authored Oct 26, 2023
1 parent eddae00 commit f0c31c2
Showing 21 changed files with 34 additions and 30 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/lint.yml
@@ -12,6 +12,8 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
+with:
+python-version: 3.11.6
- uses: pre-commit/[email protected]
- uses: crate-ci/typos@master
with:
@@ -104,7 +104,7 @@ def convert_gtfs_datasets_to_download_configs(task_instance, execution_date, **k

valid, invalid, skipped = gtfs_datasets_to_extract_configs(extract)

msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}"
msg = f"{len(extract.records)=} {len(valid)=} {len(skipped)=} {len(invalid)=}" # noqa: E225
print(msg)

print("Invalid records:")
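For readers skimming the suppressions above: the "=" specifier inside an f-string (Python 3.8+) echoes the expression text alongside its value, and a trailing "# noqa: <code>" silences one flake8 code on that line only. A minimal, self-contained sketch (the variable names are illustrative, not taken from the DAG):

records = ["a", "b", "c"]
print(f"{len(records)=}")  # prints: len(records)=3

# Depending on the Python/pycodestyle combination running the check, the "="
# inside the f-string can be flagged as E225 ("missing whitespace around
# operator"); a line-scoped suppression keeps the rest of the file linted.
msg = f"{len(records)=}"  # noqa: E225
print(msg)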
2 changes: 1 addition & 1 deletion airflow/dags/dags.py
@@ -39,7 +39,7 @@ def log_failure_to_slack(context):
Try {ti.try_number} of {ti.max_tries}
<{ti.log_url}| Check Log >
"""
""" # noqa: E221, E222
requests.post(slack_url, json={"text": message})

# This is very broad but we want to try to log _any_ exception to slack
@@ -172,7 +172,7 @@ def download_all(task_instance, execution_date, **kwargs):
success_rate = len(result.successes) / len(configs)
if success_rate < GTFS_FEED_LIST_ERROR_THRESHOLD:
raise RuntimeError(
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}"
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_FEED_LIST_ERROR_THRESHOLD}" # noqa: E231
)


@@ -30,7 +30,7 @@ def email_failures(task_instance: TaskInstance, execution_date, **kwargs):
The following agency GTFS feeds could not be extracted on {execution_date}:
{html_report}
"""
""" # noqa: E231,E241

if os.environ["AIRFLOW_ENV"] == "development":
print(
@@ -258,7 +258,7 @@ def unzip_extracts(
for failure in result.failures
]
exc_str = "\n".join(str(tup) for tup in exceptions)
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}" # noqa: E231
if exceptions:
typer.secho(msg, err=True, fg=typer.colors.RED)
if success_rate < GTFS_UNZIP_LIST_ERROR_THRESHOLD:
2 changes: 1 addition & 1 deletion airflow/plugins/operators/gtfs_csv_to_jsonl.py
@@ -165,7 +165,7 @@ def parse_files(day: pendulum.datetime, input_table_name: str, gtfs_filename: st
success_rate = len(result.successes) / len(files)
if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
raise RuntimeError(
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}" # noqa: E231
)


2 changes: 1 addition & 1 deletion airflow/plugins/operators/gtfs_csv_to_jsonl_hourly.py
@@ -194,7 +194,7 @@ def parse_files(period: pendulum.Period, input_table_name: str, gtfs_filename: s
success_rate = len(result.successes) / len(files)
if success_rate < GTFS_PARSE_ERROR_THRESHOLD:
raise RuntimeError(
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}"
f"Success rate: {success_rate:.3f} was below error threshold: {GTFS_PARSE_ERROR_THRESHOLD}" # noqa: E231
)


5 changes: 3 additions & 2 deletions airflow/plugins/operators/littlepay_raw_sync.py
@@ -117,7 +117,8 @@ def sync_file(
)
try:
metadata_str = fs.getxattr(
path=f"gs://{fileinfo.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
path=f"gs://{fileinfo.name}", # noqa: E231
attr=PARTITIONED_ARTIFACT_METADATA_KEY,
)
except KeyError:
print(f"metadata missing on {fileinfo.name}")
@@ -207,7 +208,7 @@ def execute(self, context):
raise RuntimeError("failed to page fully through bucket")

print(
f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}."
f"Found {len(files)} source files in {self.src_bucket}; diffing and copying to {RawLittlepayFileExtract.bucket}." # noqa: E702
)

fs = get_fs()
2 changes: 1 addition & 1 deletion jobs/gtfs-aggregator-scraper/gtfs_aggregator_scraper.py
@@ -183,7 +183,7 @@ def main(aggregator: GTFSFeedAggregator, dry_run: bool = False, progress: bool =
)
if dry_run:
typer.secho(
f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}"
f"dry run; skipping upload of {humanize.naturalsize(len(scrape_result.content))}" # noqa: E702
)
else:
typer.secho(
4 changes: 2 additions & 2 deletions jobs/gtfs-rt-parser-v2/gtfs_rt_parser.py
@@ -769,7 +769,7 @@ def main(
typer.secho(f"missing: {files_missing_metadata}")
typer.secho(f"invalid: {files_invalid_metadata}")
raise RuntimeError(
f"too many files have missing/invalid metadata; {total - len(files)} of {total}"
f"too many files have missing/invalid metadata; {total - len(files)} of {total}" # noqa: E702
)

if not files:
@@ -893,7 +893,7 @@ def main(

if exceptions:
exc_str = "\n".join(str(tup) for tup in exceptions)
msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}"
msg = f"got {len(exceptions)} exceptions from processing {len(aggregations_to_process)} feeds:\n{exc_str}" # noqa: E231
typer.secho(msg, err=True, fg=typer.colors.RED)
raise RuntimeError(msg)

@@ -435,7 +435,7 @@ def validate_hour(
success_rate = len(result.successes) / len(extracts)
if success_rate < GTFS_VALIDATE_LIST_ERROR_THRESHOLD:
exc_str = "\n".join(str(tup) for tup in exceptions)
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}"
msg = f"got {len(exceptions)} exceptions from validating {len(extracts)} extracts:\n{exc_str}" # noqa: E231
if exceptions:
typer.secho(msg, err=True, fg=typer.colors.RED)
raise RuntimeError(msg)
@@ -168,7 +168,7 @@ def cut_segments(
.drop(columns="temp_index")
)

-segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}")
+segmented2 = gpd.GeoDataFrame(segmented, crs=f"EPSG:{EPSG_CODE}") # noqa: E231

return segmented2

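The E231 suppressed above is a related false positive: the colon inside the f-string is literal text, but some Python/pycodestyle combinations tokenize f-string contents and read it as a slice or annotation colon. A small standalone illustration (the EPSG code value is hypothetical):

EPSG_CODE = 3310  # hypothetical value, for illustration only
crs = f"EPSG:{EPSG_CODE}"  # noqa: E231
print(crs)  # EPSG:3310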
2 changes: 1 addition & 1 deletion packages/calitp-data-analysis/calitp_data_analysis/sql.py
@@ -27,7 +27,7 @@ def get_engine(max_bytes=None, project="cal-itp-data-infra"):
# Note that we should be able to add location as a uri parameter, but
# it is not being picked up, so passing as a separate argument for now.
return create_engine(
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}", # noqa: E231
location=CALITP_BQ_LOCATION,
credentials_path=cred_path,
)
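A hedged usage sketch of the helper touched in this hunk; the import path follows the package layout shown in the file header, and the byte cap is an illustrative value, not a project default:

from calitp_data_analysis.sql import get_engine
from sqlalchemy import text

# The cap travels inside the SQLAlchemy BigQuery URI as maximum_bytes_billed;
# location and credentials are passed as separate keyword arguments.
engine = get_engine(max_bytes=5_000_000_000)
with engine.connect() as connection:
    connection.execute(text("SELECT 1"))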
@@ -109,7 +109,7 @@ def _repr_html_(self):
</tr>
{table_body_html}
</table>
"""
""" # noqa: E221,E222


tbls = AutoTable(
2 changes: 1 addition & 1 deletion packages/calitp-data-analysis/tests/test_tables.py
@@ -33,7 +33,7 @@ def test_auto_table_comments(engine, tmp_name):
OPTIONS(
description="the table comment"
)
"""
""" # noqa: E231,E241,E202
)

# TODO: rather than using AutoTable, let's just use CalitpTable directly
19 changes: 10 additions & 9 deletions packages/calitp-data-infra/calitp_data_infra/storage.py
@@ -223,7 +223,7 @@ def check_partitions(cls, values):
]
if missing:
raise ValueError(
f"all partition names must exist as fields or properties; missing {missing}"
f"all partition names must exist as fields or properties; missing {missing}" # noqa: E702
)
return values

@@ -315,15 +315,15 @@ def fetch_all_in_partition(

if not isinstance(bucket, str):
raise TypeError(
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}" # noqa: E702
)

if not table:
table = cls.table # type: ignore[assignment]

if not isinstance(table, str):
raise TypeError(
f"must either pass table, or the table must resolve to a string; got {type(table)}"
f"must either pass table, or the table must resolve to a string; got {type(table)}" # noqa: E702
)

prefix = "/".join(
@@ -396,7 +396,7 @@ def filename(self) -> str:

@property
def path(self) -> str:
return f"gs://{self.bucket}/{self.name}"
return f"gs://{self.bucket}/{self.name}" # noqa: E231

@property
def partition(self) -> Dict[str, str]:
@@ -486,23 +486,23 @@ def get_latest(

if not isinstance(bucket, str):
raise TypeError(
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}"
f"must either pass bucket, or the bucket must resolve to a string; got {type(bucket)}" # noqa: E702
)

if not table:
table = cls.table # type: ignore[assignment]

if not isinstance(table, str):
raise TypeError(
f"must either pass table, or the table must resolve to a string; got {type(table)}"
f"must either pass table, or the table must resolve to a string; got {type(table)}" # noqa: E702
)

if not partition_names:
partition_names = cls.partition_names # type: ignore[assignment]

if not isinstance(partition_names, list):
raise TypeError(
f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}"
f"must either pass partition names, or the partition names must resolve to a list; got {type(partition_names)}" # noqa: E702
)

latest = get_latest_file(
@@ -520,7 +520,8 @@ def get_latest(
return cls(
**json.loads(
get_fs().getxattr(
path=f"gs://{latest.name}", attr=PARTITIONED_ARTIFACT_METADATA_KEY
path=f"gs://{latest.name}", # noqa: E231
attr=PARTITIONED_ARTIFACT_METADATA_KEY,
)
)
)
@@ -755,7 +756,7 @@ def download_feed(
if disposition_header:
if disposition_header.startswith("filename="):
# sorry; cgi won't parse unless it's prefixed with the disposition type
disposition_header = f"attachment; {disposition_header}"
disposition_header = f"attachment; {disposition_header}" # noqa: E702
_, params = cgi.parse_header(disposition_header)
disposition_filename = params.get("filename")
else:
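The cgi comment in the download_feed hunk refers to how header parsing behaves; a minimal sketch with a made-up filename (note that the cgi module is deprecated since Python 3.11 and removed in 3.13):

import cgi

# parse_header() treats everything before the first ";" as the main value,
# so a bare 'filename="..."' string would not yield a filename parameter;
# prefixing "attachment; " makes it parse as a proper Content-Disposition.
disposition, params = cgi.parse_header('attachment; filename="feed.zip"')
print(disposition)             # attachment
print(params.get("filename"))  # feed.zip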
2 changes: 1 addition & 1 deletion warehouse/scripts/dbt_artifacts/__init__.py
@@ -40,7 +40,7 @@ def get_engine(project, max_bytes=None):
# Note that we should be able to add location as a uri parameter, but
# it is not being picked up, so passing as a separate argument for now.
return create_engine(
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}",
f"bigquery://{project}/?maximum_bytes_billed={max_bytes}", # noqa: E231
location="us-west2",
credentials_path=os.environ.get("BIGQUERY_KEYFILE_LOCATION"),
)
2 changes: 1 addition & 1 deletion warehouse/scripts/json_to_docblocks.py
@@ -27,7 +27,7 @@
prefix = f"gtfs_{model}__"
df.write(col.docblock(prefix=prefix))

-doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}'
+doc_ref = f'{{{{ doc("{prefix}{col.name}") }}}}' # noqa: E201,E202

extras: Dict[str, Any] = {
"description": f"'{doc_ref}'",
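The doubled braces in the json_to_docblocks hunk are f-string escapes: each {{ or }} renders as a single literal brace, which is what produces the dbt jinja doc() reference. A standalone illustration with hypothetical prefix and column names:

prefix = "gtfs_trips__"  # hypothetical prefix
col_name = "route_id"    # hypothetical column name
doc_ref = f'{{{{ doc("{prefix}{col_name}") }}}}'
print(doc_ref)  # {{ doc("gtfs_trips__route_id") }}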
2 changes: 1 addition & 1 deletion warehouse/scripts/publish.py
@@ -546,7 +546,7 @@ def _publish_exposure(
"-o",
mbtiles_path,
*[
f"--named-layer={layer}:{path}"
f"--named-layer={layer}:{path}" # noqa: E231
for layer, path in layer_geojson_paths.items()
],
]
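For context on the suppressed line in publish.py: the comprehension expands a layer-to-path mapping into one --named-layer flag per GeoJSON layer handed to tippecanoe. A sketch with hypothetical layers:

layer_geojson_paths = {
    "stops": "stops.geojson",
    "routes": "routes.geojson",
}
args = [
    f"--named-layer={layer}:{path}"  # noqa: E231
    for layer, path in layer_geojson_paths.items()
]
print(args)
# ['--named-layer=stops:stops.geojson', '--named-layer=routes:routes.geojson']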
2 changes: 1 addition & 1 deletion warehouse/scripts/visualize.py
@@ -240,7 +240,7 @@ def viz(
print(f"Writing DAG to {output}")
A.draw(output, args=f"-Gratio={ratio}", prog="dot")
if display:
url = f"file://{output.resolve()}"
url = f"file://{output.resolve()}" # noqa: E231
webbrowser.open(url, new=2) # open in new tab


