
Commit

Merge pull request #3019 from fedspendingtransparency/fix/delta-file-pandas

[FIX] issue with pandas API
gpontejos authored Mar 9, 2021
2 parents cece4bf + 2f78251 commit 9c13f94
Showing 1 changed file with 7 additions and 9 deletions.
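
For context, the fix targets the pandas call chain in the diff below: applying a function that returns a pd.Series across a Series normally expands the result into a DataFrame, but the affected pandas release could hand back a Series instead, which broke the later .rename(columns=...). A minimal sketch of both behaviors, using hypothetical transaction IDs (only split_transaction_id comes from the diff; the data is made up):

    import pandas as pd

    def split_transaction_id(tid):
        # Same helper as in the diff: split on "_" and append the original ID.
        return pd.Series(tid.split("_") + [tid])

    # Hypothetical stand-in for the downloaded deletion CSV.
    df = pd.DataFrame({"transaction_id": ["ABC_0001_X", "DEF_0002_Y"]})

    expanded = df["transaction_id"].apply(split_transaction_id)
    print(type(expanded).__name__)  # normally "DataFrame" (one column per split part)

    # If a buggy build returns a Series instead, reset_index() promotes it to a
    # DataFrame, so the .rename(columns=...) that follows in the diff still works:
    promoted = pd.Series(["ABC", "0001", "X"]).reset_index()
    print(type(promoted).__name__)  # "DataFrame"

Since reset_index() on a Series always yields a DataFrame, inserting it unconditionally before the rename makes the chain safe either way.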
@@ -226,15 +226,12 @@ def split_transaction_id(tid):
         return pd.Series(tid.split("_") + [tid])
 
     def add_deletion_records(self, source_path, working_dir, award_type, agency_code, source, generate_since):
-        """ Retrieve deletion files from S3 and append necessary records to the end of the file """
+        """Retrieve deletion files from S3 and append necessary records to the end of the file"""
         logger.info("Retrieving deletion records from S3 files and appending to the CSV")
 
         # Retrieve all SubtierAgency IDs within this TopTierAgency
-        subtier_agencies = list(
-            SubtierAgency.objects.filter(agency__toptier_agency__toptier_code=agency_code).values_list(
-                "subtier_code", flat=True
-            )
-        )
+        filter = {"agency__toptier_agency__toptier_code": agency_code}
+        subtier_agencies = list(SubtierAgency.objects.filter(**filter).values_list("subtier_code", flat=True))
 
         # Create a list of keys in the bucket that match the date range we want
         bucket = boto3.resource("s3", region_name=settings.USASPENDING_AWS_REGION).Bucket(
@@ -246,7 +243,7 @@ def add_deletion_records(self, source_path, working_dir, award_type, agency_code
             match_date = self.check_regex_match(award_type, key.key, generate_since)
             if match_date:
                 # Create a local copy of the deletion file
-                delete_filepath = "{}{}".format(working_dir, key.key)
+                delete_filepath = f"{working_dir}{key.key}"
                 bucket.download_file(key.key, delete_filepath)
                 df = pd.read_csv(delete_filepath)
                 os.remove(delete_filepath)
@@ -257,6 +254,7 @@ def add_deletion_records(self, source_path, working_dir, award_type, agency_code
                     .apply(self.split_transaction_id)
                     .replace("-none-", "")
                     .replace("-NONE-", "")
+                    .reset_index()  # adding to handle API bug which caused a Series to be returned
                     .rename(columns=AWARD_MAPPINGS[award_type]["column_headers"])
                 )
 
@@ -270,13 +268,13 @@ def add_deletion_records(self, source_path, working_dir, award_type, agency_code
                 # Reorder columns to make it CSV-ready, and append
                 df = self.organize_deletion_columns(source, df, award_type, match_date)
-                logger.info("Found {} deletion records to include".format(len(df.index)))
+                logger.info(f"Found {len(df.index):,} deletion records to include")
                 all_deletions = all_deletions.append(df, ignore_index=True)
 
         # Only append to file if there are any records
         if len(all_deletions.index) == 0:
             logger.info("No deletion records to append to file")
         else:
-            logger.info("Appending {} records to file".format(len(all_deletions.index)))
+            logger.info(f"Appending {len(all_deletions.index):,} records to file")
             self.add_deletions_to_file(all_deletions, award_type, source_path)
 
     def organize_deletion_columns(self, source, dataframe, award_type, match_date):
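The queryset change in the first hunk is purely a restyle: QuerySet.filter takes keyword arguments, so unpacking a dict with ** is interchangeable with writing the lookup inline. A self-contained sketch of that equivalence, with filter_stub as a hypothetical stand-in for Django's QuerySet.filter and a made-up toptier code:

    def filter_stub(**lookups):
        # Hypothetical stand-in for QuerySet.filter: echo the keyword arguments.
        return lookups

    inline = filter_stub(agency__toptier_agency__toptier_code="012")
    unpacked = filter_stub(**{"agency__toptier_agency__toptier_code": "012"})
    assert inline == unpacked  # both call styles produce the same lookup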

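The logging changes in the later hunks swap str.format for f-strings and add the :, thousands separator to the record counts; for a given count the two formatting styles are equivalent:

    count = 1234567
    assert "{:,}".format(count) == f"{count:,}" == "1,234,567"
    # e.g. the new log line reads: "Found 1,234,567 deletion records to include"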