Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Invoices no longer assign to self.data during filtering step #119

Merged
merged 5 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions process_report/invoices/NERC_total_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@

@dataclass
class NERCTotalInvoice(invoice.Invoice):
"""
This invoice operates on data processed by these Processors:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

INCLUDED_INSTITUTIONS = [
"Harvard University",
"Boston University",
Expand Down Expand Up @@ -45,6 +51,9 @@ def output_s3_archive_key(self):
return f"Invoices/{self.invoice_month}/Archive/NERC-{self.invoice_month}-Total-Invoice {util.get_iso8601_time()}.csv"

def _prepare_export(self):
self.data = self.data[
self.data[invoice.INSTITUTION_FIELD].isin(self.INCLUDED_INSTITUTIONS)
self.export_data = self.data[
self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.export_data = self.export_data[
self.export_data[invoice.INSTITUTION_FIELD].isin(self.INCLUDED_INSTITUTIONS)
].copy()
181 changes: 13 additions & 168 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
from dataclasses import dataclass
import logging
import sys

import pandas
import pyarrow

from process_report.invoices import invoice, discount_invoice
from process_report import util

from process_report.invoices import invoice

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


@dataclass
class BillableInvoice(discount_invoice.DiscountInvoice):
NEW_PI_CREDIT_CODE = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
class BillableInvoice(invoice.Invoice):
"""
This invoice operates on data processed by these Processors:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

PI_S3_FILEPATH = "PIs/PI.csv"

old_pi_filepath: str
updated_old_pi_df: pandas.DataFrame

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand All @@ -38,61 +41,10 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
invoice.BALANCE_FIELD,
]

old_pi_filepath: str
limit_new_pi_credit_to_partners: bool = False

@staticmethod
def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
try:
old_pi_df = pandas.read_csv(
old_pi_filepath,
dtype={
invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
pyarrow.decimal128(21, 2)
),
invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
},
)
except FileNotFoundError:
sys.exit("Applying credit 0002 failed. Old PI file does not exist")

return old_pi_df

@staticmethod
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
"""Returns time difference between current invoice month and PI's first invoice month
I.e 0 for new PIs
Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug"""
first_invoice_month = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
]
if first_invoice_month.empty:
return 0

month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0])
if month_diff < 0:
sys.exit(
f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
)
else:
return month_diff

def _prepare(self):
self.data = self.data[
def _prepare_export(self):
self.export_data = self.data[
self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.data[invoice.CREDIT_FIELD] = None
self.data[invoice.CREDIT_CODE_FIELD] = None
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
self.old_pi_df = self._load_old_pis(self.old_pi_filepath)

def _process(self):
self.data, self.updated_old_pi_df = self._apply_credits_new_pi(
self.data, self.old_pi_df
)

def _prepare_export(self):
self.updated_old_pi_df = self.updated_old_pi_df.astype(
{
invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
Expand All @@ -110,110 +62,3 @@ def export(self):
def export_s3(self, s3_bucket):
super().export_s3(s3_bucket)
s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH)

def _filter_partners(self, data):
active_partnerships = list()
institute_list = util.load_institute_list()
for institute_info in institute_list:
if partnership_start_date := institute_info.get(
"mghpcc_partnership_start_date"
):
if util.get_month_diff(self.invoice_month, partnership_start_date) >= 0:
active_partnerships.append(institute_info["display_name"])

return data[data[invoice.INSTITUTION_FIELD].isin(active_partnerships)]

def _filter_excluded_su_types(self, data):
return data[~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))]

def _get_credit_eligible_projects(self, data: pandas.DataFrame):
filtered_data = self._filter_excluded_su_types(data)
if self.limit_new_pi_credit_to_partners:
filtered_data = self._filter_partners(filtered_data)

return filtered_data

def _apply_credits_new_pi(
self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
):
def get_initial_credit_amount(
old_pi_df, invoice_month, default_initial_credit_amount
):
first_month_processed_pis = old_pi_df[
old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month
]
if first_month_processed_pis[
invoice.PI_INITIAL_CREDITS
].empty or pandas.isna(
new_pi_credit_amount := first_month_processed_pis[
invoice.PI_INITIAL_CREDITS
].iat[0]
):
new_pi_credit_amount = default_initial_credit_amount

return new_pi_credit_amount

new_pi_credit_amount = get_initial_credit_amount(
old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT
)
print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")

credit_eligible_projects = self._get_credit_eligible_projects(data)
current_pi_set = set(credit_eligible_projects[invoice.PI_FIELD])
for pi in current_pi_set:
pi_projects = credit_eligible_projects[
credit_eligible_projects[invoice.PI_FIELD] == pi
]
pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi
].squeeze()

if pi_age > 1:
for i, row in pi_projects.iterrows():
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
else:
if pi_age == 0:
if len(pi_old_pi_entry) == 0:
pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0]
old_pi_df = pandas.concat(
[
pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
old_pi_df,
],
ignore_index=True,
)
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi
].squeeze()

remaining_credit = new_pi_credit_amount
credit_used_field = invoice.PI_1ST_USED
elif pi_age == 1:
remaining_credit = (
pi_old_pi_entry[invoice.PI_INITIAL_CREDITS]
- pi_old_pi_entry[invoice.PI_1ST_USED]
)
credit_used_field = invoice.PI_2ND_USED

credits_used = self.apply_flat_discount(
data,
pi_projects,
remaining_credit,
invoice.CREDIT_FIELD,
invoice.BALANCE_FIELD,
invoice.CREDIT_CODE_FIELD,
self.NEW_PI_CREDIT_CODE,
)

if (pi_old_pi_entry[credit_used_field] != 0) and (
credits_used != pi_old_pi_entry[credit_used_field]
):
print(
f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
)
old_pi_df.loc[
old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field
] = credits_used

return (data, old_pi_df)
61 changes: 23 additions & 38 deletions process_report/invoices/bu_internal_invoice.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
from dataclasses import dataclass
from decimal import Decimal

import process_report.invoices.invoice as invoice
import process_report.invoices.discount_invoice as discount_invoice


@dataclass
class BUInternalInvoice(discount_invoice.DiscountInvoice):
class BUInternalInvoice(invoice.Invoice):
"""
This invoice operates on data processed by these Processors:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PI_FIELD,
"Project",
invoice.COST_FIELD,
invoice.CREDIT_FIELD,
invoice.SUBSIDY_FIELD,
invoice.BALANCE_FIELD,
invoice.PI_BALANCE_FIELD,
]

subsidy_amount: int

def _prepare(self):
def get_project(row):
project_alloc = row[invoice.PROJECT_FIELD]
if project_alloc.rfind("-") == -1:
return project_alloc
else:
return project_alloc[: project_alloc.rfind("-")]

self.data = self.data[
self.data[invoice.INSTITUTION_FIELD] == "Boston University"
].copy()
self.data["Project"] = self.data.apply(get_project, axis=1)
self.data[invoice.SUBSIDY_FIELD] = Decimal(0)
exported_columns_map = {invoice.PI_BALANCE_FIELD: "Balance"}

def _process(self):
data_summed_projects = self._sum_project_allocations(self.data)
self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount)
def _prepare_export(self):
self.export_data = self.data[
self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.export_data = self.export_data[
self.export_data[invoice.INSTITUTION_FIELD] == "Boston University"
]
self.export_data = self._sum_project_allocations(self.export_data)

def _sum_project_allocations(self, dataframe):
"""A project may have multiple allocations, and therefore multiple rows
in the raw invoices. For BU-Internal invoice, we only want 1 row for
each unique project, summing up its allocations' costs"""
project_list = dataframe["Project"].unique()
data_no_dup = dataframe.drop_duplicates("Project", inplace=False)
sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
sum_fields = [
invoice.COST_FIELD,
invoice.CREDIT_FIELD,
invoice.SUBSIDY_FIELD,
invoice.PI_BALANCE_FIELD,
]
for project in project_list:
project_mask = dataframe["Project"] == project
no_dup_project_mask = data_no_dup["Project"] == project
Expand All @@ -52,18 +52,3 @@ def _sum_project_allocations(self, dataframe):
data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums

return data_no_dup

def _apply_subsidy(self, dataframe, subsidy_amount):
pi_list = dataframe[invoice.PI_FIELD].unique()

for pi in pi_list:
pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
self.apply_flat_discount(
dataframe,
pi_projects,
subsidy_amount,
invoice.SUBSIDY_FIELD,
invoice.BALANCE_FIELD,
)

return dataframe
9 changes: 6 additions & 3 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
### Internally used field names
IS_BILLABLE_FIELD = "Is Billable"
MISSING_PI_FIELD = "Missing PI"
PI_BALANCE_FIELD = "PI Balance"
PROJECT_NAME_FIELD = "Project"
###


Expand All @@ -47,6 +49,7 @@ class Invoice:
name: str
invoice_month: str
data: pandas.DataFrame
export_data = None

def process(self):
self._prepare()
Expand Down Expand Up @@ -91,13 +94,13 @@ def _prepare_export(self):

def _filter_columns(self):
"""Filters and renames columns before exporting"""
return self.data.copy()[self.export_columns_list].rename(
self.export_data = self.export_data[self.export_columns_list].rename(
columns=self.exported_columns_map
)

def export(self):
export_data = self._filter_columns()
export_data.to_csv(self.output_path, index=False)
self._filter_columns()
self.export_data.to_csv(self.output_path, index=False)

def export_s3(self, s3_bucket):
s3_bucket.upload_file(self.output_path, self.output_s3_key)
Expand Down
2 changes: 1 addition & 1 deletion process_report/invoices/lenovo_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ class LenovoInvoice(invoice.Invoice):
exported_columns_map = {invoice.SU_HOURS_FIELD: "SU Hours"}

def _prepare_export(self):
self.data = self.data[
self.export_data = self.data[
self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES)
]
2 changes: 1 addition & 1 deletion process_report/invoices/nonbillable_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ class NonbillableInvoice(invoice.Invoice):
]

def _prepare_export(self):
self.data = self.data[~self.data[invoice.IS_BILLABLE_FIELD]]
self.export_data = self.data[~self.data[invoice.IS_BILLABLE_FIELD]]
Loading
Loading