Skip to content

Commit

Permalink
Merge pull request #112 from QuanMPhm/101/proc_new_pi_credit
Browse files Browse the repository at this point in the history
  • Loading branch information
naved001 authored Dec 17, 2024
2 parents 5ad8e4c + 1686888 commit c979154
Show file tree
Hide file tree
Showing 10 changed files with 817 additions and 463 deletions.
9 changes: 9 additions & 0 deletions process_report/invoices/NERC_total_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@

@dataclass
class NERCTotalInvoice(invoice.Invoice):
"""
This invoice operates on data processed by these Processors:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

INCLUDED_INSTITUTIONS = [
"Harvard University",
"Boston University",
Expand Down Expand Up @@ -45,6 +51,9 @@ def output_s3_archive_key(self):
return f"Invoices/{self.invoice_month}/Archive/NERC-{self.invoice_month}-Total-Invoice {util.get_iso8601_time()}.csv"

def _prepare_export(self):
self.data = self.data[
self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.data = self.data[
self.data[invoice.INSTITUTION_FIELD].isin(self.INCLUDED_INSTITUTIONS)
].copy()
179 changes: 12 additions & 167 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
from dataclasses import dataclass
import logging
import sys

import pandas
import pyarrow

from process_report.invoices import invoice, discount_invoice
from process_report import util

from process_report.invoices import invoice

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


@dataclass
class BillableInvoice(discount_invoice.DiscountInvoice):
NEW_PI_CREDIT_CODE = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
class BillableInvoice(invoice.Invoice):
"""
This invoice operates on data processed by these Processors:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

PI_S3_FILEPATH = "PIs/PI.csv"

old_pi_filepath: str
updated_old_pi_df: pandas.DataFrame

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand All @@ -38,61 +41,10 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
invoice.BALANCE_FIELD,
]

old_pi_filepath: str
limit_new_pi_credit_to_partners: bool = False

@staticmethod
def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
    """Read the old-PI CSV file into a DataFrame.

    The initial-credit column and both credit-used columns are parsed as
    Arrow-backed ``decimal128(21, 2)`` values; every other column keeps the
    dtype pandas infers for it.

    Args:
        old_pi_filepath: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.

    Raises:
        SystemExit: If ``old_pi_filepath`` does not exist.
    """
    credit_columns = (
        invoice.PI_INITIAL_CREDITS,
        invoice.PI_1ST_USED,
        invoice.PI_2ND_USED,
    )
    # Each credit column gets its own fixed-point decimal dtype instance.
    column_dtypes = {
        column: pandas.ArrowDtype(pyarrow.decimal128(21, 2))
        for column in credit_columns
    }

    try:
        return pandas.read_csv(old_pi_filepath, dtype=column_dtypes)
    except FileNotFoundError:
        sys.exit("Applying credit 0002 failed. Old PI file does not exist")

@staticmethod
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
    """Return the month difference between `invoice_month` and the PI's first invoice month.

    A PI that has no row in `old_pi_df` is treated as new and gets an age of 0.

    Args:
        old_pi_df: Old-PI table; it is searched by the PI column and read
            from the first-month column.
        pi: PI identifier to look up.
        invoice_month: Month of the invoice currently being processed.

    Returns:
        The value of ``util.get_month_diff(invoice_month, first_month)``,
        or 0 when the PI is not present in `old_pi_df`.

    Raises:
        SystemExit: If the PI's age is negative, which suggests a faulty
            invoice or a program bug.
    """
    first_invoice_month = old_pi_df.loc[
        old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
    ]
    if first_invoice_month.empty:
        return 0

    # Take the scalar month once, so the error message shows the month
    # itself rather than the repr of the whole pandas Series.
    first_month = first_invoice_month.iat[0]
    month_diff = util.get_month_diff(invoice_month, first_month)
    if month_diff < 0:
        sys.exit(f"PI {pi} from {first_month} found in {invoice_month} invoice!")
    return month_diff

def _prepare(self):
def _prepare_export(self):
self.data = self.data[
self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.data[invoice.CREDIT_FIELD] = None
self.data[invoice.CREDIT_CODE_FIELD] = None
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
self.old_pi_df = self._load_old_pis(self.old_pi_filepath)

def _process(self):
self.data, self.updated_old_pi_df = self._apply_credits_new_pi(
self.data, self.old_pi_df
)

def _prepare_export(self):
self.updated_old_pi_df = self.updated_old_pi_df.astype(
{
invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
Expand All @@ -110,110 +62,3 @@ def export(self):
def export_s3(self, s3_bucket):
    """Run the parent class's S3 export, then upload the old-PI file.

    The file at ``self.old_pi_filepath`` is uploaded to the key
    ``self.PI_S3_FILEPATH`` in `s3_bucket`.
    """
    super().export_s3(s3_bucket)
    local_pi_file, pi_s3_key = self.old_pi_filepath, self.PI_S3_FILEPATH
    s3_bucket.upload_file(local_pi_file, pi_s3_key)

def _filter_partners(self, data):
    """Keep only the rows whose institution has an active partnership.

    An institution counts as active when its entry in the institute list has
    a truthy ``mghpcc_partnership_start_date`` and
    ``util.get_month_diff(self.invoice_month, start_date)`` is non-negative.

    Args:
        data: DataFrame containing the institution column.

    Returns:
        The rows of `data` whose institution is the display name of an
        active partner.
    """

    def has_started(institute_info):
        start_date = institute_info.get("mghpcc_partnership_start_date")
        if not start_date:
            return False
        return util.get_month_diff(self.invoice_month, start_date) >= 0

    partner_names = [
        institute_info["display_name"]
        for institute_info in util.load_institute_list()
        if has_started(institute_info)
    ]
    partner_mask = data[invoice.INSTITUTION_FIELD].isin(partner_names)
    return data[partner_mask]

def _filter_excluded_su_types(self, data):
    """Return the rows of `data` whose SU type is not listed in ``EXCLUDE_SU_TYPES``."""
    is_excluded = data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES)
    return data[~is_excluded]

def _get_credit_eligible_projects(self, data: pandas.DataFrame):
    """Select the rows of `data` that may receive the New PI credit.

    Rows with an excluded SU type are always dropped. When
    ``self.limit_new_pi_credit_to_partners`` is set, the result is further
    narrowed by ``_filter_partners``.
    """
    eligible = self._filter_excluded_su_types(data)
    if not self.limit_new_pi_credit_to_partners:
        return eligible
    return self._filter_partners(eligible)

def _apply_credits_new_pi(
    self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
):
    """Apply the New PI credit to credit-eligible projects and record usage.

    For each PI found in the credit-eligible rows of `data`, the PI's age
    (from ``_get_pi_age``) decides what happens:

    - age > 1: no credit; each project's balance is set to its cost.
    - age == 0: the full credit amount is available and the amount used is
      recorded in the PI's first-month-used column. A PI with no row in
      `old_pi_df` gets a new row first.
    - age == 1: the remaining credit is the initial credit minus what was
      used in the first month; usage is recorded in the second-month-used
      column.

    Args:
        data: Invoice rows; credit, credit-code and balance columns are
            written in place.
        old_pi_df: Old-PI table. It is rebound to a new frame when a PI row
            is added, and otherwise updated in place.

    Returns:
        A ``(data, old_pi_df)`` tuple holding the updated invoice rows and
        the updated old-PI table.
    """

    def get_initial_credit_amount(
        old_pi_df, invoice_month, default_initial_credit_amount
    ):
        # Reuse the initial credit already recorded for a PI whose first
        # month is this invoice month, if there is one.
        first_month_processed_pis = old_pi_df[
            old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month
        ]
        # `or` short-circuits: the walrus binding (and `.iat[0]`) only runs
        # when at least one such PI exists. Fall back to the default when
        # there is none or when the recorded value is NA.
        if first_month_processed_pis[
            invoice.PI_INITIAL_CREDITS
        ].empty or pandas.isna(
            new_pi_credit_amount := first_month_processed_pis[
                invoice.PI_INITIAL_CREDITS
            ].iat[0]
        ):
            new_pi_credit_amount = default_initial_credit_amount

        return new_pi_credit_amount

    new_pi_credit_amount = get_initial_credit_amount(
        old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT
    )
    print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")

    # Only PIs that own at least one credit-eligible project are visited.
    credit_eligible_projects = self._get_credit_eligible_projects(data)
    current_pi_set = set(credit_eligible_projects[invoice.PI_FIELD])
    for pi in current_pi_set:
        pi_projects = credit_eligible_projects[
            credit_eligible_projects[invoice.PI_FIELD] == pi
        ]
        pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
        # With exactly one matching row, squeeze() yields a Series keyed by
        # column name; with no match the result stays empty (len() == 0).
        pi_old_pi_entry = old_pi_df.loc[
            old_pi_df[invoice.PI_PI_FIELD] == pi
        ].squeeze()

        if pi_age > 1:
            # No credit applies at this age: balance equals cost.
            for i, row in pi_projects.iterrows():
                data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
        else:
            if pi_age == 0:
                if len(pi_old_pi_entry) == 0:
                    # First time this PI is seen: prepend a new row.
                    # NOTE(review): the values are positional, so this assumes
                    # old_pi_df's columns are ordered (PI, first month,
                    # initial credits, 1st-month used, 2nd-month used) — confirm.
                    pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0]
                    old_pi_df = pandas.concat(
                        [
                            pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
                            old_pi_df,
                        ],
                        ignore_index=True,
                    )
                    # Re-read the entry now that the row exists.
                    pi_old_pi_entry = old_pi_df.loc[
                        old_pi_df[invoice.PI_PI_FIELD] == pi
                    ].squeeze()

                remaining_credit = new_pi_credit_amount
                credit_used_field = invoice.PI_1ST_USED
            elif pi_age == 1:
                # Second month: only what the first month left over.
                remaining_credit = (
                    pi_old_pi_entry[invoice.PI_INITIAL_CREDITS]
                    - pi_old_pi_entry[invoice.PI_1ST_USED]
                )
                credit_used_field = invoice.PI_2ND_USED

            # apply_flat_discount is defined outside this block; presumably it
            # writes the credit/balance/code columns of `data` and returns the
            # amount of credit consumed — verify against its definition.
            credits_used = self.apply_flat_discount(
                data,
                pi_projects,
                remaining_credit,
                invoice.CREDIT_FIELD,
                invoice.BALANCE_FIELD,
                invoice.CREDIT_CODE_FIELD,
                self.NEW_PI_CREDIT_CODE,
            )

            # Warn when a previously recorded, non-zero usage for this month
            # is about to be replaced by a different value.
            if (pi_old_pi_entry[credit_used_field] != 0) and (
                credits_used != pi_old_pi_entry[credit_used_field]
            ):
                print(
                    f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
                )
            old_pi_df.loc[
                old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field
            ] = credits_used

    return (data, old_pi_df)
9 changes: 9 additions & 0 deletions process_report/invoices/bu_internal_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@

@dataclass
class BUInternalInvoice(discount_invoice.DiscountInvoice):
"""
This invoice operates on data processed by these Processors:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PI_FIELD,
Expand All @@ -27,6 +33,9 @@ def get_project(row):
else:
return project_alloc[: project_alloc.rfind("-")]

self.data = self.data[
self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
]
self.data = self.data[
self.data[invoice.INSTITUTION_FIELD] == "Boston University"
].copy()
Expand Down
1 change: 1 addition & 0 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
### Internally used field names
IS_BILLABLE_FIELD = "Is Billable"
MISSING_PI_FIELD = "Missing PI"
PI_BALANCE_FIELD = "PI Balance"
###


Expand Down
9 changes: 9 additions & 0 deletions process_report/invoices/pi_specific_invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@

@dataclass
class PIInvoice(invoice.Invoice):
"""
This invoice operates on data processed by these Processors:
- ValidateBillablePIsProcessor
- NewPICreditProcessor
"""

export_columns_list = [
invoice.INVOICE_DATE_FIELD,
invoice.PROJECT_FIELD,
Expand All @@ -28,6 +34,9 @@ class PIInvoice(invoice.Invoice):
]

def _prepare(self):
    """Keep billable rows that are not missing a PI, then collect the distinct PIs.

    Stores the filtered frame back on ``self.data`` and the unique values of
    its PI column on ``self.pi_list``.
    """
    is_billable = self.data[invoice.IS_BILLABLE_FIELD]
    is_missing_pi = self.data[invoice.MISSING_PI_FIELD]
    self.data = self.data[is_billable & ~is_missing_pi]
    self.pi_list = self.data[invoice.PI_FIELD].unique()

def export(self):
Expand Down
40 changes: 27 additions & 13 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
add_institution_processor,
lenovo_processor,
validate_billable_pi_processor,
new_pi_credit_processor,
)

### PI file field names
Expand Down Expand Up @@ -229,7 +230,19 @@ def main():
)
validate_billable_pi_proc.process()

processed_data = validate_billable_pi_proc.data
rates_info = load_from_url()
new_pi_credit_proc = new_pi_credit_processor.NewPICreditProcessor(
"",
invoice_month,
data=validate_billable_pi_proc.data,
old_pi_filepath=old_pi_file,
limit_new_pi_credit_to_partners=rates_info.get_value_at(
"Limit New PI Credit to MGHPCC Partners", invoice_month
),
)
new_pi_credit_proc.process()

processed_data = new_pi_credit_proc.data

### Initialize invoices

Expand All @@ -249,40 +262,41 @@ def main():
if args.upload_to_s3:
backup_to_s3_old_pi_file(old_pi_file)

rates_info = load_from_url()
billable_inv = billable_invoice.BillableInvoice(
name=args.output_file,
invoice_month=invoice_month,
data=processed_data.copy(),
old_pi_filepath=old_pi_file,
limit_new_pi_credit_to_partners=rates_info.get_value_at(
"Limit New PI Credit to MGHPCC Partners", invoice_month
),
)

util.process_and_export_invoices(
[lenovo_inv, nonbillable_inv, billable_inv], args.upload_to_s3
updated_old_pi_df=new_pi_credit_proc.updated_old_pi_df,
)

nerc_total_inv = NERC_total_invoice.NERCTotalInvoice(
name=args.NERC_total_invoice_file,
invoice_month=invoice_month,
data=billable_inv.data.copy(),
data=processed_data.copy(),
)

bu_internal_inv = bu_internal_invoice.BUInternalInvoice(
name=args.BU_invoice_file,
invoice_month=invoice_month,
data=billable_inv.data.copy(),
data=processed_data.copy(),
subsidy_amount=args.BU_subsidy_amount,
)

pi_inv = pi_specific_invoice.PIInvoice(
name=args.output_folder, invoice_month=invoice_month, data=billable_inv.data
name=args.output_folder, invoice_month=invoice_month, data=processed_data.copy()
)

util.process_and_export_invoices(
[nerc_total_inv, bu_internal_inv, pi_inv], args.upload_to_s3
[
lenovo_inv,
nonbillable_inv,
billable_inv,
nerc_total_inv,
bu_internal_inv,
pi_inv,
],
args.upload_to_s3,
)


Expand Down
Loading

0 comments on commit c979154

Please sign in to comment.