diff --git a/process_report/invoices/NERC_total_invoice.py b/process_report/invoices/NERC_total_invoice.py index 335c52a..928c703 100644 --- a/process_report/invoices/NERC_total_invoice.py +++ b/process_report/invoices/NERC_total_invoice.py @@ -6,6 +6,12 @@ @dataclass class NERCTotalInvoice(invoice.Invoice): + """ + Dependencies: + - ValidateBillablePIsProcessor + - NewPICreditProcessor + """ + INCLUDED_INSTITUTIONS = [ "Harvard University", "Boston University", @@ -45,6 +51,9 @@ def output_s3_archive_key(self): return f"Invoices/{self.invoice_month}/Archive/NERC-{self.invoice_month}-Total-Invoice {util.get_iso8601_time()}.csv" def _prepare_export(self): + self.data = self.data[ + self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] + ] self.data = self.data[ self.data[invoice.INSTITUTION_FIELD].isin(self.INCLUDED_INSTITUTIONS) ].copy() diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py index bc6408e..4c71114 100644 --- a/process_report/invoices/billable_invoice.py +++ b/process_report/invoices/billable_invoice.py @@ -1,25 +1,28 @@ from dataclasses import dataclass import logging -import sys import pandas import pyarrow -from process_report.invoices import invoice, discount_invoice -from process_report import util - +from process_report.invoices import invoice logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @dataclass -class BillableInvoice(discount_invoice.DiscountInvoice): - NEW_PI_CREDIT_CODE = "0002" - INITIAL_CREDIT_AMOUNT = 1000 - EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] +class BillableInvoice(invoice.Invoice): + """ + Dependencies: + - ValidateBillablePIsProcessor + - NewPICreditProcessor + """ + PI_S3_FILEPATH = "PIs/PI.csv" + old_pi_filepath: str + updated_old_pi_df: pandas.DataFrame + export_columns_list = [ invoice.INVOICE_DATE_FIELD, invoice.PROJECT_FIELD, @@ -38,61 +41,10 @@ class
BillableInvoice(discount_invoice.DiscountInvoice): invoice.BALANCE_FIELD, ] - old_pi_filepath: str - limit_new_pi_credit_to_partners: bool = False - - @staticmethod - def _load_old_pis(old_pi_filepath) -> pandas.DataFrame: - try: - old_pi_df = pandas.read_csv( - old_pi_filepath, - dtype={ - invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( - pyarrow.decimal128(21, 2) - ), - invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, - ) - except FileNotFoundError: - sys.exit("Applying credit 0002 failed. Old PI file does not exist") - - return old_pi_df - - @staticmethod - def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month): - """Returns time difference between current invoice month and PI's first invoice month - I.e 0 for new PIs - Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug""" - first_invoice_month = old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH - ] - if first_invoice_month.empty: - return 0 - - month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0]) - if month_diff < 0: - sys.exit( - f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!" 
- ) - else: - return month_diff - - def _prepare(self): + def _prepare_export(self): self.data = self.data[ self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] ] - self.data[invoice.CREDIT_FIELD] = None - self.data[invoice.CREDIT_CODE_FIELD] = None - self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD] - self.old_pi_df = self._load_old_pis(self.old_pi_filepath) - - def _process(self): - self.data, self.updated_old_pi_df = self._apply_credits_new_pi( - self.data, self.old_pi_df - ) - - def _prepare_export(self): self.updated_old_pi_df = self.updated_old_pi_df.astype( { invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype( @@ -110,110 +62,3 @@ def export(self): def export_s3(self, s3_bucket): super().export_s3(s3_bucket) s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH) - - def _filter_partners(self, data): - active_partnerships = list() - institute_list = util.load_institute_list() - for institute_info in institute_list: - if partnership_start_date := institute_info.get( - "mghpcc_partnership_start_date" - ): - if util.get_month_diff(self.invoice_month, partnership_start_date) >= 0: - active_partnerships.append(institute_info["display_name"]) - - return data[data[invoice.INSTITUTION_FIELD].isin(active_partnerships)] - - def _filter_excluded_su_types(self, data): - return data[~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))] - - def _get_credit_eligible_projects(self, data: pandas.DataFrame): - filtered_data = self._filter_excluded_su_types(data) - if self.limit_new_pi_credit_to_partners: - filtered_data = self._filter_partners(filtered_data) - - return filtered_data - - def _apply_credits_new_pi( - self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame - ): - def get_initial_credit_amount( - old_pi_df, invoice_month, default_initial_credit_amount - ): - first_month_processed_pis = old_pi_df[ - old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month - ] - if first_month_processed_pis[ - invoice.PI_INITIAL_CREDITS - 
].empty or pandas.isna( - new_pi_credit_amount := first_month_processed_pis[ - invoice.PI_INITIAL_CREDITS - ].iat[0] - ): - new_pi_credit_amount = default_initial_credit_amount - - return new_pi_credit_amount - - new_pi_credit_amount = get_initial_credit_amount( - old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT - ) - print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}") - - credit_eligible_projects = self._get_credit_eligible_projects(data) - current_pi_set = set(credit_eligible_projects[invoice.PI_FIELD]) - for pi in current_pi_set: - pi_projects = credit_eligible_projects[ - credit_eligible_projects[invoice.PI_FIELD] == pi - ] - pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month) - pi_old_pi_entry = old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi - ].squeeze() - - if pi_age > 1: - for i, row in pi_projects.iterrows(): - data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD] - else: - if pi_age == 0: - if len(pi_old_pi_entry) == 0: - pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0] - old_pi_df = pandas.concat( - [ - pandas.DataFrame([pi_entry], columns=old_pi_df.columns), - old_pi_df, - ], - ignore_index=True, - ) - pi_old_pi_entry = old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi - ].squeeze() - - remaining_credit = new_pi_credit_amount - credit_used_field = invoice.PI_1ST_USED - elif pi_age == 1: - remaining_credit = ( - pi_old_pi_entry[invoice.PI_INITIAL_CREDITS] - - pi_old_pi_entry[invoice.PI_1ST_USED] - ) - credit_used_field = invoice.PI_2ND_USED - - credits_used = self.apply_flat_discount( - data, - pi_projects, - remaining_credit, - invoice.CREDIT_FIELD, - invoice.BALANCE_FIELD, - invoice.CREDIT_CODE_FIELD, - self.NEW_PI_CREDIT_CODE, - ) - - if (pi_old_pi_entry[credit_used_field] != 0) and ( - credits_used != pi_old_pi_entry[credit_used_field] - ): - print( - f"Warning: PI file overwritten. 
PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}" - ) - old_pi_df.loc[ - old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field - ] = credits_used - - return (data, old_pi_df) diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py index bc6f9c2..402bfbf 100644 --- a/process_report/invoices/bu_internal_invoice.py +++ b/process_report/invoices/bu_internal_invoice.py @@ -7,6 +7,12 @@ @dataclass class BUInternalInvoice(discount_invoice.DiscountInvoice): + """ + Dependencies: + - ValidateBillablePIsProcessor + - NewPICreditProcessor + """ + export_columns_list = [ invoice.INVOICE_DATE_FIELD, invoice.PI_FIELD, @@ -27,6 +33,9 @@ def get_project(row): else: return project_alloc[: project_alloc.rfind("-")] + self.data = self.data[ + self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] + ] self.data = self.data[ self.data[invoice.INSTITUTION_FIELD] == "Boston University" ].copy() diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py index 3b5b2e5..35c8ffb 100644 --- a/process_report/invoices/invoice.py +++ b/process_report/invoices/invoice.py @@ -36,6 +36,7 @@ ### Internally used field names IS_BILLABLE_FIELD = "Is Billable" MISSING_PI_FIELD = "Missing PI" +PI_BALANCE_FIELD = "PI Balance" ### diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py index 6d52933..98ca9b1 100644 --- a/process_report/invoices/pi_specific_invoice.py +++ b/process_report/invoices/pi_specific_invoice.py @@ -9,6 +9,12 @@ @dataclass class PIInvoice(invoice.Invoice): + """ + Dependencies: + - ValidateBillablePIsProcessor + - NewPICreditProcessor + """ + export_columns_list = [ invoice.INVOICE_DATE_FIELD, invoice.PROJECT_FIELD, @@ -28,6 +34,9 @@ class PIInvoice(invoice.Invoice): ] def _prepare(self): + self.data = self.data[ + self.data[invoice.IS_BILLABLE_FIELD] &
~self.data[invoice.MISSING_PI_FIELD] + ] self.pi_list = self.data[invoice.PI_FIELD].unique() def export(self): diff --git a/process_report/process_report.py b/process_report/process_report.py index 6ee32f1..cd65430 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -20,6 +20,7 @@ add_institution_processor, lenovo_processor, validate_billable_pi_processor, + new_pi_credit_processor, ) ### PI file field names @@ -229,7 +230,19 @@ def main(): ) validate_billable_pi_proc.process() - processed_data = validate_billable_pi_proc.data + rates_info = load_from_url() + new_pi_credit_proc = new_pi_credit_processor.NewPICreditProcessor( + "", + invoice_month, + data=validate_billable_pi_proc.data, + old_pi_filepath=old_pi_file, + limit_new_pi_credit_to_partners=rates_info.get_value_at( + "Limit New PI Credit to MGHPCC Partners", invoice_month + ), + ) + new_pi_credit_proc.process() + + processed_data = new_pi_credit_proc.data ### Initialize invoices @@ -249,40 +262,41 @@ def main(): if args.upload_to_s3: backup_to_s3_old_pi_file(old_pi_file) - rates_info = load_from_url() billable_inv = billable_invoice.BillableInvoice( name=args.output_file, invoice_month=invoice_month, data=processed_data.copy(), old_pi_filepath=old_pi_file, - limit_new_pi_credit_to_partners=rates_info.get_value_at( - "Limit New PI Credit to MGHPCC Partners", invoice_month - ), - ) - - util.process_and_export_invoices( - [lenovo_inv, nonbillable_inv, billable_inv], args.upload_to_s3 + updated_old_pi_df=new_pi_credit_proc.updated_old_pi_df, ) nerc_total_inv = NERC_total_invoice.NERCTotalInvoice( name=args.NERC_total_invoice_file, invoice_month=invoice_month, - data=billable_inv.data.copy(), + data=processed_data.copy(), ) bu_internal_inv = bu_internal_invoice.BUInternalInvoice( name=args.BU_invoice_file, invoice_month=invoice_month, - data=billable_inv.data.copy(), + data=processed_data.copy(), subsidy_amount=args.BU_subsidy_amount, ) pi_inv = pi_specific_invoice.PIInvoice( 
- name=args.output_folder, invoice_month=invoice_month, data=billable_inv.data + name=args.output_folder, invoice_month=invoice_month, data=processed_data.copy() ) util.process_and_export_invoices( - [nerc_total_inv, bu_internal_inv, pi_inv], args.upload_to_s3 + [ + lenovo_inv, + nonbillable_inv, + billable_inv, + nerc_total_inv, + bu_internal_inv, + pi_inv, + ], + args.upload_to_s3, ) diff --git a/process_report/processors/discount_processor.py b/process_report/processors/discount_processor.py index 8feb24e..f669265 100644 --- a/process_report/processors/discount_processor.py +++ b/process_report/processors/discount_processor.py @@ -7,15 +7,18 @@ @dataclass class DiscountProcessor(processor.Processor): """ - Invoice class containing functions useful for applying discounts + Processor class containing functions useful for applying discounts on dataframes """ - @staticmethod + IS_DISCOUNT_BY_NERC = True + def apply_flat_discount( + self, invoice: pandas.DataFrame, pi_projects: pandas.DataFrame, - discount_amount: int, + pi_balance_field: str, + discount_amount: float, discount_field: str, balance_field: str, code_field: str = None, @@ -38,9 +41,10 @@ def apply_flat_discount( :param invoice: Dataframe containing all projects :param pi_projects: A subset of `invoice`, containing all projects for a PI you want to apply the discount + :param pi_balance_field: Name of the field of the PI balance :param discount_amount: The discount given to the PI :param discount_field: Name of the field to put the discount amount applied to each project - :param balance_field: Name of the balance field + :param balance_field: Name of the NERC balance field :param code_field: Name of the discount code field :param discount_code: Code of the discount """ @@ -49,9 +53,10 @@ def apply_discount_on_project(remaining_discount_amount, project_i, project): remaining_project_balance = project[balance_field] applied_discount = min(remaining_project_balance, remaining_discount_amount) 
invoice.at[project_i, discount_field] = applied_discount - invoice.at[project_i, balance_field] = ( - project[balance_field] - applied_discount - ) + balance_after_discount = project[balance_field] - applied_discount + invoice.at[project_i, pi_balance_field] = balance_after_discount + if self.IS_DISCOUNT_BY_NERC: + invoice.at[project_i, balance_field] = balance_after_discount remaining_discount_amount -= applied_discount return remaining_discount_amount diff --git a/process_report/processors/new_pi_credit_processor.py b/process_report/processors/new_pi_credit_processor.py index 9b61aa1..693ca4a 100644 --- a/process_report/processors/new_pi_credit_processor.py +++ b/process_report/processors/new_pi_credit_processor.py @@ -11,9 +11,15 @@ @dataclass class NewPICreditProcessor(discount_processor.DiscountProcessor): + """ + Dependencies: + - ValidateBillablePIsProcessor + """ + NEW_PI_CREDIT_CODE = "0002" INITIAL_CREDIT_AMOUNT = 1000 EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] + IS_DISCOUNT_BY_NERC = True old_pi_filepath: str limit_new_pi_credit_to_partners: bool = False @@ -70,8 +76,16 @@ def _filter_partners(self, data): def _filter_excluded_su_types(self, data): return data[~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))] + def _filter_nonbillables(self, data): + return data[data["Is Billable"]] + + def _filter_missing_pis(self, data): + return data[~data["Missing PI"]] + def _get_credit_eligible_projects(self, data: pandas.DataFrame): - filtered_data = self._filter_excluded_su_types(data) + filtered_data = self._filter_nonbillables(data) + filtered_data = self._filter_missing_pis(filtered_data) + filtered_data = self._filter_excluded_su_types(filtered_data) if self.limit_new_pi_credit_to_partners: filtered_data = self._filter_partners(filtered_data) @@ -143,6 +157,7 @@ def get_initial_credit_amount( credits_used = self.apply_flat_discount( data, pi_projects, + invoice.PI_BALANCE_FIELD, remaining_credit, invoice.CREDIT_FIELD,
invoice.BALANCE_FIELD, @@ -165,6 +180,7 @@ def get_initial_credit_amount( def _prepare(self): self.data[invoice.CREDIT_FIELD] = None self.data[invoice.CREDIT_CODE_FIELD] = None + self.data[invoice.PI_BALANCE_FIELD] = self.data[invoice.COST_FIELD] self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD] self.old_pi_df = self._load_old_pis(self.old_pi_filepath) diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 3293ea1..788a91d 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -1,7 +1,6 @@ from unittest import TestCase, mock import tempfile import pandas -import pyarrow import os import uuid import math @@ -110,6 +109,8 @@ def setUp(self): "ProjectE", ], "Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"], + "Is Billable": [True, True, True, True, True], + "Missing PI": [False, False, False, False, False], } self.dataframe = pandas.DataFrame(data) self.invoice_month = data["Invoice Month"][0] @@ -277,304 +278,483 @@ def test_get_month_diff(self): util.get_month_diff("2024-16", "2025-03") -class TestCredit0002(TestCase): - def setUp(self): - data = { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": [ - "PI1", - "PI2", - "PI3", - "PI4", - "PI4", - "PI5", - "PI7", - "NewPI1", - "NewPI1", - "NewPI2", - "NewPI2", - ], - "SU Type": [ - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", +class TestNewPICreditProcessor(TestCase): + def _assert_result_invoice_and_old_pi_file( + self, + invoice_month, + test_invoice, + test_old_pi_filepath, + answer_invoice, + answer_old_pi_df, + ): + new_pi_credit_proc = test_utils.new_new_pi_credit_processor( + invoice_month=invoice_month, + data=test_invoice, + old_pi_filepath=test_old_pi_filepath, + ) + new_pi_credit_proc.process() + output_invoice = 
new_pi_credit_proc.data + output_old_pi_df = new_pi_credit_proc.updated_old_pi_df.sort_values( + by="PI", ignore_index=True + ) + + answer_invoice = answer_invoice.astype(output_invoice.dtypes) + answer_old_pi_df = answer_old_pi_df.astype(output_old_pi_df.dtypes).sort_values( + by="PI", ignore_index=True + ) + + self.assertTrue(output_invoice.equals(answer_invoice)) + self.assertTrue(output_old_pi_df.equals(answer_old_pi_df)) + + def _get_test_invoice( + self, pi, cost, su_type=None, is_billable=None, missing_pi=None + ): + if not su_type: + su_type = ["CPU" for _ in range(len(pi))] + + if not is_billable: + is_billable = [True for _ in range(len(pi))] + + if not missing_pi: + missing_pi = [False for _ in range(len(pi))] + + return pandas.DataFrame( + { + "Manager (PI)": pi, + "Cost": cost, + "SU Type": su_type, + "Is Billable": is_billable, + "Missing PI": missing_pi, + } + ) + + def setUp(self) -> None: + self.test_old_pi_file = tempfile.NamedTemporaryFile( + delete=False, mode="w+", suffix=".csv" + ) + + def tearDown(self) -> None: + os.remove(self.test_old_pi_file.name) + + def test_no_new_pi(self): + test_invoice = self._get_test_invoice( + ["PI" for _ in range(3)], [100 for _ in range(3)] + ) + + # Other fields of old PI file not accessed if PI is no longer + # eligible for new-PI credit + test_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-01"], + "Initial Credits": [1000], + } + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [None for _ in range(3)], + "Credit Code": [None for _ in range(3)], + "PI Balance": [100 for _ in range(3)], + "Balance": [100 for _ in range(3)], + } + ), ], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - "ProjectF", - "ProjectG", - "ProjectH", - "ProjectI", - "ProjectJ", - "ProjectK", + axis=1, + ) + + answer_old_pi_df = 
test_old_pi_df.copy() + + self._assert_result_invoice_and_old_pi_file( + "2024-06", + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_one_new_pi(self): + """Invoice with one completely new PI""" + + # One allocation + invoice_month = "2024-06" + + test_invoice = self._get_test_invoice(["PI"], [100]) + + test_old_pi_df = pandas.DataFrame( + columns=[ + "PI", + "First Invoice Month", + "Initial Credits", + "1st Month Used", + "2nd Month Used", + ] + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [100], + "Credit Code": ["0002"], + "PI Balance": [0], + "Balance": [0], + } + ), ], - "Cost": [10, 100, 10000, 500, 100, 400, 200, 250, 250, 700, 700], - } - answer_df_dict = { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [100], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + # Two allocations, costs partially covered + test_invoice = self._get_test_invoice(["PI", "PI"], [500, 1000]) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, 500], + "Credit Code": ["0002", "0002"], + "PI Balance": [0, 500], + "Balance": [0, 500], + } + ), ], - "Manager (PI)": [ - "PI1", - "PI2", - "PI3", - "PI4", - "PI4", - "PI5", - "PI7", - "NewPI1", - "NewPI1", - "NewPI2", - "NewPI2", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [1000], + "2nd Month Used": [0], 
+ } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + # Two allocations, costs completely covered + test_invoice = self._get_test_invoice(["PI", "PI"], [500, 400]) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, 400], + "Credit Code": ["0002", "0002"], + "PI Balance": [0, 0], + "Balance": [0, 0], + } + ), ], - "SU Type": [ - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", - "CPU", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [900], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_one_month_pi(self): + """PI has appeared in invoices for one month""" + + # Remaining credits completely covers costs + invoice_month = "2024-07" + test_invoice = self._get_test_invoice(["PI"], [200]) + + test_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [0], + } + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [200], + "Credit Code": ["0002"], + "PI Balance": [0], + "Balance": [0], + } + ), ], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - "ProjectF", - "ProjectG", - "ProjectH", - "ProjectI", - "ProjectJ", - "ProjectK", + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [200], + } + ) + + 
self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + # Remaining credits partially covers costs + test_invoice = self._get_test_invoice(["PI"], [600]) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500], + "Credit Code": ["0002"], + "PI Balance": [100], + "Balance": [100], + } + ), ], - "Cost": [10, 100, 10000, 500, 100, 400, 200, 250, 250, 700, 700], - "Credit": [None, None, None, 100, None, 400, 200, 250, 250, 500, None], - "Credit Code": [ - None, - None, - None, - "0002", - None, - "0002", - "0002", - "0002", - "0002", - "0002", - None, + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [500], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_two_new_pi(self): + """Two PIs of different age""" + + # Costs partially and completely covered + invoice_month = "2024-07" + test_invoice = self._get_test_invoice(["PI1", "PI1", "PI2"], [800, 500, 500]) + + test_old_pi_df = pandas.DataFrame( + { + "PI": ["PI1"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [500], + "2nd Month Used": [0], + } + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, None, 500], + "Credit Code": ["0002", None, "0002"], + "PI Balance": [300, 500, 0], + "Balance": [300, 500, 0], + } + ), ], - "Balance": [10, 100, 10000, 400, 100, 0, 0, 0, 0, 200, 700], - } - self.dataframe = pandas.DataFrame(data) - self.dataframe["Credit"] = None - self.dataframe["Credit Code"] = None - self.dataframe["Balance"] = self.dataframe["Cost"] - 
self.answer_dataframe = pandas.DataFrame(answer_df_dict) - old_pi = [ - "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", - "PI1,2023-09,500,200,0", - "PI2,2024-01,2000,0,0", - "PI3,2024-01,2000,1000,500", - "PI4,2024-02,1000,900,0", - "PI5,2024-02,1000,300,500", - "PI6,2024-02,1000,700,0", - "PI7,2024-03,500,300,0", # This as current month we're testing, new PIs should get $500 - "PI8,2024-04,1000,500,0", - ] - self.old_pi_df_answer = ( - pandas.DataFrame( - { - "PI": [ - "PI1", - "PI2", - "PI3", - "PI4", - "PI5", - "PI6", - "PI7", - "NewPI1", - "NewPI2", - "PI8", - ], - "First Invoice Month": [ - "2023-09", - "2024-01", - "2024-01", - "2024-02", - "2024-02", - "2024-02", - "2024-03", - "2024-03", - "2024-03", - "2024-04", - ], - "Initial Credits": [ - 500, - 2000, - 2000, - 1000, - 1000, - 1000, - 500, - 500, - 500, - 1000, - ], - "1st Month Used": [200, 0, 1000, 900, 300, 700, 200, 500, 500, 500], - "2nd Month Used": [0, 0, 500, 100, 400, 0, 0, 0, 0, 0], - } - ) - .astype( - { - "Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, - ) - .sort_values(by="PI", ignore_index=True) + axis=1, ) - # Contains cases with new, one month old, two month old, older PI, and future PI that hasn't appeared in invoices yet - # For each invoice month, test case where pi has 1 project, >1, and has spare credit - old_pi_file = tempfile.NamedTemporaryFile( - delete=False, mode="w+", suffix=".csv" + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI1", "PI2"], + "First Invoice Month": ["2024-06", "2024-07"], + "Initial Credits": [1000, 1000], + "1st Month Used": [500, 500], + "2nd Month Used": [500, 0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, ) - for pi in old_pi: - old_pi_file.write(pi + "\n") - 
self.old_pi_file = old_pi_file.name - self.dataframe_no_gpu = pandas.DataFrame( + def test_old_pi_file_overwritten(self): + """If PI already has entry in Old PI file, + their initial credits and PI entry could be overwritten""" + + invoice_month = "2024-06" + test_invoice = self._get_test_invoice(["PI", "PI"], [500, 500]) + test_old_pi_df = pandas.DataFrame( { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], - "SU Type": [ - "GPU", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - ], - "Cost": [500, 100, 100, 500, 500], + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [500], + "1st Month Used": [200], + "2nd Month Used": [0], } ) - self.dataframe_no_gpu["Credit"] = None - self.dataframe_no_gpu["Credit Code"] = None - self.dataframe_no_gpu["Balance"] = self.dataframe_no_gpu["Cost"] - old_pi_no_gpu = [ - "PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used", - "OldPI,2024-03,500,200,0", - ] - old_pi_no_gpu_file = tempfile.NamedTemporaryFile( - delete=False, mode="w", suffix=".csv" + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [500, None], + "Credit Code": ["0002", None], + "PI Balance": [0, 500], + "Balance": [0, 500], + } + ), + ], + axis=1, ) - for pi in old_pi_no_gpu: - old_pi_no_gpu_file.write(pi + "\n") - self.old_pi_no_gpu_file = old_pi_no_gpu_file.name - self.no_gpu_df_answer = pandas.DataFrame( + + answer_old_pi_df = pandas.DataFrame( { - "Invoice Month": [ - "2024-03", - "2024-03", - "2024-03", - "2024-03", - "2024-03", - ], - "Manager (PI)": ["PI1", "PI1", "PI1", "PI2", "PI2"], - "SU Type": [ - "GPU", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - ], - "Cost": [500, 100, 100, 500, 500], - 
"Credit": [500, None, None, None, None], - "Credit Code": ["0002", None, None, None, None], - "Balance": [0.0, 100.0, 100.0, 500.0, 500.0], + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [500], + "1st Month Used": [500], + "2nd Month Used": [0], } ) - def tearDown(self): - os.remove(self.old_pi_file) - os.remove(self.old_pi_no_gpu_file) - - def test_apply_credit_0002(self): - test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03") - old_pi_df = test_invoice._load_old_pis(self.old_pi_file) - dataframe, updated_old_pi_df = test_invoice._apply_credits_new_pi( - self.dataframe, old_pi_df - ) - dataframe = dataframe.astype({"Credit": "float64", "Balance": "int64"}) - updated_old_pi_df = updated_old_pi_df.astype( - dtype={ - "Initial Credits": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "1st Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - "2nd Month Used": pandas.ArrowDtype(pyarrow.decimal128(21, 2)), - }, - ).sort_values(by=["PI"], ignore_index=True) - self.assertTrue(self.answer_dataframe.equals(dataframe)) - self.assertTrue(self.old_pi_df_answer.equals(updated_old_pi_df)) - - def test_no_gpu(self): - test_invoice = test_utils.new_billable_invoice(invoice_month="2024-03") - old_pi_df = test_invoice._load_old_pis(self.old_pi_no_gpu_file) - dataframe, _ = test_invoice._apply_credits_new_pi( - self.dataframe_no_gpu, old_pi_df - ) - dataframe = dataframe.astype({"Credit": "float64", "Balance": "float64"}) - self.assertTrue(self.no_gpu_df_answer.equals(dataframe)) + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) + + def test_excluded_su_types(self): + """Certain SU types can be excluded from the credit""" + + invoice_month = "2024-06" + test_invoice = self._get_test_invoice( + ["PI", "PI", "PI", "PI"], + [600, 600, 600, 600], + [ + "CPU", + "OpenShift GPUA100SXM4", + "GPU", + "OpenStack GPUA100SXM4", + ], + ) + + 
test_old_pi_df = pandas.DataFrame( + columns=[ + "PI", + "First Invoice Month", + "Initial Credits", + "1st Month Used", + "2nd Month Used", + ] + ) + test_old_pi_df.to_csv(self.test_old_pi_file.name, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + "Credit": [600, None, 400, None], + "Credit Code": ["0002", None, "0002", None], + "PI Balance": [0, 600, 200, 600], + "Balance": [0, 600, 200, 600], + } + ), + ], + axis=1, + ) + + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [1000], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + self.test_old_pi_file.name, + answer_invoice, + answer_old_pi_df, + ) def test_apply_credit_error(self): + """Test faulty data""" old_pi_df = pandas.DataFrame( {"PI": ["PI1"], "First Invoice Month": ["2024-04"]} ) invoice_month = "2024-03" - test_invoice = test_utils.new_billable_invoice() + test_invoice = test_utils.new_new_pi_credit_processor() with self.assertRaises(SystemExit): test_invoice._get_pi_age(old_pi_df, "PI1", invoice_month) @@ -634,6 +814,8 @@ def setUp(self): 50, 100, ], # Test case where subsidy does/doesn't cover fully balance + "Is Billable": [True, True, True, True, True, True, True, True], + "Missing PI": [False, False, False, False, False, False, False, False], } self.dataframe = pandas.DataFrame(data) self.subsidy = 100 @@ -770,24 +952,24 @@ def test_flag_limit_new_pi_credit(self, mock_load_institute_list): "Institution": ["BU", "HU", "NEU", "MIT", "BC"], } ) - sample_inv = test_utils.new_billable_invoice( + sample_proc = test_utils.new_new_pi_credit_processor( limit_new_pi_credit_to_partners=True ) # When no partnerships are active - sample_inv.invoice_month = "2024-01" - output_df = sample_inv._filter_partners(sample_df) + sample_proc.invoice_month = "2024-01" + output_df = 
sample_proc._filter_partners(sample_df) self.assertTrue(output_df.empty) # When some partnerships are active - sample_inv.invoice_month = "2024-06" - output_df = sample_inv._filter_partners(sample_df) + sample_proc.invoice_month = "2024-06" + output_df = sample_proc._filter_partners(sample_df) answer_df = pandas.DataFrame({"Institution": ["BU", "HU"]}) self.assertTrue(output_df.equals(answer_df)) # When all partnerships are active - sample_inv.invoice_month = "2024-12" - output_df = sample_inv._filter_partners(sample_df) + sample_proc.invoice_month = "2024-12" + output_df = sample_proc._filter_partners(sample_df) answer_df = pandas.DataFrame({"Institution": ["BU", "HU", "NEU"]}) self.assertTrue(output_df.equals(answer_df)) diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 3585e1e..6c2cb50 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -12,6 +12,7 @@ validate_pi_alias_processor, lenovo_processor, validate_billable_pi_processor, + new_pi_credit_processor, ) @@ -28,14 +29,14 @@ def new_billable_invoice( invoice_month="0000-00", data=pandas.DataFrame(), old_pi_filepath="", - limit_new_pi_credit_to_partners=False, + updated_old_pi_df=pandas.DataFrame(), ): return billable_invoice.BillableInvoice( name, invoice_month, data, old_pi_filepath, - limit_new_pi_credit_to_partners, + updated_old_pi_df, ) @@ -93,3 +94,15 @@ def new_validate_billable_pi_processor( nonbillable_pis, nonbillable_projects, ) + + +def new_new_pi_credit_processor( + name="", + invoice_month="0000-00", + data=pandas.DataFrame(), + old_pi_filepath="", + limit_new_pi_credit_to_partners=False, +): + return new_pi_credit_processor.NewPICreditProcessor( + name, invoice_month, data, old_pi_filepath, limit_new_pi_credit_to_partners + )