Skip to content

Commit

Permalink
Initialized processor for BU subsidy and copied over processing logic
Browse files Browse the repository at this point in the history
  • Loading branch information
QuanMPhm committed Nov 19, 2024
1 parent 1686888 commit b9d8086
Showing 1 changed file with 62 additions and 0 deletions.
62 changes: 62 additions & 0 deletions process_report/processors/bu_subsidy_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from dataclasses import dataclass
from decimal import Decimal

from process_report.invoices import invoice
from process_report.processors import discount_processor


@dataclass
class BUSubsidyProcessor(discount_processor.DiscountProcessor):
    """Processor that applies a flat subsidy to Boston University projects.

    ``_prepare`` narrows ``self.data`` to billable rows that are not missing a
    PI and whose institution is "Boston University", and derives a "Project"
    column from the project-allocation name.  ``_process`` then collapses the
    rows to one per project and applies ``subsidy_amount`` once per PI.

    NOTE(review): ``self.data`` and ``apply_flat_discount`` come from the
    parent ``DiscountProcessor``, which is not visible here; ``self.data`` is
    used as a pandas DataFrame throughout.
    """

    # Flat amount passed to ``apply_flat_discount`` for every PI.
    subsidy_amount: int

    @staticmethod
    def _get_project(project_alloc):
        """Return the allocation name with its last "-"-separated part removed.

        A name without any "-" is returned unchanged.
        """
        project, separator, _ = project_alloc.rpartition("-")
        return project if separator else project_alloc

    def _prepare(self):
        """Filter ``self.data`` to BU rows and add the helper columns."""
        self.data = self.data[
            self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
        ]
        # Copy so the column assignments below do not write into a slice of
        # the frame that was handed to this processor.
        self.data = self.data[
            self.data[invoice.INSTITUTION_FIELD] == "Boston University"
        ].copy()
        # Apply on the Series rather than row-wise on the frame:
        # ``DataFrame.apply(axis=1)`` returns a DataFrame (not a Series) when
        # the frame is empty, which cannot be assigned to a single column.
        self.data["Project"] = self.data[invoice.PROJECT_FIELD].apply(
            self._get_project
        )
        self.data[invoice.SUBSIDY_FIELD] = Decimal(0)

    def _process(self):
        """Sum allocations per project, then apply the subsidy per PI."""
        data_summed_projects = self._sum_project_allocations(self.data)
        self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount)

    def _sum_project_allocations(self, dataframe):
        """Return one row per unique "Project" with summed money columns.

        A project may have multiple allocations, and therefore multiple rows
        in the raw invoices. For the BU-Internal invoice we only want one row
        for each unique project, so the first row of every project is kept
        and its cost, credit and balance fields are replaced by the sums over
        all of that project's rows.  ``dataframe`` itself is not modified.
        """
        project_list = dataframe["Project"].unique()
        # Explicit copy: the de-duplicated frame is written to below, and
        # pandas may otherwise treat it as a copy of a slice of ``dataframe``.
        data_no_dup = dataframe.drop_duplicates("Project").copy()
        sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
        for project in project_list:
            project_mask = dataframe["Project"] == project
            no_dup_project_mask = data_no_dup["Project"] == project

            sum_fields_sums = dataframe[project_mask][sum_fields].sum().values
            data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums

        return data_no_dup

    def _apply_subsidy(self, dataframe, subsidy_amount):
        """Apply ``subsidy_amount`` once for every unique PI in ``dataframe``.

        For each PI, that PI's rows are selected and handed to the inherited
        ``apply_flat_discount`` together with the full frame, the subsidy
        amount, and the subsidy and balance field names.  The same
        ``dataframe`` object is returned.

        NOTE(review): presumably ``apply_flat_discount`` updates ``dataframe``
        in place -- confirm against ``DiscountProcessor``.
        """
        pi_list = dataframe[invoice.PI_FIELD].unique()

        for pi in pi_list:
            pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
            self.apply_flat_discount(
                dataframe,
                pi_projects,
                subsidy_amount,
                invoice.SUBSIDY_FIELD,
                invoice.BALANCE_FIELD,
            )

        return dataframe

0 comments on commit b9d8086

Please sign in to comment.