Skip to content

Commit

Permalink
Update Makefile
Browse files Browse the repository at this point in the history
Makefile will now merge the two CSV files and update FederalProgramNames.json
  • Loading branch information
LynnMHouston authored Jan 23, 2025
1 parent 3050021 commit 8392dd3
Showing 1 changed file with 44 additions and 5 deletions.
49 changes: 44 additions & 5 deletions backend/schemas/Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Interpreter selection: prefer python3, fall back to python.
# Use := (simple expansion) so the two `which` lookups run exactly once at
# parse time instead of re-forking a shell on every $(PYTHON) expansion.
SYSTEM_PYTHON := $(or $(shell which python3), $(shell which python))
PYTHON := $(SYSTEM_PYTHON)

# Full pipeline: wipe outputs, refresh lookup data, then rebuild workbooks,
# templates, sections, and the audit JSON.
all: clean source_data build_xlsx format_jsonnet build_templates build_sections build_audit_json

# JSON-only rebuild (skips the xlsx/workbook steps).
# NOTE(review): the original listed build_templates twice; deduplicated here.
# Possibly build_sections was intended as the second entry — confirm with the
# pipeline owners.
json: format_jsonnet build_templates build_audit_json

# Jsonnet spec inputs. Kept as recursive `=` on purpose: the globs re-evaluate
# at each point of use, so they pick up files created earlier in the same run.
template_specs = $(wildcard source/excel/templates/*.jsonnet)
section_specs = $(wildcard source/sections/*.jsonnet)
base_specs = $(wildcard source/base/*.jsonnet)
# Generated artifacts (consumed by the clean target).
xlsx = $(wildcard output/excel/xlsx/*-workbook*.xlsx)
json = $(wildcard output/excel/json/*.json)

# NOTE(review): this is the pre-change copy of the source_data rule captured
# by the diff view; the updated rule later in the file (which adds the
# merged-csv prerequisite and uses $(PYTHON) instead of bare `python`)
# supersedes it. Two rules with recipes for the same target make GNU Make
# emit "overriding recipe for target" warnings — only one copy should remain
# in the real Makefile.
source_data:
	python scripts/generate_lookup_schemas.py cfda-lookup source/base/FederalProgramNames.json
	python scripts/generate_lookup_schemas.py cluster-names source/base/ClusterNames.json
# ---------------------------------------------------------------------------
# merged-csv: merge every raw ALN download CSV under
# source/data/ALNs_raw_downloads into one dated, standardized CSV.
#
# The Python source lives in a `define` block and reaches the interpreter via
# an exported environment variable. The previous approach —
# `$(PYTHON) -c " \` with backslash-continued lines — is joined by the shell
# into a SINGLE line, and Python compound statements (for / try / except)
# cannot be semicolon-joined on one line, so that recipe was a guaranteed
# SyntaxError before pandas ever ran.
#
# The dated filename is now computed in Python (local date, same result as the
# old `$(shell date +%Y%m%d)`).
# ---------------------------------------------------------------------------
define MERGED_CSV_SCRIPT
import datetime
import glob
import sys

import pandas as pd

folder = './source/data/ALNs_raw_downloads'
output_file = f"./source/data/cfda-lookup-{datetime.date.today():%Y%m%d}.csv"
print(f'Looking for CSV files in: {folder}')
csv_files = glob.glob(f'{folder}/*.csv')
print(f'CSV files found: {csv_files}')
if not csv_files:
    print('No data found in the input files.')
    sys.exit(1)
all_data = []
for f in csv_files:
    try:
        df = pd.read_csv(f, encoding='utf-8')
    except UnicodeDecodeError:
        print(f'Warning: Could not read {f} with UTF-8. Trying ISO-8859-1.')
        df = pd.read_csv(f, encoding='ISO-8859-1')
    all_data.append(df)
combined_data = pd.concat(all_data, ignore_index=True)
# Rename export headers to the names the downstream schema scripts expect;
# identity entries deliberately keep those columns as-is.
column_mapping = {
    'Title': 'Program Title',
    'Assistance Listings Number': 'Program Number',
    'Date Published': 'Date Published',
    'Department/Ind. Agency': 'Department/Ind. Agency',
    'Funded': 'Funded',
    'Last Date Modified': 'Last Date Modified',
    'POC Information': 'POC Information',
    'Related Federal Assistance': 'Related Federal Assistance',
    'Sub-Tier': 'Sub-Tier',
    'Types of Assistance': 'Types of Assistance',
}
combined_data = combined_data.rename(columns=column_mapping)
print(f'Saving merged and standardized CSV to: {output_file}')
combined_data.to_csv(output_file, index=False, encoding='utf-8')
print('CSV processing completed successfully.')
endef
export MERGED_CSV_SCRIPT

.PHONY: merged-csv
merged-csv:
	@echo "Merging and standardizing CSV files..."
	$(PYTHON) -c "$$MERGED_CSV_SCRIPT"

# source_data: regenerate the JSON lookup schemas. Depends on merged-csv so
# the dated cfda-lookup CSV exists (and is fresh) before schema generation.
# Declared .PHONY because it names a command, not a file it produces.
.PHONY: source_data
source_data: merged-csv
	$(PYTHON) scripts/generate_lookup_schemas.py cfda-lookup source/base/FederalProgramNames.json
	$(PYTHON) scripts/generate_lookup_schemas.py cluster-names source/base/ClusterNames.json

clean:
for f in $(xlsx); do \
Expand Down

0 comments on commit 8392dd3

Please sign in to comment.