From 83681de92d3f57128ba8d7d233fb94c3745c02d3 Mon Sep 17 00:00:00 2001 From: Muhammad Afaq Shuaib Date: Thu, 29 Aug 2024 01:44:56 +0500 Subject: [PATCH] feat: add management command for SFMC Course Catalog POC' (#4409) --- .../apps/course_metadata/gspread_client.py | 91 +++++++++ .../commands/populate_product_catalog.py | 184 ++++++++++++++++++ .../tests/test_populate_product_catalog.py | 161 +++++++++++++++ .../apps/course_metadata/models.py | 9 + .../course_metadata/tests/test_gspread.py | 88 +++++++++ course_discovery/settings/base.py | 2 + 6 files changed, 535 insertions(+) create mode 100644 course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py create mode 100644 course_discovery/apps/course_metadata/management/commands/tests/test_populate_product_catalog.py diff --git a/course_discovery/apps/course_metadata/gspread_client.py b/course_discovery/apps/course_metadata/gspread_client.py index fbc0c15c07..28d3dc3c86 100644 --- a/course_discovery/apps/course_metadata/gspread_client.py +++ b/course_discovery/apps/course_metadata/gspread_client.py @@ -1,4 +1,5 @@ import logging +from string import ascii_uppercase import gspread from django.conf import settings @@ -37,6 +38,96 @@ def read_data(self, config): logger.exception('[Spread Sheet Read Error]: Exception occurred while reading sheet data') return None + def _get_or_create_worksheet(self, spread_sheet, tab_id, cols, rows): + """ + Get or create a worksheet with the given tab_id in the given spread_sheet + + Args: + spread_sheet: The spread sheet object + tab_id: The tab id of the worksheet + cols: The number of columns in the worksheet + rows: The number of rows in the worksheet + """ + try: + return spread_sheet.worksheet(tab_id) + except gspread.exceptions.WorksheetNotFound: + return spread_sheet.add_worksheet( + title=tab_id, + rows=rows, + cols=cols, + ) + + def _write_headers(self, sheet_tab, headers): + """ + Write headers to the first row of the worksheet + + Args: + sheet_tab: The worksheet object + headers: The headers of the worksheet + """ + sheet_tab.append_row(headers) + end_column = ascii_uppercase[len(headers) - 1] + cell_range = f"A1:{end_column}1" + sheet_tab.format(cell_range, {'textFormat': {'bold': True}}) + + def _write_rows(self, sheet_tab, headers, csv_data): + """ + Write rows to the worksheet after headers + + Args: + sheet_tab: The worksheet object + headers: The headers of the worksheet + csv_data: The data to be written in the worksheet, as a list of dictionaries, where + each dictionary represents a row + """ + for row in csv_data: + sheet_tab.append_row( + [ + ( + row.get(header).replace('\"', '\"\"') # double quote escape to preserve " in values + if isinstance(row.get(header), str) + else row.get(header) + ) + for header in headers + ] + ) + + def write_data(self, config, csv_headers, csv_data, overwrite): + """ + Write data to the google spread sheet + + Args: + config: The configuration for the google spread sheet + csv_headers: The headers of the data to be written in the worksheet + csv_data: The data to be written in the worksheet, as a list of dictionaries, where + each dictionary represents a row + overwrite: Whether to overwrite the existing data in the worksheet + """ + try: + spread_sheet = self.get_spread_sheet_by_key(config["SHEET_ID"]) + sheet_tab = self._get_or_create_worksheet( + spread_sheet, config["OUTPUT_TAB_ID"], len(csv_headers) + 1, len(csv_data) + 1 + ) + + if overwrite: + sheet_tab.clear() + + if csv_headers: + self._write_headers(sheet_tab, csv_headers) + + self._write_rows(sheet_tab, csv_headers, csv_data) + + logger.info( + f""" + [Spread Sheet Write Success]: Successfully written data to + sheet {config["SHEET_ID"]} tab {config["OUTPUT_TAB_ID"]} + """ + ) + except gspread.exceptions.GSpreadException as e: + logger.exception(f"[Spread Sheet Write Error]: GSpreadException occurred while writing sheet data: {e}") + except Exception as e: # pylint: disable=broad-except + logger.exception(f"[Spread Sheet Write Error]: Exception occurred while writing sheet data: {e}") + @staticmethod def get_worksheet_data_by_tab_id(spread_sheet, tab_id): try: diff --git a/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py b/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py new file mode 100644 index 0000000000..4da0a651a2 --- /dev/null +++ b/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py @@ -0,0 +1,184 @@ +import csv +import datetime +import logging + +from django.conf import settings +from django.core.management import BaseCommand, CommandError +from django.db.models import Prefetch + +from course_discovery.apps.course_metadata.gspread_client import GspreadClient +from course_discovery.apps.course_metadata.models import Course, CourseType, SubjectTranslation + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = """ + Populates Product Catalog for Salesforce Marketing Cloud Catalog + + Example usage: + python manage.py populate_product_catalog --product_type={product_type} --output_csv=/path/to/output.csv --product_source={product_source} + python manage.py populate_product_catalog --product_type={product_type} --product_source={product_source} --use_gspread_client=True --overwrite=True + """ + + CATALOG_CSV_HEADERS = [ + 'UUID', 'Title', 'Organizations Name', 'Organizations Logo', 'Organizations Abbr', 'Languages', + 'Subjects', 'Subjects Spanish', 'Marketing URL', 'Marketing Image' + ] + + def add_arguments(self, parser): + parser.add_argument( + '--product_type', + dest='product_type', + type=str, + required=False, + help='Product Type to populate in the catalog' + ) + parser.add_argument( + '--output_csv', + dest='output_csv', + type=str, + required=False, + help='Path of the output CSV' + ) + parser.add_argument( + '--product_source', + dest='product_source', + type=str, + required=False, + help='The product source to filter the products' + ) + parser.add_argument( + '--use_gspread_client', + dest='gspread_client_flag', + type=bool, + required=False, + help='Flag to use Gspread Client for writing data to Google Sheets' + ) + parser.add_argument( + '--overwrite', + dest='overwrite_flag', + type=bool, + default=True, + required=False, + help='Flag to overwrite the existing data in Google Sheet tab' + ) + + def get_products(self, product_type, product_source): + """ + Extract products from the DB for product catalog + """ + ocm_course_catalog_types = [ + CourseType.AUDIT, CourseType.VERIFIED_AUDIT, CourseType.PROFESSIONAL, CourseType.CREDIT_VERIFIED_AUDIT, + 'verified', 'spoc-verified-audit' + ] + + if (product_type := product_type.lower()) in ['executive_education', 'bootcamp', 'ocm_course']: + queryset = Course.objects.available() + + if product_type == 'ocm_course': + queryset = queryset.filter(type__slug__in=ocm_course_catalog_types) + + elif product_type == 'executive_education': + queryset = queryset.filter(type__slug=CourseType.EXECUTIVE_EDUCATION_2U) + + elif product_type == 'bootcamp': + queryset = queryset.filter(type__slug=CourseType.BOOTCAMP_2U) + + if product_source: + queryset = queryset.filter(product_source__slug=product_source) + + # Prefetch Spanish translations of subjects + subject_translations = Prefetch( + 'subjects__translations', + queryset=SubjectTranslation.objects.filter(language_code='es'), + to_attr='spanish_translations' + ) + + return queryset.prefetch_related( + 'authoring_organizations', + 'subjects', + subject_translations + ) + else: + # Return empty queryset if invalid product type specified + return Course.objects.none() + + def write_csv_header(self, output_csv): + """ + Write the header of output CSV in the file. + """ + writer = csv.DictWriter(output_csv, fieldnames=self.CATALOG_CSV_HEADERS) + writer.writeheader() + return writer + + def get_transformed_data(self, product): + """ + Transforms the product data for product's catalog + """ + authoring_orgs = product.authoring_organizations.all() + return { + "UUID": str(product.uuid), + "Title": product.title, + "Organizations Name": ", ".join(org.name for org in authoring_orgs), + "Organizations Logo": ", ".join( + org.logo_image.url for org in authoring_orgs if org.logo_image + ), + "Organizations Abbr": ", ".join(org.key for org in authoring_orgs), + "Languages": product.languages_codes, + "Subjects": ", ".join(subject.name for subject in product.subjects.all()), + "Subjects Spanish": ", ".join( + translation.name for subject in product.subjects.all() + for translation in subject.spanish_translations + ), + "Marketing URL": product.marketing_url, + "Marketing Image": (product.image.url if product.image else ""), + } + + def handle(self, *args, **options): + product_type = options.get('product_type') + output_csv = options.get('output_csv') + product_source = options.get('product_source') + gspread_client_flag = options.get('gspread_client_flag') + overwrite = options.get('overwrite_flag') + PRODUCT_CATALOG_CONFIG = { + 'SHEET_ID': settings.PRODUCT_CATALOG_SHEET_ID, + 'OUTPUT_TAB_ID': ( + product_type.upper() + ('_' + datetime.datetime.now().strftime("%Y%m%d") if not overwrite else '') + if product_type else 'All' + ), + } + + gspread_client = GspreadClient() + + try: + products = self.get_products(product_type, product_source) + if not products.exists(): + raise CommandError('No products found for the given criteria.') + products_count = products.count() + + logger.info(f'Fetched {products_count} courses from the database') + if output_csv: + with open(output_csv, 'w', newline='') as output_file: + output_writer = self.write_csv_header(output_file) + for product in products: + try: + output_writer.writerow(self.get_transformed_data(product)) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error(f"Error writing product {product.uuid} to CSV: {str(e)}") + continue + + logger.info(f'Populated {products_count} {product_type}s to {output_csv}') + + elif gspread_client_flag: + csv_data = [self.get_transformed_data(product) for product in products] + gspread_client.write_data( + PRODUCT_CATALOG_CONFIG, + self.CATALOG_CSV_HEADERS, + csv_data, + overwrite=overwrite, + ) + logger.info(f'Populated {products_count} {product_type}s to Google Sheets') + + except Exception as e: + raise CommandError(f'Error while populating product catalog: {str(e)}') from e diff --git a/course_discovery/apps/course_metadata/management/commands/tests/test_populate_product_catalog.py b/course_discovery/apps/course_metadata/management/commands/tests/test_populate_product_catalog.py new file mode 100644 index 0000000000..95463a385c --- /dev/null +++ b/course_discovery/apps/course_metadata/management/commands/tests/test_populate_product_catalog.py @@ -0,0 +1,161 @@ +""" +Unit tests for populate_product_catalog management command. +""" +import csv +from tempfile import NamedTemporaryFile + +import mock +from django.core.management import CommandError, call_command +from django.test import TestCase + +from course_discovery.apps.course_metadata.choices import CourseRunStatus +from course_discovery.apps.course_metadata.management.commands.populate_product_catalog import Command +from course_discovery.apps.course_metadata.models import Course, CourseType +from course_discovery.apps.course_metadata.tests.factories import ( + CourseFactory, CourseRunFactory, CourseTypeFactory, PartnerFactory, SeatFactory, SourceFactory +) + + +class PopulateProductCatalogCommandTests(TestCase): + def setUp(self): + super().setUp() + self.partner = PartnerFactory.create() + self.course_type = CourseTypeFactory(slug=CourseType.AUDIT) + self.source = SourceFactory.create(slug="edx") + self.courses = CourseFactory.create_batch( + 2, + product_source=self.source, + partner=self.partner, + additional_metadata=None, + type=self.course_type, + ) + self.course_run = CourseRunFactory( + course=Course.objects.all()[0], + status=CourseRunStatus.Published, + ) + self.seat = SeatFactory.create(course_run=self.course_run) + self.course_run_2 = CourseRunFactory.create_batch( + 2, course=Course.objects.all()[1] + ) + + def test_populate_product_catalog(self): + """ + Test populate_product_catalog command and verify data has been populated successfully + """ + with NamedTemporaryFile(mode="w", delete=False) as output_csv: + call_command( + "populate_product_catalog", + product_type="ocm_course", + output_csv=output_csv.name, + product_source="edx", + gspread_client_flag=False, + ) + + with open(output_csv.name, "r") as output_csv_file: + csv_reader = csv.DictReader(output_csv_file) + for row in csv_reader: + self.assertIn("UUID", row) + self.assertIn("Title", row) + self.assertIn("Organizations Name", row) + self.assertIn("Organizations Logo", row) + self.assertIn("Organizations Abbr", row) + self.assertIn("Languages", row) + self.assertIn("Subjects", row) + self.assertIn("Subjects Spanish", row) + self.assertIn("Marketing URL", row) + self.assertIn("Marketing Image", row) + + @mock.patch( + "course_discovery.apps.course_metadata.management.commands.populate_product_catalog.Command.get_products" + ) + def test_get_products_with_product_type(self, mock_get_products): + """ + Test that the get_products method is called correctly when a specific product type is provided. + """ + command = Command() + command.get_products("executive_education", None) + + mock_get_products.assert_called_once_with("executive_education", None) + + def test_handle_no_products_found(self): + """ + Test to ensure CommandError is raised when no products are found. + """ + with self.assertRaises(CommandError) as cm: + call_command("populate_product_catalog", product_type="bootcamp") + + self.assertEqual( + str(cm.exception), + "Error while populating product catalog: No products found for the given criteria.", + ) + + @mock.patch( + "course_discovery.apps.course_metadata.management.commands.populate_product_catalog.csv.DictWriter" + ) + def test_write_csv_header(self, mock_dict_writer): + """ + Test that the CSV header is written correctly. + """ + mock_output_file = mock.Mock() + + command = Command() + writer = command.write_csv_header(mock_output_file) + + mock_dict_writer.assert_called_once_with( + mock_output_file, fieldnames=command.CATALOG_CSV_HEADERS + ) + # pylint: disable=no-member + writer.writeheader.assert_called_once() + + def test_get_transformed_data(self): + """ + Verify get_transformed_data method is working correctly + """ + product = self.courses[0] + command = Command() + product_authoring_orgs = product.authoring_organizations.all() + transformed_prod_data = command.get_transformed_data(product) + assert transformed_prod_data == { + "UUID": str(product.uuid), + "Title": product.title, + "Organizations Name": ", ".join( + org.name for org in product_authoring_orgs + ), + "Organizations Logo": ", ".join( + org.logo_image.url + for org in product_authoring_orgs + if org.logo_image + ), + "Organizations Abbr": ", ".join( + org.key for org in product_authoring_orgs + ), + "Languages": product.languages_codes, + "Subjects": ", ".join(subject.name for subject in product.subjects.all()), + "Subjects Spanish": ", ".join( + translation.name + for subject in product.subjects.all() + for translation in subject.spanish_translations + ), + "Marketing URL": product.marketing_url, + "Marketing Image": (product.image.url if product.image else ""), + } + + @mock.patch('course_discovery.apps.course_metadata.management.commands.populate_product_catalog.GspreadClient') + @mock.patch( + 'course_discovery.apps.course_metadata.management.commands.populate_product_catalog.Command.get_products' + ) + def test_handle_gspread_client(self, mock_get_products, mock_gspread_client): + """ + Ensure GspreadClient is used to write product data when the flag is set. + """ + mock_get_products.return_value.exists.return_value = True + mock_get_products.return_value.count.return_value = 1 + mock_get_products.return_value.__iter__.return_value = [mock.MagicMock()] + + mock_client_instance = mock_gspread_client.return_value + mock_client_instance.write_data.return_value = None + + call_command('populate_product_catalog', product_type='ocm_course', use_gspread_client=True) + + mock_gspread_client.assert_called_once() + mock_client_instance.write_data.assert_called() diff --git a/course_discovery/apps/course_metadata/models.py b/course_discovery/apps/course_metadata/models.py index 4bddf9aa24..4959e01571 100644 --- a/course_discovery/apps/course_metadata/models.py +++ b/course_discovery/apps/course_metadata/models.py @@ -1709,6 +1709,15 @@ def languages(self, exclude_inactive_runs=False): if course_run.language is not None }) + @property + def languages_codes(self): + """ + Returns a string of languages codes used in this course. The languages codes are separated by comma. + This property will ignore restricted runs and course runs with no language set. + """ + filtered_course_runs = self.active_course_runs.filter(language__isnull=False, restricted_run__isnull=True) + return ','.join(course_run.language.code for course_run in filtered_course_runs) + @property def first_enrollable_paid_seat_price(self): """ diff --git a/course_discovery/apps/course_metadata/tests/test_gspread.py b/course_discovery/apps/course_metadata/tests/test_gspread.py index 2c1f04b252..8f56d1d352 100644 --- a/course_discovery/apps/course_metadata/tests/test_gspread.py +++ b/course_discovery/apps/course_metadata/tests/test_gspread.py @@ -29,3 +29,91 @@ def test_get_worksheet_data_by_tab_id(self, _mock_gspread_connection, mock_logge spread_sheet.worksheets.return_value = [] client.get_worksheet_data_by_tab_id(spread_sheet, '123456') mock_logger.error.assert_called_with('[Worksheet Not Found]: No worksheet found with id: 123456') + + @mock.patch('course_discovery.apps.course_metadata.gspread_client.GspreadClient.get_spread_sheet_by_key') + @mock.patch('course_discovery.apps.course_metadata.gspread_client.GspreadClient.get_worksheet_data_by_tab_id') + @mock.patch('course_discovery.apps.course_metadata.gspread_client.logger') + def test_read_data(self, _mock_logger, mock_get_worksheet_data_by_tab_id, mock_get_spread_sheet_by_key): + """ + Test read_data method of Gspread client with mock data + """ + mock_spreadsheet = mock.Mock() + mock_worksheet_data = [{'header1': 'value1', 'header2': 'value2'}] + mock_get_spread_sheet_by_key.return_value = mock_spreadsheet + mock_get_worksheet_data_by_tab_id.return_value = mock_worksheet_data + + client = GspreadClient() + config = {'SHEET_ID': 'sheet_id', 'INPUT_TAB_ID': 'input_tab_id'} + result = client.read_data(config) + + mock_get_spread_sheet_by_key.assert_called_once_with('sheet_id') + mock_get_worksheet_data_by_tab_id.assert_called_once_with(mock_spreadsheet, 'input_tab_id') + self.assertEqual(result, mock_worksheet_data) + + @mock.patch( + "course_discovery.apps.course_metadata.gspread_client.ascii_uppercase", + new=list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), + ) + def test_write_headers(self): + """ + Test write_headers method of Gspread client + """ + mock_sheet_tab = mock.Mock() + headers = ['header1', 'header2'] + + client = GspreadClient() + client._write_headers(mock_sheet_tab, headers) # pylint: disable=protected-access + + mock_sheet_tab.append_row.assert_called_once_with(headers) + mock_sheet_tab.format.assert_called_once_with('A1:B1', {'textFormat': {'bold': True}}) + + def test_write_rows(self): + """ + Test write_rows method of Gspread client + """ + mock_sheet_tab = mock.Mock() + headers = ['header1', 'header2'] + csv_data = [{'header1': 'value1', 'header2': 'value2'}, {'header1': 'value3', 'header2': 'value4'}] + + client = GspreadClient() + client._write_rows(mock_sheet_tab, headers, csv_data) # pylint: disable=protected-access + + mock_sheet_tab.append_row.assert_any_call(['value1', 'value2']) + mock_sheet_tab.append_row.assert_any_call(['value3', 'value4']) + self.assertEqual(mock_sheet_tab.append_row.call_count, 2) + + @mock.patch('course_discovery.apps.course_metadata.gspread_client.GspreadClient._get_or_create_worksheet') + @mock.patch('course_discovery.apps.course_metadata.gspread_client.GspreadClient._write_headers') + @mock.patch('course_discovery.apps.course_metadata.gspread_client.GspreadClient._write_rows') + @mock.patch('course_discovery.apps.course_metadata.gspread_client.GspreadClient.get_spread_sheet_by_key') + @mock.patch('course_discovery.apps.course_metadata.gspread_client.logger') + def test_write_data( + self, + _mock_logger, + mock_get_spread_sheet_by_key, + mock_write_rows, + mock_write_headers, + mock_get_or_create_worksheet, + ): + """ + Test write_data method of Gspread client with mock data + """ + mock_spreadsheet = mock.Mock() + mock_sheet_tab = mock.Mock() + mock_get_spread_sheet_by_key.return_value = mock_spreadsheet + mock_get_or_create_worksheet.return_value = mock_sheet_tab + + client = GspreadClient() + config = {"SHEET_ID": "sheet_id", "OUTPUT_TAB_ID": "output_tab_id"} + csv_headers = ["header1", "header2"] + csv_data = [{"header1": "value1", "header2": "value2"}] + + client.write_data(config, csv_headers, csv_data, overwrite=True) + + mock_get_spread_sheet_by_key.assert_called_once_with("sheet_id") + mock_get_or_create_worksheet.assert_called_once_with( + mock_spreadsheet, "output_tab_id", len(csv_headers) + 1, len(csv_data) + 1 + ) + mock_sheet_tab.clear.assert_called_once() + mock_write_headers.assert_called_once_with(mock_sheet_tab, csv_headers) + mock_write_rows.assert_called_once_with(mock_sheet_tab, csv_headers, csv_data) diff --git a/course_discovery/settings/base.py b/course_discovery/settings/base.py index a2c4fe836a..6321638ddd 100644 --- a/course_discovery/settings/base.py +++ b/course_discovery/settings/base.py @@ -672,6 +672,8 @@ 'CLIENT_X509_CERT_URL': '' } +PRODUCT_CATALOG_SHEET_ID = '' + GETSMARTER_CLIENT_CREDENTIALS = { 'CLIENT_ID': '', 'CLIENT_SECRET': '',