feat: add management command for SFMC Course Catalog POC' (#4409)

openedx · Aug 28, 2024 · 83681de · 83681de
1 parent 5c8ceb1
commit 83681de
Show file tree

Hide file tree

Showing 6 changed files with 535 additions and 0 deletions.
diff --git a/course_discovery/apps/course_metadata/gspread_client.py b/course_discovery/apps/course_metadata/gspread_client.py
@@ -1,4 +1,5 @@
 import logging
+from string import ascii_uppercase
 
 import gspread
 from django.conf import settings
@@ -37,6 +38,96 @@ def read_data(self, config):
             logger.exception('[Spread Sheet Read Error]: Exception occurred while reading sheet data')
         return None
 
+    def _get_or_create_worksheet(self, spread_sheet, tab_id, cols, rows):
+        """
+        Fetch the worksheet titled tab_id from spread_sheet, adding it when it does not exist yet
+
+        Args:
+            spread_sheet: The spread sheet object
+            tab_id: The title of the worksheet to look up or create
+            cols: The number of columns given to a newly created worksheet
+            rows: The number of rows given to a newly created worksheet
+
+        Returns:
+            The existing worksheet, or the freshly added one. cols and rows are only
+            used on creation; an existing worksheet is returned as it is.
+        """
+        try:
+            worksheet = spread_sheet.worksheet(tab_id)
+        except gspread.exceptions.WorksheetNotFound:
+            # No tab with this title yet: create it with the requested dimensions.
+            worksheet = spread_sheet.add_worksheet(title=tab_id, rows=rows, cols=cols)
+        return worksheet
+
+    def _write_headers(self, sheet_tab, headers):
+        """
+        Append the headers as a row of the worksheet and make the first row bold
+
+        The end column of the bold range is derived from the number of headers using
+        spreadsheet column labels (A..Z, AA, AB, ...), so header lists wider than 26
+        columns are supported. An empty header list is a no-op.
+
+        Args:
+            sheet_tab: The worksheet object
+            headers: The headers of the worksheet
+        """
+        if not headers:
+            # Nothing to write, and there is no valid cell range for zero columns.
+            return
+
+        sheet_tab.append_row(headers)
+
+        # Convert the 1-based header count into a column label (1 -> A, 26 -> Z, 27 -> AA).
+        end_column = ''
+        remaining = len(headers)
+        while remaining:
+            remaining, letter_index = divmod(remaining - 1, len(ascii_uppercase))
+            end_column = ascii_uppercase[letter_index] + end_column
+
+        cell_range = f"A1:{end_column}1"
+        sheet_tab.format(cell_range, {'textFormat': {'bold': True}})
+
+    def _write_rows(self, sheet_tab, headers, csv_data):
+        """
+        Write rows to the worksheet after headers
+
+        All rows are handed to the worksheet in one append_rows call instead of one
+        append_row call per row, so a large catalog is not written row by row.
+        Nothing is sent when csv_data is empty.
+
+        Args:
+            sheet_tab: The worksheet object
+            headers: The headers of the worksheet; they define the column order of every row
+            csv_data: The data to be written in the worksheet, as a list of dictionaries, where
+            each dictionary represents a row. Keys missing from a row produce an empty (None) cell
+        """
+        def escape_quotes(value):
+            # double quote escape to preserve " in values; non-string values pass through untouched
+            return value.replace('"', '""') if isinstance(value, str) else value
+
+        rows = [
+            [escape_quotes(row.get(header)) for header in headers]
+            for row in csv_data
+        ]
+        if rows:
+            sheet_tab.append_rows(rows)
+
+    def write_data(self, config, csv_headers, csv_data, overwrite):
+        """
+        Write data to the google spread sheet
+
+        Errors are logged and not re-raised; the return value tells the caller whether
+        the write went through.
+
+        Args:
+            config: The configuration for the google spread sheet; the "SHEET_ID" and
+            "OUTPUT_TAB_ID" keys are read
+            csv_headers: The headers of the data to be written in the worksheet
+            csv_data: The data to be written in the worksheet, as a list of dictionaries, where
+            each dictionary represents a row
+            overwrite: Whether to overwrite the existing data in the worksheet
+
+        Returns:
+            True when the data was written, False when an exception was caught and logged
+        """
+        try:
+            spread_sheet = self.get_spread_sheet_by_key(config["SHEET_ID"])
+            # A new tab is sized to the data plus one spare column and one row for the headers.
+            sheet_tab = self._get_or_create_worksheet(
+                spread_sheet, config["OUTPUT_TAB_ID"], len(csv_headers) + 1, len(csv_data) + 1
+            )
+
+            if overwrite:
+                sheet_tab.clear()
+
+            if csv_headers:
+                self._write_headers(sheet_tab, csv_headers)
+
+            self._write_rows(sheet_tab, csv_headers, csv_data)
+        except gspread.exceptions.GSpreadException as e:
+            logger.exception(
+                "[Spread Sheet Write Error]: GSpreadException occurred while writing sheet data: %s", e
+            )
+            return False
+        except Exception as e:  # pylint: disable=broad-except
+            logger.exception(
+                "[Spread Sheet Write Error]: Exception occurred while writing sheet data: %s", e
+            )
+            return False
+
+        # Single-line message: a triple-quoted f-string would put newlines and indentation in the log.
+        logger.info(
+            "[Spread Sheet Write Success]: Successfully written data to sheet %s tab %s",
+            config["SHEET_ID"],
+            config["OUTPUT_TAB_ID"],
+        )
+        return True
+
     @staticmethod
     def get_worksheet_data_by_tab_id(spread_sheet, tab_id):
         try:

diff --git a/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py b/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py
@@ -0,0 +1,184 @@
+import csv
+import datetime
+import logging
+
+from django.conf import settings
+from django.core.management import BaseCommand, CommandError
+from django.db.models import Prefetch
+
+from course_discovery.apps.course_metadata.gspread_client import GspreadClient
+from course_discovery.apps.course_metadata.models import Course, CourseType, SubjectTranslation
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """
+    Management command that exports catalog products to a CSV file or to a Google Sheet tab.
+    """
+    help = """
+    Populates Product Catalog for Salesforce Marketing Cloud Catalog
+
+    Example usage:
+    python manage.py populate_product_catalog --product_type={product_type} --output_csv=/path/to/output.csv --product_source={product_source}
+    python manage.py populate_product_catalog --product_type={product_type} --product_source={product_source} --use_gspread_client=True --overwrite=True
+    """
+
+    # Column order of the exported catalog; the keys of get_transformed_data() match these names.
+    CATALOG_CSV_HEADERS = [
+        'UUID', 'Title', 'Organizations Name', 'Organizations Logo', 'Organizations Abbr', 'Languages',
+        'Subjects', 'Subjects Spanish', 'Marketing URL', 'Marketing Image'
+    ]
+
+    @staticmethod
+    def _parse_bool(value):
+        """
+        Convert a command line flag value such as "True" or "false" to a bool.
+
+        argparse's type=bool treats every non-empty string as True, so "--overwrite=False"
+        would still overwrite. The empty string stays falsy because that was the only way
+        to pass False while the flags were parsed with type=bool.
+
+        Raises:
+            ValueError: for unrecognised values; argparse reports it as an invalid argument.
+        """
+        normalized = str(value).strip().lower()
+        if normalized in ('true', 't', 'yes', 'y', '1', 'on'):
+            return True
+        if normalized in ('false', 'f', 'no', 'n', '0', 'off', ''):
+            return False
+        raise ValueError(f'Expected a boolean value, got {value!r}')
+
+    def add_arguments(self, parser):
+        """
+        Register the command line options of the command.
+        """
+        parser.add_argument(
+            '--product_type',
+            dest='product_type',
+            type=str,
+            required=False,
+            help='Product Type to populate in the catalog'
+        )
+        parser.add_argument(
+            '--output_csv',
+            dest='output_csv',
+            type=str,
+            required=False,
+            help='Path of the output CSV'
+        )
+        parser.add_argument(
+            '--product_source',
+            dest='product_source',
+            type=str,
+            required=False,
+            help='The product source to filter the products'
+        )
+        parser.add_argument(
+            '--use_gspread_client',
+            dest='gspread_client_flag',
+            type=self._parse_bool,
+            required=False,
+            help='Flag to use Gspread Client for writing data to Google Sheets'
+        )
+        parser.add_argument(
+            '--overwrite',
+            dest='overwrite_flag',
+            type=self._parse_bool,
+            default=True,
+            required=False,
+            help='Flag to overwrite the existing data in Google Sheet tab'
+        )
+
+    def get_products(self, product_type, product_source):
+        """
+        Extract products from the DB for product catalog
+
+        Args:
+            product_type: One of 'executive_education', 'bootcamp' or 'ocm_course' (case-insensitive).
+            product_source: Optional product source slug used to narrow the products.
+
+        Returns:
+            A Course queryset with authoring organizations, subjects and the Spanish subject
+            translations prefetched, or an empty queryset when product_type is missing or unknown.
+        """
+        ocm_course_catalog_types = [
+            CourseType.AUDIT, CourseType.VERIFIED_AUDIT, CourseType.PROFESSIONAL, CourseType.CREDIT_VERIFIED_AUDIT,
+            'verified', 'spoc-verified-audit'
+        ]
+        type_filters = {
+            'ocm_course': {'type__slug__in': ocm_course_catalog_types},
+            'executive_education': {'type__slug': CourseType.EXECUTIVE_EDUCATION_2U},
+            'bootcamp': {'type__slug': CourseType.BOOTCAMP_2U},
+        }
+
+        # --product_type is optional, so guard against None before normalising the case.
+        normalized_type = (product_type or '').lower()
+        if normalized_type not in type_filters:
+            # Return empty queryset if invalid product type specified
+            return Course.objects.none()
+
+        queryset = Course.objects.available().filter(**type_filters[normalized_type])
+        if product_source:
+            queryset = queryset.filter(product_source__slug=product_source)
+
+        # Prefetch Spanish translations of subjects
+        subject_translations = Prefetch(
+            'subjects__translations',
+            queryset=SubjectTranslation.objects.filter(language_code='es'),
+            to_attr='spanish_translations'
+        )
+
+        return queryset.prefetch_related(
+            'authoring_organizations',
+            'subjects',
+            subject_translations
+        )
+
+    def write_csv_header(self, output_csv):
+        """
+        Write the header of output CSV in the file.
+
+        Args:
+            output_csv: An open, writable text file object.
+
+        Returns:
+            The csv.DictWriter bound to the file, ready to write product rows.
+        """
+        writer = csv.DictWriter(output_csv, fieldnames=self.CATALOG_CSV_HEADERS)
+        writer.writeheader()
+        return writer
+
+    def get_transformed_data(self, product):
+        """
+        Transforms the product data for product's catalog
+
+        Returns:
+            A dict keyed by the CATALOG_CSV_HEADERS column names.
+        """
+        authoring_orgs = product.authoring_organizations.all()
+        subjects = product.subjects.all()
+        return {
+            "UUID": str(product.uuid),
+            "Title": product.title,
+            "Organizations Name": ", ".join(org.name for org in authoring_orgs),
+            "Organizations Logo": ", ".join(
+                org.logo_image.url for org in authoring_orgs if org.logo_image
+            ),
+            "Organizations Abbr": ", ".join(org.key for org in authoring_orgs),
+            "Languages": product.languages_codes,
+            "Subjects": ", ".join(subject.name for subject in subjects),
+            # spanish_translations is the to_attr filled by the Prefetch in get_products().
+            "Subjects Spanish": ", ".join(
+                translation.name for subject in subjects
+                for translation in subject.spanish_translations
+            ),
+            "Marketing URL": product.marketing_url,
+            "Marketing Image": (product.image.url if product.image else ""),
+        }
+
+    def _write_products_to_csv(self, products, output_csv):
+        """
+        Write the products to the CSV at output_csv and return how many rows were written.
+
+        A product that fails to transform or write is logged and skipped.
+        """
+        written_count = 0
+        # Explicit UTF-8 so the output does not depend on the platform's default encoding.
+        with open(output_csv, 'w', newline='', encoding='utf-8') as output_file:
+            output_writer = self.write_csv_header(output_file)
+            for product in products:
+                try:
+                    output_writer.writerow(self.get_transformed_data(product))
+                except Exception as e:  # pylint: disable=broad-exception-caught
+                    logger.error(f"Error writing product {product.uuid} to CSV: {str(e)}")
+                    continue
+                written_count += 1
+        return written_count
+
+    @staticmethod
+    def _get_output_tab_id(product_type, overwrite):
+        """
+        Build the Google Sheet tab name: the upper-cased product type, with a date suffix
+        when the existing tab must not be overwritten, or 'All' without a product type.
+        """
+        if not product_type:
+            return 'All'
+        date_suffix = '' if overwrite else '_' + datetime.datetime.now().strftime("%Y%m%d")
+        return product_type.upper() + date_suffix
+
+    def handle(self, *args, **options):
+        """
+        Fetch the requested products and export them to a CSV file or to Google Sheets.
+
+        Raises:
+            CommandError: when no products match or any step of the export fails.
+        """
+        product_type = options.get('product_type')
+        output_csv = options.get('output_csv')
+        product_source = options.get('product_source')
+        gspread_client_flag = options.get('gspread_client_flag')
+        overwrite = options.get('overwrite_flag')
+
+        try:
+            products = self.get_products(product_type, product_source)
+            # A single COUNT query answers both "is there anything" and "how many".
+            products_count = products.count()
+            if not products_count:
+                raise CommandError('No products found for the given criteria.')
+
+            logger.info(f'Fetched {products_count} courses from the database')
+            if output_csv:
+                written_count = self._write_products_to_csv(products, output_csv)
+                logger.info(f'Populated {written_count} {product_type}s to {output_csv}')
+
+            elif gspread_client_flag:
+                # The sheet settings and the client are only needed on this path, so a CSV
+                # export does not depend on the Google Sheets configuration.
+                product_catalog_config = {
+                    'SHEET_ID': settings.PRODUCT_CATALOG_SHEET_ID,
+                    'OUTPUT_TAB_ID': self._get_output_tab_id(product_type, overwrite),
+                }
+                csv_data = [self.get_transformed_data(product) for product in products]
+                GspreadClient().write_data(
+                    product_catalog_config,
+                    self.CATALOG_CSV_HEADERS,
+                    csv_data,
+                    overwrite=overwrite,
+                )
+                logger.info(f'Populated {products_count} {product_type}s to Google Sheets')
+
+            else:
+                logger.warning(
+                    'No output target given; pass --output_csv or --use_gspread_client to export the products'
+                )
+
+        except Exception as e:
+            raise CommandError(f'Error while populating product catalog: {str(e)}') from e