diff --git a/course_discovery/apps/course_metadata/constants.py b/course_discovery/apps/course_metadata/constants.py index 4d709c47a2..959cf1cbb0 100644 --- a/course_discovery/apps/course_metadata/constants.py +++ b/course_discovery/apps/course_metadata/constants.py @@ -158,7 +158,7 @@ class PathwayType(Enum): 'bootcamp' : ['bootcamp-2u'], } -SNOWFLAKE_POPULATE_PRODUCT_CATALOG_QUERY = """ +SNOWFLAKE_POPULATE_PRODUCT_COURSES_CATALOG_QUERY = """ WITH course_data AS ( SELECT c.id, c.uuid as COURSE_UUID, @@ -251,3 +251,88 @@ class PathwayType(Enum): (is_upcoming = 'True') OR (is_enrollable = 'True' AND has_ended != 'True' AND is_marketable = 'True'); """ + +SNOWFLAKE_POPULATE_PRODUCT_DEGREES_CATALOG_QUERY = """ + SELECT + cmp.uuid, + cmp.title, + cmp.status, + cmp.marketing_slug, + cmpt.slug AS program_type_slug, + product_source.slug AS PRODUCT_SOURCE, + CASE + WHEN cmd.program_ptr_id IS NULL THEN 0 + ELSE 1 + END AS is_degree, + CASE + WHEN POSITION('/', cmp.marketing_slug) = 0 THEN CONCAT( + cp.marketing_site_url_root, + cmpt.slug, + '/', + cmp.marketing_slug + ) + ELSE CONCAT(cp.marketing_site_url_root, cmp.marketing_slug) + END AS marketing_url, + COALESCE( + LISTAGG(DISTINCT cmo.name, ', ') WITHIN GROUP (ORDER BY cmo.name ASC), + '' + ) AS authoring_organizations, + + -- Primary subject override (when set) followed by the course subjects; NULL or empty parts are skipped because CONCAT_WS would return NULL for them + ARRAY_TO_STRING( + ARRAY_CONSTRUCT_COMPACT( + cms_primary.slug, + NULLIF(LISTAGG(DISTINCT cms.slug, ', ') WITHIN GROUP (ORDER BY cms.slug ASC), '') + ), ', ') AS subjects, + + CASE + WHEN cmlt_override.id IS NULL THEN COALESCE( + LISTAGG(DISTINCT cmlt.name, ', ') WITHIN GROUP (ORDER BY cmlt.name ASC), + '' + ) + ELSE cmlt_override.name + END AS level_types, + + COALESCE( + LISTAGG(DISTINCT ts.name, ', ') WITHIN GROUP (ORDER BY ts.name ASC), + '' + ) AS skills + + FROM discovery.course_metadata_program cmp + INNER JOIN discovery.core_partner cp ON cp.id = cmp.partner_id + INNER JOIN discovery.course_metadata_programtype cmpt ON cmpt.id = cmp.type_id + LEFT JOIN
discovery.course_metadata_degree cmd ON cmp.id = cmd.program_ptr_id + LEFT JOIN discovery.course_metadata_degreeadditionalmetadata cmdam ON cmd.program_ptr_id = cmdam.degree_id + LEFT JOIN discovery.course_metadata_program_authoring_organizations cmpao ON cmpao.program_id = cmp.id + LEFT JOIN discovery.course_metadata_organization cmo ON cmo.id = cmpao.organization_id + LEFT JOIN discovery.course_metadata_program_courses cmpc ON cmpc.program_id = cmp.id + LEFT JOIN discovery.course_metadata_course cmc ON cmc.id = cmpc.course_id + LEFT JOIN discovery.course_metadata_leveltype cmlt ON cmlt.id = cmc.level_type_id + LEFT JOIN discovery.course_metadata_course_subjects cmcs ON cmcs.course_id = cmc.id + LEFT JOIN discovery.course_metadata_subject cms ON cms.id = cmcs.subject_id + LEFT JOIN discovery.taxonomy_courseskills tcs ON tcs.course_key = cmc.key + LEFT JOIN discovery.taxonomy_skill ts ON ts.id = tcs.skill_id + LEFT JOIN discovery.course_metadata_subject cms_primary ON cms_primary.id = cmp.primary_subject_override_id + LEFT JOIN discovery.course_metadata_leveltype cmlt_override ON cmlt_override.id = cmp.level_type_override_id + join discovery.course_metadata_source as PRODUCT_SOURCE on cmp.product_source_id = PRODUCT_SOURCE.id + WHERE cmp.partner_id = 1 and cmp.status = 'active' {product_source_filter} + GROUP BY + cmp.uuid, + cmp.title, + cmp.status, + cmp.marketing_slug, + cmpt.slug, + cp.name, + cmd.program_ptr_id, + cp.marketing_site_url_root, + CMLT_OVERRIDE.ID, + CMLT_OVERRIDE.NAME, + product_source.slug, + cms_primary.slug -- Added cms_primary.slug to the GROUP BY clause + HAVING + IS_DEGREE = 1; +""" +SNOWFLAKE_POPULATE_PRODUCT_CATALOG_QUERY = { + 'course' : SNOWFLAKE_POPULATE_PRODUCT_COURSES_CATALOG_QUERY, + 'degree': SNOWFLAKE_POPULATE_PRODUCT_DEGREES_CATALOG_QUERY, +} diff --git a/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py b/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py index
89a47684a1..205e5fbb58 100644 --- a/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py +++ b/course_discovery/apps/course_metadata/management/commands/populate_product_catalog.py @@ -86,16 +86,20 @@ def get_products_via_snowflake(self, product_type='ocm_course', product_source=N database='prod' ) cs = snowflake_client.cursor() - course_types = ', '.join([f"'{ct}'" for ct in COURSE_TYPES.get(product_type, [])]) - product_source_filter = f"AND product_source.slug='{product_source}'" if product_source else '' + course_types = ', '.join(f"'{ct}'" for ct in COURSE_TYPES.get(product_type, [])) + product_source_list = [ps.strip() for ps in (product_source or '').split(',') if ps.strip()] + quoted_sources = ', '.join( + "'{}'".format(ps.replace('\\', '\\\\').replace("'", "''")) for ps in product_source_list + )  # emit real SQL string literals (escape \ and '); repr() can yield double-quoted identifiers + product_source_filter = f"AND product_source IN ({quoted_sources})" if quoted_sources else '' + query_type = 'course' if product_type in ['executive_education', 'bootcamp', 'ocm_course'] else 'degree' rows = [] try: - cs.execute( - SNOWFLAKE_POPULATE_PRODUCT_CATALOG_QUERY.format( - course_types=course_types, - product_source_filter=product_source_filter - ) + query = SNOWFLAKE_POPULATE_PRODUCT_CATALOG_QUERY[query_type].format( + course_types=course_types, + product_source_filter=product_source_filter ) + cs.execute(query) rows = cs.fetchall() except Exception as e: logger.error('Error while fetching products from Snowflake for product catalog: %s', str(e))