Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow enrollment api admin to see all enrollments #1714

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ Change Log
Unreleased
----------

[4.10.0]
--------

feat: enrollment API enhancements

- Allows Enrollment API Admin to see all enrollments.
- Makes the endpoint return more fields, such as: enrollment_track,
enrollment_date, user_email, course_start and course_end.
- Changes EnterpriseCourseEnrollment's default ordering from 'created'
to 'id', which equivalent, but faster in some cases (due to the
existing indes on 'id').

[4.9.5]
--------

Expand Down
2 changes: 1 addition & 1 deletion enterprise/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Your project description goes here.
"""

__version__ = "4.9.5"
__version__ = "4.10.0"
32 changes: 31 additions & 1 deletion enterprise/api/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from django.contrib import auth

from enterprise.models import EnterpriseCustomerUser, SystemWideEnterpriseUserRoleAssignment
from enterprise.models import EnterpriseCustomer, EnterpriseCustomerUser, SystemWideEnterpriseUserRoleAssignment

User = auth.get_user_model()

Expand All @@ -33,6 +33,36 @@ def filter_queryset(self, request, queryset, view):
return queryset


class EnterpriseCourseEnrollmentFilterBackend(filters.BaseFilterBackend):
"""
Filter backend to return enrollments under the user's enterprise(s) only.

* Staff users will bypass this filter.
* Non-staff users will receive enrollments under their linked enterprises,
only if they have the `enterprise.can_enroll_learners` permission.
* Non-staff users without the `enterprise.can_enroll_learners` permission
will receive only their own enrollments.
"""

def filter_queryset(self, request, queryset, view):
"""
Filter out enrollments if learner is not linked
"""

if request.user.is_staff:
return queryset

if request.user.has_perm('enterprise.can_enroll_learners'):
enterprise_customers = EnterpriseCustomer.objects.filter(enterprise_customer_users__user_id=request.user.id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the idea here is that the user can be associated with multiple enterprises and you want to get all the enterprises the user is associated with and then all users in all those enterprises. Is that right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, @feanil. If a user has the enterprise.can_enroll_learners permission, that it should be able to see all enrollments from all enterprises it is associated with unless some specific filter is specified.

return queryset.filter(enterprise_customer_user__enterprise_customer__in=enterprise_customers)

filter_kwargs = {
view.USER_ID_FILTER: request.user.id,
}

return queryset.filter(**filter_kwargs)


class EnterpriseCustomerUserFilterBackend(filters.BaseFilterBackend):
"""
Allow filtering on the enterprise customer user api endpoint.
Expand Down
26 changes: 26 additions & 0 deletions enterprise/api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,32 @@ class Meta:
)


class EnterpriseCourseEnrollmentWithAdditionalFieldsReadOnlySerializer(EnterpriseCourseEnrollmentReadOnlySerializer):
"""
Serializer for EnterpriseCourseEnrollment model with additional fields.
"""

class Meta:
model = models.EnterpriseCourseEnrollment
fields = (
'enterprise_customer_user',
'course_id',
adamstankiewicz marked this conversation as resolved.
Show resolved Hide resolved
'created',
'unenrolled_at',
'enrollment_date',
'enrollment_track',
'user_email',
'course_start',
'course_end',
)

enrollment_track = serializers.CharField()
enrollment_date = serializers.DateTimeField()
user_email = serializers.EmailField()
course_start = serializers.DateTimeField()
course_end = serializers.DateTimeField()


class EnterpriseCourseEnrollmentWriteSerializer(serializers.ModelSerializer):
"""
Serializer for writing to the EnterpriseCourseEnrollment model.
Expand Down
63 changes: 61 additions & 2 deletions enterprise/api/v1/views/enterprise_course_enrollment.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,68 @@
"""
Views for the ``enterprise-course-enrollment`` API endpoint.
"""
from django_filters.rest_framework import DjangoFilterBackend
from edx_rest_framework_extensions.paginators import DefaultPagination
from rest_framework import filters

from django.core.paginator import Paginator
from django.utils.functional import cached_property

from enterprise import models
from enterprise.api.filters import EnterpriseCourseEnrollmentFilterBackend
from enterprise.api.v1 import serializers
from enterprise.api.v1.views.base_views import EnterpriseReadWriteModelViewSet

try:
from common.djangoapps.util.query import read_replica_or_default
except ImportError:
def read_replica_or_default():
return None


class PaginatorWithOptimizedCount(Paginator):
"""
Django < 4.2 ORM doesn't strip unused annotations from count queries.

For example, if we execute this query:

Book.objects.annotate(Count('chapters')).count()

it will generate SQL like this:

SELECT COUNT(*) FROM (SELECT COUNT(...), ... FROM ...) subquery

This isn't optimal on its own, but it's not a big deal. However, this
becomes problematic when annotations use subqueries, because it's terribly
inefficient to execute the subquery for every row in the outer query.

This class overrides the count() method of Django's Paginator to strip
unused annotations from the query by requesting only the primary key
instead of all fields.

This is a temporary workaround until Django is updated to 4.2, which will
include a fix for this issue.

See https://code.djangoproject.com/ticket/32169 for more details.

TODO: remove this class once Django is updated to 4.2 or higher.
"""
@cached_property
def count(self):
return self.object_list.values("pk").count()


class EnterpriseCourseEnrollmentPagination(DefaultPagination):
django_paginator_class = PaginatorWithOptimizedCount


class EnterpriseCourseEnrollmentViewSet(EnterpriseReadWriteModelViewSet):
"""
API views for the ``enterprise-course-enrollment`` API endpoint.
"""

queryset = models.EnterpriseCourseEnrollment.objects.all()
queryset = models.EnterpriseCourseEnrollment.with_additional_fields.all()
filter_backends = (filters.OrderingFilter, DjangoFilterBackend, EnterpriseCourseEnrollmentFilterBackend)

USER_ID_FILTER = 'enterprise_customer_user__user_id'
FIELDS = (
Expand All @@ -20,10 +71,18 @@ class EnterpriseCourseEnrollmentViewSet(EnterpriseReadWriteModelViewSet):
filterset_fields = FIELDS
ordering_fields = FIELDS

pagination_class = EnterpriseCourseEnrollmentPagination

def get_queryset(self):
queryset = super().get_queryset()
if self.request.method == 'GET':
queryset = queryset.using(read_replica_or_default())
return queryset

def get_serializer_class(self):
"""
Use a special serializer for any requests that aren't read-only.
"""
if self.request.method in ('GET',):
return serializers.EnterpriseCourseEnrollmentReadOnlySerializer
return serializers.EnterpriseCourseEnrollmentWithAdditionalFieldsReadOnlySerializer
return serializers.EnterpriseCourseEnrollmentWriteSerializer
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2 on 2023-12-29 17:03

from django.db import migrations


class Migration(migrations.Migration):

dependencies = [
('enterprise', '0197_auto_20231130_2239'),
]

operations = [
migrations.AlterModelOptions(
name='enterprisecourseenrollment',
options={'ordering': ['id']},
),
]
61 changes: 60 additions & 1 deletion enterprise/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@
except ImportError:
CourseEntitlement = None

try:
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
except ImportError:
CourseOverview = None

LOGGER = getEnterpriseLogger(__name__)
User = auth.get_user_model()
mark_safe_lazy = lazy(mark_safe, str)
Expand Down Expand Up @@ -1857,11 +1862,61 @@ def get_queryset(self):
"""
Override to return only those enrollment records for which learner is linked to an enterprise.
"""

return super().get_queryset().select_related('enterprise_customer_user').filter(
enterprise_customer_user__linked=True
)


class EnterpriseCourseEnrollmentWithAdditionalFieldsManager(models.Manager):
"""
Model manager for `EnterpriseCourseEnrollment`.
"""

def get_queryset(self):
"""
Override to return only those enrollment records for which learner is linked to an enterprise.
"""

return super().get_queryset().select_related('enterprise_customer_user').filter(
enterprise_customer_user__linked=True
).annotate(**self._get_additional_data_annotations())

def _get_additional_data_annotations(self):
"""
Return annotations with additional data for the queryset.
Additional fields are None in the test environment, where platform models are not available.
"""

if not CourseEnrollment or not CourseOverview:
return {
'enrollment_track': models.Value(None, output_field=models.CharField()),
'enrollment_date': models.Value(None, output_field=models.DateTimeField()),
'user_email': models.Value(None, output_field=models.EmailField()),
'course_start': models.Value(None, output_field=models.DateTimeField()),
'course_end': models.Value(None, output_field=models.DateTimeField()),
}

enrollment_subquery = CourseEnrollment.objects.filter(
user=models.OuterRef('enterprise_customer_user__user_id'),
course_id=models.OuterRef('course_id'),
)
user_subquery = auth.get_user_model().objects.filter(
id=models.OuterRef('enterprise_customer_user__user_id'),
).values('email')[:1]
course_subquery = CourseOverview.objects.filter(
id=models.OuterRef('course_id'),
)

return {
'enrollment_track': models.Subquery(enrollment_subquery.values('mode')[:1]),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a lot of subqueries to other models, what's the performance impact of doing this on every enrollment we return? Do you have a production system that is using this? Would it be possible to get an explain on the MySQL query that this runs?

Copy link
Contributor Author

@0x29a 0x29a Nov 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@feanil, I've done some tests.

Snippet I used to generate some fake data. Sorry, it's a bit messy. The total number of enterprise enrollments was more than 16000. Other entities are counted in thousands too.
import random
from uuid import uuid4
from django.db.utils import IntegrityError
from common.djangoapps.student.tests.factories import CourseEnrollmentFactory

from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from enterprise.models import EnterpriseCustomerUser

from enterprise.models import (
    EnterpriseCustomer,
)

from xmodule.modulestore.tests.factories import (
    CourseFactory as ModuleStoreCourseFactory,
    XMODULE_FACTORY_LOCK
)

from common.djangoapps.student.tests.factories import (
    CourseEnrollmentFactory,
    UserFactory,
)
from openedx.core.djangoapps.catalog.tests.factories import (
    CourseFactory,
    CourseRunFactory,
)
from openedx.features.enterprise_support.tests.factories import (
    EnterpriseCourseEnrollmentFactory,
    EnterpriseCustomerFactory,
    EnterpriseCustomerUserFactory,
)

# Attach 10000 enrollments to a single already existing customer
for x in range(10000):
    print(f"First run - {x}")
    try:
        user = UserFactory()
        XMODULE_FACTORY_LOCK.enable()
        modulestore_courses = [ModuleStoreCourseFactory() for _ in range(10)]
        course_runs = [CourseRunFactory(key=str(modulestore_course.id)) for modulestore_course in modulestore_courses]
        courses = [CourseFactory(course_runs=[course_run]) for course_run in course_runs]
        XMODULE_FACTORY_LOCK.disable()
        enterprise_customer = EnterpriseCustomer.objects.get(name="CBA")
        enterprise_customer_user = EnterpriseCustomerUserFactory(
            user_id=user.id,
            enterprise_customer=enterprise_customer
        )
        for modulestore_course in modulestore_courses:
            CourseEnrollmentFactory(
                is_active=True,
                course_id=modulestore_course.id,
                user=user
            )
            EnterpriseCourseEnrollmentFactory(
                course_id=modulestore_course.id,
                enterprise_customer_user=enterprise_customer_user
            )
    except Exception as e:
        print(e)

# Create many more other enterprise users
for x in range(10000):
    print(f"Second run - {x}")
    enterprise_uuid = uuid4()
    try:
        user = UserFactory()
        XMODULE_FACTORY_LOCK.enable()
        modulestore_course = ModuleStoreCourseFactory()
        course_run = CourseRunFactory(key=str(modulestore_course.id))
        course = CourseFactory(course_runs=[course_run])
        XMODULE_FACTORY_LOCK.disable()
        enterprise_customer = EnterpriseCustomerFactory(uuid=enterprise_uuid)
        enterprise_customer_user = EnterpriseCustomerUserFactory(
            user_id=user.id,
            enterprise_customer=enterprise_customer
        )
        CourseEnrollmentFactory(
            is_active=True,
            course_id=modulestore_course.id,
            user=user
        )
        EnterpriseCourseEnrollmentFactory(
            course_id=modulestore_course.id,
            enterprise_customer_user=enterprise_customer_user
        )
    except Exception as e:
        print(e)

# Create many more users that are enrolled into all courses, use random enterprises
courses = list(CourseOverview.objects.all())
enterprise_customer_users = list(EnterpriseCustomerUser.objects.all())
for x in range(1_000_000):
    try:
        print('-' * 100)
        print(x)
        print('-' * 100)
        user = UserFactory()
        for course in [random.choice(courses) for _ in range(20)]:
            CourseEnrollmentFactory(
                is_active=True,
                course_id=str(course.id),
                user=user
            )
            EnterpriseCourseEnrollmentFactory(
                course_id=str(course.id),
                enterprise_customer_user=random.choice(enterprise_customer_users)
            )
    except IntegrityError:
        print("IntegrityError")
        continue
Django queryset I used to get an SQL query.
from enterprise.models import EnterpriseCourseEnrollment
qs = EnterpriseCourseEnrollment.with_additional_fields.all()
str(qs.query)
Raw SQL query.
SELECT
  `enterprise_enterprisecourseenrollment`.`id`, 
  `enterprise_enterprisecourseenrollment`.`created`, 
  `enterprise_enterprisecourseenrollment`.`modified`, 
  `enterprise_enterprisecourseenrollment`.`enterprise_customer_user_id`, 
  `enterprise_enterprisecourseenrollment`.`course_id`, 
  `enterprise_enterprisecourseenrollment`.`saved_for_later`, 
  `enterprise_enterprisecourseenrollment`.`source_id`, 
  (
    SELECT 
      U0.`mode` 
    FROM 
      `student_courseenrollment` U0 
    WHERE 
      (
        U0.`course_id` = `enterprise_enterprisecourseenrollment`.`course_id` 
        AND U0.`user_id` = `enterprise_enterprisecustomeruser`.`user_id`
      ) 
    ORDER BY 
      U0.`user_id` ASC, 
      U0.`course_id` ASC 
    LIMIT 
      1
  ) AS `enrollment_track`, 
  (
    SELECT 
      U0.`created` 
    FROM 
      `student_courseenrollment` U0 
    WHERE 
      (
        U0.`course_id` = `enterprise_enterprisecourseenrollment`.`course_id` 
        AND U0.`user_id` = `enterprise_enterprisecustomeruser`.`user_id`
      ) 
    ORDER BY 
      U0.`user_id` ASC, 
      U0.`course_id` ASC 
    LIMIT 
      1
  ) AS `enrollment_date`, 
  (
    SELECT 
      U0.`email` 
    FROM 
      `auth_user` U0 
    WHERE 
      U0.`id` = `enterprise_enterprisecustomeruser`.`user_id` 
    LIMIT 
      1
  ) AS `user_email`, 
  (
    SELECT 
      U0.`start` 
    FROM 
      `course_overviews_courseoverview` U0 
    WHERE 
      U0.`id` = `enterprise_enterprisecourseenrollment`.`course_id` 
    LIMIT 
      1
  ) AS `course_start`, 
  (
    SELECT 
      U0.`end` 
    FROM 
      `course_overviews_courseoverview` U0 
    WHERE 
      U0.`id` = `enterprise_enterprisecourseenrollment`.`course_id` 
    LIMIT 
      1
  ) AS `course_end`, 
  `enterprise_enterprisecustomeruser`.`id`, 
  `enterprise_enterprisecustomeruser`.`created`, 
  `enterprise_enterprisecustomeruser`.`modified`, 
  `enterprise_enterprisecustomeruser`.`enterprise_customer_id`, 
  `enterprise_enterprisecustomeruser`.`user_id`, 
  `enterprise_enterprisecustomeruser`.`active`, 
  `enterprise_enterprisecustomeruser`.`linked`, 
  `enterprise_enterprisecustomeruser`.`is_relinkable`, 
  `enterprise_enterprisecustomeruser`.`invite_key_id`, 
  `enterprise_enterprisecustomeruser`.`should_inactivate_other_customers` 
FROM 
  `enterprise_enterprisecourseenrollment` 
  INNER JOIN `enterprise_enterprisecustomeruser` ON (
    `enterprise_enterprisecourseenrollment`.`enterprise_customer_user_id` = `enterprise_enterprisecustomeruser`.`id`
  ) 
WHERE 
  `enterprise_enterprisecustomeruser`.`linked` 
ORDER BY 
  `enterprise_enterprisecourseenrollment`.`created` ASC
`EXPLAIN FORMAT=json` with `SQL_NO_CACHE`.
{
"query_block": {
  "select_id": 1,
  "cost_info": {
    "query_cost": "9701.15"
  },
  "ordering_operation": {
    "using_temporary_table": true,
    "using_filesort": true,
    "cost_info": {
      "sort_cost": "4130.80"
    },
    "nested_loop": [
      {
        "table": {
          "table_name": "enterprise_enterprisecustomeruser",
          "access_type": "ALL",
          "possible_keys": [
            "PRIMARY"
          ],
          "rows_examined_per_scan": 2977,
          "rows_produced_per_join": 2679,
          "filtered": "90.00",
          "cost_info": {
            "read_cost": "77.54",
            "eval_cost": "535.86",
            "prefix_cost": "613.40",
            "data_read_per_join": "586K"
          },
          "used_columns": [
            "id",
            "created",
            "modified",
            "user_id",
            "active",
            "linked",
            "enterprise_customer_id",
            "invite_key_id",
            "should_inactivate_other_customers",
            "is_relinkable"
          ],
          "attached_condition": "`edxapp`.`enterprise_enterprisecustomeruser`.`linked`"
        }
      },
      {
        "table": {
          "table_name": "enterprise_enterprisecourseenrollment",
          "access_type": "ref",
          "possible_keys": [
            "enterprise_enterprisecou_enterprise_customer_user_71fe301a_uniq"
          ],
          "key": "enterprise_enterprisecou_enterprise_customer_user_71fe301a_uniq",
          "used_key_parts": [
            "enterprise_customer_user_id"
          ],
          "key_length": "4",
          "ref": [
            "edxapp.enterprise_enterprisecustomeruser.id"
          ],
          "rows_examined_per_scan": 1,
          "rows_produced_per_join": 4130,
          "filtered": "100.00",
          "cost_info": {
            "read_cost": "4130.80",
            "eval_cost": "826.16",
            "prefix_cost": "5570.36",
            "data_read_per_join": "3M"
          },
          "used_columns": [
            "id",
            "created",
            "modified",
            "course_id",
            "enterprise_customer_user_id",
            "source_id",
            "saved_for_later"
          ]
        }
      }
    ],
    "select_list_subqueries": [
      {
        "dependent": true,
        "cacheable": false,
        "query_block": {
          "select_id": 6,
          "cost_info": {
            "query_cost": "1.20"
          },
          "table": {
            "table_name": "U0",
            "access_type": "eq_ref",
            "possible_keys": [
              "PRIMARY"
            ],
            "key": "PRIMARY",
            "used_key_parts": [
              "id"
            ],
            "key_length": "767",
            "ref": [
              "edxapp.enterprise_enterprisecourseenrollment.course_id"
            ],
            "rows_examined_per_scan": 1,
            "rows_produced_per_join": 1,
            "filtered": "100.00",
            "cost_info": {
              "read_cost": "1.00",
              "eval_cost": "0.20",
              "prefix_cost": "1.20",
              "data_read_per_join": "3K"
            },
            "used_columns": [
              "id",
              "end"
            ]
          }
        }
      },
      {
        "dependent": true,
        "cacheable": false,
        "query_block": {
          "select_id": 5,
          "cost_info": {
            "query_cost": "1.20"
          },
          "table": {
            "table_name": "U0",
            "access_type": "eq_ref",
            "possible_keys": [
              "PRIMARY"
            ],
            "key": "PRIMARY",
            "used_key_parts": [
              "id"
            ],
            "key_length": "767",
            "ref": [
              "edxapp.enterprise_enterprisecourseenrollment.course_id"
            ],
            "rows_examined_per_scan": 1,
            "rows_produced_per_join": 1,
            "filtered": "100.00",
            "cost_info": {
              "read_cost": "1.00",
              "eval_cost": "0.20",
              "prefix_cost": "1.20",
              "data_read_per_join": "3K"
            },
            "used_columns": [
              "id",
              "start"
            ]
          }
        }
      },
      {
        "dependent": true,
        "cacheable": false,
        "query_block": {
          "select_id": 4,
          "cost_info": {
            "query_cost": "1.20"
          },
          "table": {
            "table_name": "U0",
            "access_type": "eq_ref",
            "possible_keys": [
              "PRIMARY"
            ],
            "key": "PRIMARY",
            "used_key_parts": [
              "id"
            ],
            "key_length": "4",
            "ref": [
              "edxapp.enterprise_enterprisecustomeruser.user_id"
            ],
            "rows_examined_per_scan": 1,
            "rows_produced_per_join": 1,
            "filtered": "100.00",
            "cost_info": {
              "read_cost": "1.00",
              "eval_cost": "0.20",
              "prefix_cost": "1.20",
              "data_read_per_join": "2K"
            },
            "used_columns": [
              "id",
              "email"
            ],
            "attached_condition": "(`edxapp`.`U0`.`id` = `edxapp`.`enterprise_enterprisecustomeruser`.`user_id`)"
          }
        }
      },
      {
        "dependent": true,
        "cacheable": false,
        "query_block": {
          "select_id": 3,
          "cost_info": {
            "query_cost": "1.20"
          },
          "ordering_operation": {
            "using_filesort": false,
            "table": {
              "table_name": "U0",
              "access_type": "eq_ref",
              "possible_keys": [
                "student_courseenrollment_user_id_course_id_5d34a47f_uniq",
                "student_courseenrollment_user_id_4263a8e2",
                "student_cou_user_id_b19dcd_idx",
                "student_courseenrollment_course_id_a6f93be8"
              ],
              "key": "student_courseenrollment_user_id_course_id_5d34a47f_uniq",
              "used_key_parts": [
                "user_id",
                "course_id"
              ],
              "key_length": "771",
              "ref": [
                "edxapp.enterprise_enterprisecustomeruser.user_id",
                "edxapp.enterprise_enterprisecourseenrollment.course_id"
              ],
              "rows_examined_per_scan": 1,
              "rows_produced_per_join": 1,
              "filtered": "100.00",
              "index_condition": "((`edxapp`.`U0`.`user_id` <=> `edxapp`.`enterprise_enterprisecustomeruser`.`user_id`) and (`edxapp`.`U0`.`course_id` <=> `edxapp`.`enterprise_enterprisecourseenrollment`.`course_id`) and (`edxapp`.`U0`.`user_id` = `edxapp`.`enterprise_enterprisecustomeruser`.`user_id`))",
              "cost_info": {
                "read_cost": "1.00",
                "eval_cost": "0.20",
                "prefix_cost": "1.20",
                "data_read_per_join": "1K"
              },
              "used_columns": [
                "id",
                "course_id",
                "created",
                "user_id"
              ]
            }
          }
        }
      },
      {
        "dependent": true,
        "cacheable": false,
        "query_block": {
          "select_id": 2,
          "cost_info": {
            "query_cost": "1.20"
          },
          "ordering_operation": {
            "using_filesort": false,
            "table": {
              "table_name": "U0",
              "access_type": "eq_ref",
              "possible_keys": [
                "student_courseenrollment_user_id_course_id_5d34a47f_uniq",
                "student_courseenrollment_user_id_4263a8e2",
                "student_cou_user_id_b19dcd_idx",
                "student_courseenrollment_course_id_a6f93be8"
              ],
              "key": "student_courseenrollment_user_id_course_id_5d34a47f_uniq",
              "used_key_parts": [
                "user_id",
                "course_id"
              ],
              "key_length": "771",
              "ref": [
                "edxapp.enterprise_enterprisecustomeruser.user_id",
                "edxapp.enterprise_enterprisecourseenrollment.course_id"
              ],
              "rows_examined_per_scan": 1,
              "rows_produced_per_join": 1,
              "filtered": "100.00",
              "index_condition": "((`edxapp`.`U0`.`user_id` <=> `edxapp`.`enterprise_enterprisecustomeruser`.`user_id`) and (`edxapp`.`U0`.`course_id` <=> `edxapp`.`enterprise_enterprisecourseenrollment`.`course_id`) and (`edxapp`.`U0`.`user_id` = `edxapp`.`enterprise_enterprisecustomeruser`.`user_id`))",
              "cost_info": {
                "read_cost": "1.00",
                "eval_cost": "0.20",
                "prefix_cost": "1.20",
                "data_read_per_join": "1K"
              },
              "used_columns": [
                "id",
                "course_id",
                "mode",
                "user_id"
              ]
            }
          }
        }
      }
    ]
  }
}
}
MySQL profiler results.
mysql> show profiles \G;
*************************** 1. row ***************************
Query_ID: 1
Duration: 0.47852300
 Query: SELECT SQL_NO_CACHE `enterprise_enterprisecourseenrollment`.`id`, `enterprise_enterprisecourseenrollment`.`created`, `enterprise_enterprisecourseenrollment`.`modified`, `enterprise_enterprisecourseenrollment`.`enterprise_customer_user_id`, `enterprise_enterprisecourseenrollment`.`course_id`, `enterp
Django debug toolbar results for the same query (limited to one EnterpriseCustomerUser).

image

Django debug toolbar results for the same query (when a user has `enrollment_api_admin` role).

image

So, the query indeed is a bit slow (though, during my tests, the execution time wasn't growing linearly with the number of enrollments) when all records are fetched. However, for non-admin users it's as performant as when the database is almost empty.

I'm not sure if I missed anything @feanil. What do you think? Should I do any other tests?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ormsbee can you take a look at these as well, it looks it has to make a temp table for sorting and that's pretty costly, and the other major cost seems to be how it's reading the enrollment table.

Copy link

@ormsbee ormsbee Nov 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the table+filesort is definitely going to be expensive, and it looks like some of the bad reads on enterprise_enterprisecourseenrollment might be due to missing some useful indexes?

Questions:

  1. Is the performance better if we add a composite (course_id, enterprise_customer_user) index to EnterpriseCourseEnrollment? (Or even just a simple index on course_id?) It's doing a big query on course_id, but there's no good index for it. I think it's using the unique constraint that goes the opposite way ((course_id, enterprise_customer_user)) and doing a full index scan on that, because that's faster than a full table scan. But an index that leads with course_id should be a lot cheaper.
  2. Similarly, that table has an implicit ordering of created, but seems to have no index on that field. Do we really need to order at all? Can we add an index there if so? Is an ordering by created equivalent to ordering by the primary key, and can we just do that for the purposes of this query?
  3. Does this make use of read_replica_or_default? If not, can we make it use it?

Adding new indexes may be slow, so we'd have to communicate it so as not to be disruptive.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@feanil, @ormsbee, thank you for checking this. I'll try adding proposed indexes and will let you know how performance changes.

'enrollment_date': models.Subquery(enrollment_subquery.values('created')[:1]),
'user_email': models.Subquery(user_subquery),
'course_start': models.Subquery(course_subquery.values('start')[:1]),
'course_end': models.Subquery(course_subquery.values('end')[:1]),
}


class EnterpriseCourseEnrollment(TimeStampedModel):
"""
Store information about the enrollment of a user in a course.
Expand All @@ -1881,11 +1936,15 @@ class EnterpriseCourseEnrollment(TimeStampedModel):
"""

objects = EnterpriseCourseEnrollmentManager()
with_additional_fields = EnterpriseCourseEnrollmentWithAdditionalFieldsManager()

class Meta:
unique_together = (('enterprise_customer_user', 'course_id',),)
app_label = 'enterprise'
ordering = ['created']
# Originally, we were ordering by 'created', but there was never an index on that column. To avoid creating
# an index on that column, we are ordering by 'id' instead, which is indexed by default and is equivalent to
# ordering by 'created' in this case.
ordering = ['id']
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment to the effect that the originally intended ordering is by created, but there was never an index added for it, and that we're ordering by id because we believe it's equivalent and doesn't require an expensive migration that might require downtime.

At some point, it's possible that created will become editable, a bug will be filed, and someone is going to look at this and think, "oh, easy fix, I'll just make this created instead of id" without realizing the potential consequences.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, @ormsbee, thank you for taking time to review this at last.


enterprise_customer_user = models.ForeignKey(
EnterpriseCustomerUser,
Expand Down
35 changes: 35 additions & 0 deletions test_utils/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
EnterpriseCustomerReportingConfiguration,
EnterpriseCustomerSsoConfiguration,
EnterpriseCustomerUser,
EnterpriseFeatureRole,
EnterpriseFeatureUserRoleAssignment,
LearnerCreditEnterpriseCourseEnrollment,
LicensedEnterpriseCourseEnrollment,
PendingEnrollment,
Expand Down Expand Up @@ -272,6 +274,39 @@ class Meta:
invite_key = None


class EnterpriseFeatureRoleFactory(factory.django.DjangoModelFactory):
"""
EnterpriseFeatureRole factory.
Creates an instance of EnterpriseFeatureRole with minimal boilerplate.
"""

class Meta:
"""
Meta for EnterpriseFeatureRoleFactory.
"""

model = EnterpriseFeatureRole

name = factory.LazyAttribute(lambda x: FAKER.word())


class EnterpriseFeatureUserRoleAssignmentFactory(factory.django.DjangoModelFactory):
"""
EnterpriseFeatureUserRoleAssignment factory.
Creates an instance of EnterpriseFeatureUserRoleAssignment with minimal boilerplate.
"""

class Meta:
"""
Meta for EnterpriseFeatureUserRoleAssignmentFactory.
"""

model = EnterpriseFeatureUserRoleAssignment

role = factory.SubFactory(EnterpriseFeatureRoleFactory)
user = factory.SubFactory(UserFactory)


class AnonymousUserFactory(factory.Factory):
"""
Anonymous User factory.
Expand Down
Loading
Loading