diff --git a/kolibri/core/content/management/commands/importcontent.py b/kolibri/core/content/management/commands/importcontent.py index da52d30ae31..512c5a6a44e 100644 --- a/kolibri/core/content/management/commands/importcontent.py +++ b/kolibri/core/content/management/commands/importcontent.py @@ -413,7 +413,6 @@ def _transfer( # noqa: max-complexity=16 ContentNode.objects.filter(channel_id=channel_id, available=True) .exclude(kind=content_kinds.TOPIC) .values("content_id") - .distinct() .count() ) diff --git a/kolibri/core/content/utils/annotation.py b/kolibri/core/content/utils/annotation.py index dec4ef516cc..fe901df9240 100644 --- a/kolibri/core/content/utils/annotation.py +++ b/kolibri/core/content/utils/annotation.py @@ -766,10 +766,7 @@ def calculate_published_size(channel): def calculate_total_resource_count(channel): content_nodes = ContentNode.objects.filter(channel_id=channel.id) channel.total_resource_count = ( - content_nodes.filter(available=True) - .exclude(kind=content_kinds.TOPIC) - .dedupe_by_content_id() - .count() + content_nodes.filter(available=True).exclude(kind=content_kinds.TOPIC).count() ) channel.save() diff --git a/kolibri/core/content/utils/import_export_content.py b/kolibri/core/content/utils/import_export_content.py index 109694d9b57..4def62fdb64 100644 --- a/kolibri/core/content/utils/import_export_content.py +++ b/kolibri/core/content/utils/import_export_content.py @@ -138,8 +138,7 @@ def get_import_export_data( # noqa: C901 ) queried_file_objects = {} - - content_ids = set() + number_of_resources = 0 while min_boundary < max_rght: @@ -170,13 +169,11 @@ def get_import_export_data( # noqa: C901 ) ) - included_content_ids = nodes_segment.values_list( - "content_id", flat=True - ).distinct() + count_content_ids = nodes_segment.count() # Only bother with this query if there were any resources returned above. 
- if included_content_ids: - content_ids.update(included_content_ids) + if count_content_ids: + number_of_resources = number_of_resources + count_content_ids file_objects = LocalFile.objects.filter( files__contentnode__in=nodes_segment ).values("id", "file_size", "extension") @@ -207,8 +204,7 @@ def get_import_export_data( # noqa: C901 files_to_download = list(queried_file_objects.values()) total_bytes_to_transfer = sum(map(lambda x: x["file_size"] or 0, files_to_download)) - - return len(content_ids), files_to_download, total_bytes_to_transfer + return number_of_resources, files_to_download, total_bytes_to_transfer def retry_import(e): diff --git a/kolibri/core/content/utils/upgrade.py b/kolibri/core/content/utils/upgrade.py index 82066564751..429a3dcc3ba 100644 --- a/kolibri/core/content/utils/upgrade.py +++ b/kolibri/core/content/utils/upgrade.py @@ -566,7 +566,6 @@ def get_import_data_for_update( i = 0 updated_ids_slice = updated_resource_ids[i : i + batch_size] - nodes_to_include = ContentNode.objects.filter(channel_id=channel_id) # if requested, filter out nodes we're not able to render diff --git a/kolibri/core/public/test/test_api.py b/kolibri/core/public/test/test_api.py index 6e0b7d8592c..926f9860433 100644 --- a/kolibri/core/public/test/test_api.py +++ b/kolibri/core/public/test/test_api.py @@ -204,7 +204,7 @@ def test_public_channel_lookup(self): "name": "science", "language": "es", # root node language "description": "", - "total_resource_count": 2, # should account for nodes with duplicate content_ids + "total_resource_count": 3, # should account for nodes with duplicate content_ids "version": 0, "published_size": 20, "last_published": None, diff --git a/kolibri/plugins/device/api.py b/kolibri/plugins/device/api.py index a471ddd6741..e7aa42c077b 100644 --- a/kolibri/plugins/device/api.py +++ b/kolibri/plugins/device/api.py @@ -48,8 +48,7 @@ def to_representation(self, instance): # count the total number of renderable non-topic resources in the channel 
# (note: it's faster to count them all and then subtract the unrenderables, of which there are fewer) value["total_resources"] = ( - channel_nodes.dedupe_by_content_id().count() - - unrenderable_nodes.dedupe_by_content_id().count() + channel_nodes.count() - unrenderable_nodes.count() ) if "total_file_size" in include_fields: diff --git a/kolibri/plugins/device/test/test_api.py b/kolibri/plugins/device/test/test_api.py index 638f4bc342b..213165cff06 100644 --- a/kolibri/plugins/device/test/test_api.py +++ b/kolibri/plugins/device/test/test_api.py @@ -175,8 +175,8 @@ def test_channelmetadata_resource_info(self): get_params, ) # N.B. Because of our not very good fixture data, most of our content nodes are by default not renderable - # Hence this will return 1 if everything is deduped properly. - self.assertEqual(response.data["total_resources"], 1) + # Hence this will return 2, since nodes sharing a content_id are each counted (no deduplication). + self.assertEqual(response.data["total_resources"], 2) self.assertEqual(response.data["total_file_size"], 0) self.assertEqual(response.data["on_device_resources"], 4) self.assertEqual(response.data["on_device_file_size"], 0) @@ -188,7 +188,7 @@ def test_channelmetadata_include_fields_filter_has_total_resources(self): reverse("kolibri:kolibri.plugins.device:device_channel-list"), {"include_fields": "total_resources"}, ) - self.assertEqual(response.data[0]["total_resources"], 1) + self.assertEqual(response.data[0]["total_resources"], 2) def test_channelmetadata_include_fields_filter_has_total_file_size(self): LocalFile.objects.filter( @@ -263,7 +263,7 @@ def test_all_nodes_present_studio(self): data={"channel_id": self.the_channel_id}, format="json", ) - self.assertEqual(response.data["resource_count"], 2) + self.assertEqual(response.data["resource_count"], 3) self.assertEqual( response.data["file_size"], sum( @@ -310,7 +310,7 @@ def test_all_nodes_present_export(self): data={"channel_id": self.the_channel_id, "export": True}, format="json", ) - 
self.assertEqual(response.data["resource_count"], 2) + self.assertEqual(response.data["resource_count"], 3) self.assertEqual( response.data["file_size"], sum(