Skip to content

Commit

Permalink
Merge pull request #8506 from AtKristijan/deduplication-of-resource-i…
Browse files Browse the repository at this point in the history
…mport

Fix bug with import resource when there are duplicates.
  • Loading branch information
rtibbles authored Mar 19, 2022
2 parents 0120fc2 + 936868e commit db62f2b
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 23 deletions.
1 change: 0 additions & 1 deletion kolibri/core/content/management/commands/importcontent.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,6 @@ def _transfer( # noqa: max-complexity=16
ContentNode.objects.filter(channel_id=channel_id, available=True)
.exclude(kind=content_kinds.TOPIC)
.values("content_id")
.distinct()
.count()
)

Expand Down
5 changes: 1 addition & 4 deletions kolibri/core/content/utils/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,7 @@ def calculate_published_size(channel):
def calculate_total_resource_count(channel):
content_nodes = ContentNode.objects.filter(channel_id=channel.id)
channel.total_resource_count = (
content_nodes.filter(available=True)
.exclude(kind=content_kinds.TOPIC)
.dedupe_by_content_id()
.count()
content_nodes.filter(available=True).exclude(kind=content_kinds.TOPIC).count()
)
channel.save()

Expand Down
14 changes: 5 additions & 9 deletions kolibri/core/content/utils/import_export_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,7 @@ def get_import_export_data( # noqa: C901
)

queried_file_objects = {}

content_ids = set()
number_of_resources = 0

while min_boundary < max_rght:

Expand Down Expand Up @@ -170,13 +169,11 @@ def get_import_export_data( # noqa: C901
)
)

included_content_ids = nodes_segment.values_list(
"content_id", flat=True
).distinct()
count_content_ids = nodes_segment.count()

# Only bother with this query if there were any resources returned above.
if included_content_ids:
content_ids.update(included_content_ids)
if count_content_ids:
number_of_resources = number_of_resources + count_content_ids
file_objects = LocalFile.objects.filter(
files__contentnode__in=nodes_segment
).values("id", "file_size", "extension")
Expand Down Expand Up @@ -207,8 +204,7 @@ def get_import_export_data( # noqa: C901
files_to_download = list(queried_file_objects.values())

total_bytes_to_transfer = sum(map(lambda x: x["file_size"] or 0, files_to_download))

return len(content_ids), files_to_download, total_bytes_to_transfer
return number_of_resources, files_to_download, total_bytes_to_transfer


def retry_import(e):
Expand Down
1 change: 0 additions & 1 deletion kolibri/core/content/utils/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,6 @@ def get_import_data_for_update(
i = 0

updated_ids_slice = updated_resource_ids[i : i + batch_size]

nodes_to_include = ContentNode.objects.filter(channel_id=channel_id)

# if requested, filter out nodes we're not able to render
Expand Down
2 changes: 1 addition & 1 deletion kolibri/core/public/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def test_public_channel_lookup(self):
"name": "science",
"language": "es", # root node language
"description": "",
"total_resource_count": 2, # should account for nodes with duplicate content_ids
"total_resource_count": 3, # should account for nodes with duplicate content_ids
"version": 0,
"published_size": 20,
"last_published": None,
Expand Down
3 changes: 1 addition & 2 deletions kolibri/plugins/device/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ def to_representation(self, instance):
# count the total number of renderable non-topic resources in the channel
# (note: it's faster to count them all and then subtract the unrenderables, of which there are fewer)
value["total_resources"] = (
channel_nodes.dedupe_by_content_id().count()
- unrenderable_nodes.dedupe_by_content_id().count()
channel_nodes.count() - unrenderable_nodes.count()
)

if "total_file_size" in include_fields:
Expand Down
10 changes: 5 additions & 5 deletions kolibri/plugins/device/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ def test_channelmetadata_resource_info(self):
get_params,
)
# N.B. Because of our not very good fixture data, most of our content nodes are by default not renderable
# Hence this will return 1 if everything is deduped properly.
self.assertEqual(response.data["total_resources"], 1)
# Hence this will return 2 if everything is duplicated properly.
self.assertEqual(response.data["total_resources"], 2)
self.assertEqual(response.data["total_file_size"], 0)
self.assertEqual(response.data["on_device_resources"], 4)
self.assertEqual(response.data["on_device_file_size"], 0)
Expand All @@ -188,7 +188,7 @@ def test_channelmetadata_include_fields_filter_has_total_resources(self):
reverse("kolibri:kolibri.plugins.device:device_channel-list"),
{"include_fields": "total_resources"},
)
self.assertEqual(response.data[0]["total_resources"], 1)
self.assertEqual(response.data[0]["total_resources"], 2)

def test_channelmetadata_include_fields_filter_has_total_file_size(self):
LocalFile.objects.filter(
Expand Down Expand Up @@ -263,7 +263,7 @@ def test_all_nodes_present_studio(self):
data={"channel_id": self.the_channel_id},
format="json",
)
self.assertEqual(response.data["resource_count"], 2)
self.assertEqual(response.data["resource_count"], 3)
self.assertEqual(
response.data["file_size"],
sum(
Expand Down Expand Up @@ -310,7 +310,7 @@ def test_all_nodes_present_export(self):
data={"channel_id": self.the_channel_id, "export": True},
format="json",
)
self.assertEqual(response.data["resource_count"], 2)
self.assertEqual(response.data["resource_count"], 3)
self.assertEqual(
response.data["file_size"],
sum(
Expand Down

0 comments on commit db62f2b

Please sign in to comment.