Skip to content

Commit

Permalink
Merge pull request #4920 from kobotoolbox/add-import-cleanup-task
Browse files Browse the repository at this point in the history
Add maintenance function to remove old import tasks
  • Loading branch information
jamesrkiger authored Apr 24, 2024
2 parents b4aade5 + b1d0b2b commit 4340874
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 9 deletions.
10 changes: 9 additions & 1 deletion kobo/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,11 @@
'Number of days to keep asset snapshots',
'positive_int'
),
'IMPORT_TASK_DAYS_RETENTION': (
90,
'Number of days to keep import tasks',
'positive_int',
),
'FREE_TIER_THRESHOLDS': (
LazyJSONSerializable(FREE_TIER_NO_THRESHOLDS),
'Free tier thresholds: storage in kilobytes, '
Expand Down Expand Up @@ -661,10 +666,13 @@
'PROJECT_OWNERSHIP_AUTO_ACCEPT_INVITES',
),
'Trash bin': (
'ASSET_SNAPSHOT_DAYS_RETENTION',
'ACCOUNT_TRASH_GRACE_PERIOD',
'PROJECT_TRASH_GRACE_PERIOD',
),
'Regular maintenance settings': (
'ASSET_SNAPSHOT_DAYS_RETENTION',
'IMPORT_TASK_DAYS_RETENTION',
),
'Tier settings': (
'FREE_TIER_THRESHOLDS',
'FREE_TIER_DISPLAY',
Expand Down
20 changes: 13 additions & 7 deletions kpi/maintenance_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from django.db.models import Exists, OuterRef, Q
from django.utils import timezone

from kpi.models import AssetSnapshot
from kpi.models import AssetSnapshot, ImportTask
from kpi.utils.chunked_delete import chunked_delete


def remove_old_asset_snapshots():
Expand All @@ -20,9 +21,14 @@ def remove_old_asset_snapshots():
date_created__lt=timezone.now() - timedelta(days=days),
).filter(Exists(newer_snapshot_for_asset) | Q(asset_version=None))

while True:
count, _ = delete_queryset.filter(
pk__in=delete_queryset[:1000]
).delete()
if not count:
break
chunked_delete(delete_queryset)


def remove_old_import_tasks():
    """
    Delete import tasks that have outlived their retention period.

    The retention period, in days, is read from the
    `IMPORT_TASK_DAYS_RETENTION` constance setting. Every `ImportTask` whose
    `date_created` is earlier than "now minus that many days" is removed
    through `chunked_delete`.
    """
    retention_days = constance.config.IMPORT_TASK_DAYS_RETENTION
    # Anything created strictly before this moment is considered expired
    cutoff = timezone.now() - timedelta(days=retention_days)

    chunked_delete(ImportTask.objects.filter(date_created__lt=cutoff))
3 changes: 2 additions & 1 deletion kpi/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files
from kobo.celery import celery_app
from kpi.constants import LIMIT_HOURS_23
from kpi.maintenance_tasks import remove_old_asset_snapshots
from kpi.maintenance_tasks import remove_old_asset_snapshots, remove_old_import_tasks
from kpi.models.asset import Asset
from kpi.models.import_export_task import (
ExportTask,
Expand Down Expand Up @@ -102,4 +102,5 @@ def perform_maintenance():
Run daily maintenance tasks
"""
remove_unused_markdown_files()
remove_old_import_tasks()
remove_old_asset_snapshots()
33 changes: 33 additions & 0 deletions kpi/tests/test_import_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# coding: utf-8
from datetime import timedelta

from django.contrib.auth import get_user_model
from django.utils import timezone

from kpi.maintenance_tasks import remove_old_import_tasks
from kpi.models import ImportTask
from kpi.tests.base_test_case import BaseTestCase


class AssetImportTaskHousekeepingTest(BaseTestCase):
    """
    Verify that `remove_old_import_tasks()` deletes expired import tasks and
    leaves recent ones untouched.
    """

    fixtures = ['test_data']

    def setUp(self):
        self.user = get_user_model().objects.get(username='someuser')

    def test_remove_old_import_tasks(self):
        expired_task = ImportTask.objects.create(user=self.user, data='{}')
        # `date_created` cannot be chosen when the row is created (it is
        # filled in automatically - presumably via `auto_now_add`, to be
        # confirmed on the model), so back-date it with a targeted save.
        # 95 days is beyond the 90-day default of `IMPORT_TASK_DAYS_RETENTION`.
        expired_task.date_created = timezone.now() - timedelta(days=95)
        expired_task.save(update_fields=['date_created'])

        recent_task = ImportTask.objects.create(
            user=self.user,
            data='{}',
        )

        remove_old_import_tasks()

        # Only the back-dated task must have been purged
        assert ImportTask.objects.filter(id=recent_task.id).exists()
        assert not ImportTask.objects.filter(id=expired_task.id).exists()
8 changes: 8 additions & 0 deletions kpi/utils/chunked_delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from django.db.models import QuerySet


def chunked_delete(queryset: QuerySet, chunk_size: int = 1000) -> int:
    """
    Delete everything matched by `queryset`, `chunk_size` rows at a time.

    On each pass, the first `chunk_size` rows still matched by `queryset` are
    selected by primary key and deleted. The loop ends as soon as a pass
    reports that nothing was deleted.

    Args:
        queryset: queryset whose matching rows must be removed.
        chunk_size: maximum number of matched rows targeted by each pass.
            Defaults to 1000.

    Returns:
        The sum of the counts reported by every `delete()` call.
        NOTE(review): Django's count includes rows removed by cascade, so it
        may exceed the number of rows matched by `queryset` - confirm before
        relying on it for reporting.

    Raises:
        ValueError: if `chunk_size` is lower than 1.
    """
    if chunk_size < 1:
        # A slice of zero rows would make the first pass delete nothing and
        # return silently with the data still in place; fail loudly instead.
        raise ValueError('chunk_size must be a positive integer')

    total_deleted = 0
    while True:
        # `queryset` itself is never modified: it is re-sliced on every pass,
        # so each slice is taken from the rows that are still present.
        deleted, _ = queryset.filter(pk__in=queryset[:chunk_size]).delete()
        if not deleted:
            break
        total_deleted += deleted

    return total_deleted

0 comments on commit 4340874

Please sign in to comment.