From 76443667f039610f294e8eebba5134d9dcf1d0da Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Tue, 12 Nov 2024 15:32:07 -0800 Subject: [PATCH 1/6] remove pulls trigger --- shared/django_apps/core/models.py | 3 +++ .../migrations/0005_delete_pulls_trigger.py | 21 +++++++++++++++++++ .../legacy_sql/main/triggers/main.py | 2 -- .../legacy_sql/main/triggers/pulls.py | 17 --------------- .../migrations/legacy_sql/upgrades/v440.py | 18 ---------------- shared/django_apps/utils/model_utils.py | 2 +- 6 files changed, 25 insertions(+), 38 deletions(-) create mode 100644 shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py delete mode 100644 shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/pulls.py diff --git a/shared/django_apps/core/models.py b/shared/django_apps/core/models.py index 4ac11021..5be51f95 100644 --- a/shared/django_apps/core/models.py +++ b/shared/django_apps/core/models.py @@ -435,6 +435,9 @@ def should_write_to_storage(self) -> bool: def save(self, *args, **kwargs): self.updatestamp = timezone.now() + if self.state != PullStates.OPEN.value and self.flare is not None: + # flare is used to draw graphs, can be quite large, so dump it when it's no longer needed + self.flare = None super().save(*args, **kwargs) diff --git a/shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py b/shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py new file mode 100644 index 00000000..95e47a49 --- /dev/null +++ b/shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.16 on 2024-11-12 23:28 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("legacy_migrations", "0004_auto_20231024_1937"), + ] + + # Moved the behavior in this trigger to the .save() method on Pull model + + operations = [ + migrations.RunSQL( + """ + DROP TRIGGER IF EXISTS pulls_before_update_drop_flare ON pulls; + DROP FUNCTION IF EXISTS pulls_drop_flare(); + """, + reverse_sql=migrations.RunSQL.noop, + ), + ] diff --git a/shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/main.py b/shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/main.py index 3298a47e..1fe50348 100644 --- a/shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/main.py +++ b/shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/main.py @@ -1,6 +1,5 @@ from .commits import run_sql as commits_run_sql from .owners import run_sql as owners_run_sql -from .pulls import run_sql as pulls_run_sql from .repos import run_sql as repos_run_sql @@ -8,4 +7,3 @@ def run_sql(schema_editor): commits_run_sql(schema_editor) owners_run_sql(schema_editor) repos_run_sql(schema_editor) - pulls_run_sql(schema_editor) diff --git a/shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/pulls.py b/shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/pulls.py deleted file mode 100644 index 00cc26a1..00000000 --- a/shared/django_apps/legacy_migrations/migrations/legacy_sql/main/triggers/pulls.py +++ /dev/null @@ -1,17 +0,0 @@ -def run_sql(schema_editor): - schema_editor.execute( - """ - create or replace function pulls_drop_flare() returns trigger as $$ - begin - new.flare = null; - return new; - end; - $$ language plpgsql; - - - create trigger pulls_before_update_drop_flare before update on pulls - for each row - when (new.state != 'open'::pull_state) - execute procedure pulls_drop_flare(); - """ - ) diff --git a/shared/django_apps/legacy_migrations/migrations/legacy_sql/upgrades/v440.py b/shared/django_apps/legacy_migrations/migrations/legacy_sql/upgrades/v440.py index ba30a10f..4309aa3f 100644 --- a/shared/django_apps/legacy_migrations/migrations/legacy_sql/upgrades/v440.py +++ b/shared/django_apps/legacy_migrations/migrations/legacy_sql/upgrades/v440.py @@ -57,24 +57,6 @@ def run_sql(schema_editor): ) execute procedure owner_yaml_updated(); - - drop trigger pulls_before_insert on pulls; - drop function pulls_insert(); - drop trigger pulls_before_update on pulls; - drop function pulls_update(); - - create or replace function pulls_drop_flare() returns trigger as $$ - begin - new.flare = null; - return new; - end; - $$ language plpgsql; - - create trigger pulls_before_update_drop_flare before update on pulls - for each row - when (new.state != 'open'::pull_state) - execute procedure pulls_drop_flare(); - ---- Function Changes ----- drop function if exists get_new_repos(int); drop function if exists get_pull(int, int); diff --git a/shared/django_apps/utils/model_utils.py b/shared/django_apps/utils/model_utils.py index 1067cbe6..f125b755 100644 --- a/shared/django_apps/utils/model_utils.py +++ b/shared/django_apps/utils/model_utils.py @@ -38,7 +38,7 @@ def get_commitid(self) -> Optional[str]: class ArchiveField: """This is a helper class that transparently handles models' fields that are saved in storage. - Classes that use the ArchiveField MUST implement ArchiveFieldInterface. It ill throw an error otherwise. + Classes that use the ArchiveField MUST implement ArchiveFieldInterface. It will throw an error otherwise. It uses the Descriptor pattern: https://docs.python.org/3/howto/descriptor.html Arguments: From f064855f16f35e16afe197fd459e4a2213422b6c Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Tue, 12 Nov 2024 15:50:11 -0800 Subject: [PATCH 2/6] add test --- shared/django_apps/core/models.py | 7 +++-- .../unit/django_apps/core/test_core_models.py | 30 +++++++++++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/shared/django_apps/core/models.py b/shared/django_apps/core/models.py index 5be51f95..579e05e0 100644 --- a/shared/django_apps/core/models.py +++ b/shared/django_apps/core/models.py @@ -430,13 +430,16 @@ def should_write_to_storage(self) -> bool: _flare = models.JSONField(db_column="flare", null=True) _flare_storage_path = models.URLField(db_column="flare_storage_path", null=True) flare = ArchiveField( - should_write_to_storage_fn=should_write_to_storage, default_value_class=dict + should_write_to_storage_fn=should_write_to_storage, + default_value_class=dict, ) def save(self, *args, **kwargs): self.updatestamp = timezone.now() - if self.state != PullStates.OPEN.value and self.flare is not None: + if self.state != PullStates.OPEN.value and self.flare: # flare is used to draw graphs, can be quite large, so dump it when it's no longer needed + # flare can be None or {} to indicate empty + # this gets set in the db as {} since that is the default value for the field self.flare = None super().save(*args, **kwargs) diff --git a/tests/unit/django_apps/core/test_core_models.py b/tests/unit/django_apps/core/test_core_models.py index 6e3dfcf9..e786e8cc 100644 --- a/tests/unit/django_apps/core/test_core_models.py +++ b/tests/unit/django_apps/core/test_core_models.py @@ -4,8 +4,12 @@ from django.forms import ValidationError from django.test import TestCase -from shared.django_apps.core.models import Commit -from shared.django_apps.core.tests.factories import CommitFactory, RepositoryFactory +from shared.django_apps.core.models import Commit, PullStates +from shared.django_apps.core.tests.factories import ( + CommitFactory, + PullFactory, + RepositoryFactory, +) from shared.django_apps.reports.tests.factories import CommitReportFactory from shared.storage.exceptions import FileNotInStorageError @@ -17,6 +21,28 @@ def test_clean_repo(self): repo.clean() +class PullTests(TestCase): + def test_save_method(self): + test_value_for_flare = {"test": "test"} + pull = PullFactory( + state=PullStates.OPEN.value, + _flare=test_value_for_flare, + repository=RepositoryFactory(), + ) + self.assertEqual(pull.flare, test_value_for_flare) + + pull.title = "changing something on the pull while it is open" + pull.save() + pull.refresh_from_db() + self.assertEqual(pull.flare, test_value_for_flare) + + pull.state = PullStates.MERGED.value + pull.save() + pull.refresh_from_db() + self.assertNotEqual(pull.flare, test_value_for_flare) + self.assertEqual(pull.flare, {}) + + class CommitTests(TestCase): def test_commitreport_no_code(self): commit = CommitFactory() From 37420c555f80b6810ae33c26c8fb40c1fa70c87f Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Thu, 5 Dec 2024 15:35:19 -0800 Subject: [PATCH 3/6] put cleanup method into cron job --- shared/celery_config.py | 3 ++- shared/django_apps/core/models.py | 18 +++++++++---- .../migrations/0005_delete_pulls_trigger.py | 21 ---------------- .../0007_delete_pull_update_trigger.py | 21 ++++++++++++++++ shared/django_apps/utils/model_utils.py | 2 +- .../unit/django_apps/core/test_core_models.py | 25 +------------------ 6 files changed, 38 insertions(+), 52 deletions(-) delete mode 100644 shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py create mode 100644 shared/django_apps/legacy_migrations/migrations/0007_delete_pull_update_trigger.py diff --git a/shared/celery_config.py b/shared/celery_config.py index 989be39b..ef714fd0 100644 --- a/shared/celery_config.py +++ b/shared/celery_config.py @@ -113,6 +113,7 @@ brolly_stats_rollup_task_name = ( f"app.cron.{TaskConfigGroup.daily.value}.BrollyStatsRollupTask" ) +flare_cleanup_task_name = f"app.cron.{TaskConfigGroup.daily.value}.FlareCleanupTask" def get_task_group(task_name: str) -> Optional[str]: @@ -169,7 +170,7 @@ class BaseCeleryConfig(object): worker_prefetch_multiplier = int( get_config("setup", "tasks", "celery", "prefetch", default=1) ) - # !!! NEVER 0 !!! 0 == infinate + # !!! NEVER 0 !!! 0 == infinite # http://celery.readthedocs.org/en/latest/configuration.html#celeryd-task-soft-time-limit task_soft_time_limit = int( diff --git a/shared/django_apps/core/models.py b/shared/django_apps/core/models.py index 579e05e0..80b344c7 100644 --- a/shared/django_apps/core/models.py +++ b/shared/django_apps/core/models.py @@ -418,6 +418,19 @@ def external_id(self): return self.pullid def should_write_to_storage(self) -> bool: + """ + This only applies to the flare field. + Flare is used to draw static graphs (see GraphHandler view in api) and can be large. + The majority of flare graphs are used in pr comments, so we keep the (maybe large) flare available while + the pull is OPEN. If the pull is not OPEN, we dump the flare to save space. We will recreate it on + the fly if it is needed for a non-OPEN pull (see GraphHandler view in api). + Flare cleanup is handled by FlareCleanupTask in worker. + """ + if self.state != PullStates.OPEN.value: + # while a pull is OPEN, we check whether to write_to_storage + # when a pull is no longer OPEN, we no longer want the flare in storage. + # The nightly cron job cleans up the value in storage (see FlareCleanupTask in worker) + return False if self.repository is None or self.repository.author is None: return False is_codecov_repo = self.repository.author.username == "codecov" @@ -436,11 +449,6 @@ def should_write_to_storage(self) -> bool: def save(self, *args, **kwargs): self.updatestamp = timezone.now() - if self.state != PullStates.OPEN.value and self.flare: - # flare is used to draw graphs, can be quite large, so dump it when it's no longer needed - # flare can be None or {} to indicate empty - # this gets set in the db as {} since that is the default value for the field - self.flare = None super().save(*args, **kwargs) diff --git a/shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py b/shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py deleted file mode 100644 index 95e47a49..00000000 --- a/shared/django_apps/legacy_migrations/migrations/0005_delete_pulls_trigger.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by Django 4.2.16 on 2024-11-12 23:28 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("legacy_migrations", "0004_auto_20231024_1937"), - ] - - # Moved the behavior in this trigger to the .save() method on Pull model - - operations = [ - migrations.RunSQL( - """ - DROP TRIGGER IF EXISTS pulls_before_update_drop_flare ON pulls; - DROP FUNCTION IF EXISTS pulls_drop_flare(); - """, - reverse_sql=migrations.RunSQL.noop, - ), - ] diff --git a/shared/django_apps/legacy_migrations/migrations/0007_delete_pull_update_trigger.py b/shared/django_apps/legacy_migrations/migrations/0007_delete_pull_update_trigger.py new file mode 100644 index 00000000..776f548e --- /dev/null +++ b/shared/django_apps/legacy_migrations/migrations/0007_delete_pull_update_trigger.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.16 on 2024-12-04 19:53 + +from django.db import migrations + +from shared.django_apps.migration_utils import RiskyRunSQL + + +class Migration(migrations.Migration): + dependencies = [ + ("legacy_migrations", "0006_delete_many_owner_triggers"), + ] + + operations = [ + RiskyRunSQL( + """ + DROP TRIGGER IF EXISTS pulls_before_update_drop_flare ON pulls; + DROP FUNCTION IF EXISTS pulls_drop_flare(); + """, + reverse_sql=migrations.RunSQL.noop, + ) + ] diff --git a/shared/django_apps/utils/model_utils.py b/shared/django_apps/utils/model_utils.py index f125b755..86749784 100644 --- a/shared/django_apps/utils/model_utils.py +++ b/shared/django_apps/utils/model_utils.py @@ -58,7 +58,7 @@ class ArchiveField: rehydrate_fn=rehidrate_data, default_value='default' ) - For a full example check utils/tests/unit/test_model_utils.py + For a full example check utils/tests/unit/test_model_utils.py in worker """ def __init__( diff --git a/tests/unit/django_apps/core/test_core_models.py b/tests/unit/django_apps/core/test_core_models.py index e786e8cc..67d69c88 100644 --- a/tests/unit/django_apps/core/test_core_models.py +++ b/tests/unit/django_apps/core/test_core_models.py @@ -4,10 +4,9 @@ from django.forms import ValidationError from django.test import TestCase -from shared.django_apps.core.models import Commit, PullStates +from shared.django_apps.core.models import Commit from shared.django_apps.core.tests.factories import ( CommitFactory, - PullFactory, RepositoryFactory, ) from shared.django_apps.reports.tests.factories import CommitReportFactory @@ -21,28 +20,6 @@ def test_clean_repo(self): repo.clean() -class PullTests(TestCase): - def test_save_method(self): - test_value_for_flare = {"test": "test"} - pull = PullFactory( - state=PullStates.OPEN.value, - _flare=test_value_for_flare, - repository=RepositoryFactory(), - ) - self.assertEqual(pull.flare, test_value_for_flare) - - pull.title = "changing something on the pull while it is open" - pull.save() - pull.refresh_from_db() - self.assertEqual(pull.flare, test_value_for_flare) - - pull.state = PullStates.MERGED.value - pull.save() - pull.refresh_from_db() - self.assertNotEqual(pull.flare, test_value_for_flare) - self.assertEqual(pull.flare, {}) - - class CommitTests(TestCase): def test_commitreport_no_code(self): commit = CommitFactory() From 7da62b4af161c2864aad83835b2712d67a7ed2e6 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Thu, 5 Dec 2024 16:17:22 -0800 Subject: [PATCH 4/6] fix migration conflict --- ...l_update_trigger.py => 0008_delete_pull_update_trigger.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename shared/django_apps/legacy_migrations/migrations/{0007_delete_pull_update_trigger.py => 0008_delete_pull_update_trigger.py} (78%) diff --git a/shared/django_apps/legacy_migrations/migrations/0007_delete_pull_update_trigger.py b/shared/django_apps/legacy_migrations/migrations/0008_delete_pull_update_trigger.py similarity index 78% rename from shared/django_apps/legacy_migrations/migrations/0007_delete_pull_update_trigger.py rename to shared/django_apps/legacy_migrations/migrations/0008_delete_pull_update_trigger.py index 776f548e..a6dcc53d 100644 --- a/shared/django_apps/legacy_migrations/migrations/0007_delete_pull_update_trigger.py +++ b/shared/django_apps/legacy_migrations/migrations/0008_delete_pull_update_trigger.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.16 on 2024-12-04 19:53 +# Generated by Django 4.2.16 on 2024-12-06 00:15 from django.db import migrations @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("legacy_migrations", "0006_delete_many_owner_triggers"), + ("legacy_migrations", "0007_delete_repo_trigger_stats"), ] operations = [ From 1190957f6c943a140d780099b2a3ced69dd836b6 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Fri, 6 Dec 2024 16:41:42 -0800 Subject: [PATCH 5/6] remove the changes to should_write_to_storage --- shared/django_apps/core/models.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/shared/django_apps/core/models.py b/shared/django_apps/core/models.py index 80b344c7..eb3afddb 100644 --- a/shared/django_apps/core/models.py +++ b/shared/django_apps/core/models.py @@ -421,16 +421,8 @@ def should_write_to_storage(self) -> bool: """ This only applies to the flare field. Flare is used to draw static graphs (see GraphHandler view in api) and can be large. - The majority of flare graphs are used in pr comments, so we keep the (maybe large) flare available while - the pull is OPEN. If the pull is not OPEN, we dump the flare to save space. We will recreate it on - the fly if it is needed for a non-OPEN pull (see GraphHandler view in api). Flare cleanup is handled by FlareCleanupTask in worker. """ - if self.state != PullStates.OPEN.value: - # while a pull is OPEN, we check whether to write_to_storage - # when a pull is no longer OPEN, we no longer want the flare in storage. - # The nightly cron job cleans up the value in storage (see FlareCleanupTask in worker) - return False if self.repository is None or self.repository.author is None: return False is_codecov_repo = self.repository.author.username == "codecov" From 9da9d820e17c3d75d89eaa998b13e276347c3d30 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Fri, 27 Dec 2024 18:10:16 -0800 Subject: [PATCH 6/6] make repository non-required for ArchiveService, add bulk delete to ArchiveService, remove default value from paths --- shared/api_archive/archive.py | 22 ++++++++++++++++++---- shared/storage/aws.py | 2 +- shared/storage/fallback.py | 2 +- shared/storage/memory.py | 2 +- shared/storage/minio.py | 12 ++++++------ 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/shared/api_archive/archive.py b/shared/api_archive/archive.py index c8c8114c..343e899f 100644 --- a/shared/api_archive/archive.py +++ b/shared/api_archive/archive.py @@ -48,7 +48,7 @@ class ArchiveService(object): In s3 terms, this would be the name of the bucket """ - storage_hash: str + storage_hash: str | None """ A hash key of the repo for internal storage """ @@ -64,7 +64,10 @@ def __init__(self, repository, ttl=None): self.ttl = ttl or int(get_config("services", "minio", "ttl", default=self.ttl)) self.storage = StorageService() - self.storage_hash = self.get_archive_hash(repository) + if repository: + self.storage_hash = self.get_archive_hash(repository) + else: + self.storage_hash = None @classmethod def get_archive_hash(cls, repository) -> str: @@ -98,6 +101,8 @@ def write_json_data_to_storage( *, encoder=ReportEncoder, ): + if not self.storage_hash: + raise ValueError("No hash key provided") if commit_id is None: # Some classes don't have a commit associated with them # For example Pull belongs to multiple commits. @@ -147,20 +152,29 @@ def read_file(self, path: str) -> str: return contents.decode() @sentry_sdk.trace - def delete_file(self, path): + def delete_file(self, path: str) -> None: """ Generic method to delete a file from the archive. """ self.storage.delete_file(self.root, path) + @sentry_sdk.trace + def delete_files(self, paths: list[str]) -> None: + """ + Generic method to delete files from the archive. + """ + self.storage.delete_files(bucket_name=self.root, paths=paths) + def read_chunks(self, commit_sha: str) -> str: """ Convenience method to read a chunks file from the archive. """ + if not self.storage_hash: + raise ValueError("No hash key provided") path = MinioEndpoints.chunks.get_path( version="v4", repo_hash=self.storage_hash, commitid=commit_sha ) return self.read_file(path) - def create_presigned_put(self, path): + def create_presigned_put(self, path: str) -> str: return self.storage.create_presigned_put(self.root, path, self.ttl) diff --git a/shared/storage/aws.py b/shared/storage/aws.py index 6d5b8c3e..a4b011f6 100644 --- a/shared/storage/aws.py +++ b/shared/storage/aws.py @@ -147,7 +147,7 @@ def delete_file(self, bucket_name, path): except ClientError: raise - def delete_files(self, bucket_name, paths=[]): + def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]: """Batch deletes a list of files from a given bucket Note: diff --git a/shared/storage/fallback.py b/shared/storage/fallback.py index 87bc0eea..b9ce6a36 100644 --- a/shared/storage/fallback.py +++ b/shared/storage/fallback.py @@ -89,7 +89,7 @@ def delete_file(self, bucket_name, path): second_deletion = self.fallback_service.delete_file(bucket_name, path) return first_deletion and second_deletion - def delete_files(self, bucket_name, paths=[]): + def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]: """Batch deletes a list of files from a given bucket (what happens to the files that don't exist?) diff --git a/shared/storage/memory.py b/shared/storage/memory.py index f1d9de0f..afb8ea05 100644 --- a/shared/storage/memory.py +++ b/shared/storage/memory.py @@ -123,7 +123,7 @@ def delete_file(self, bucket_name, path): raise FileNotInStorageError() return True - def delete_files(self, bucket_name, paths=[]): + def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]: """Batch deletes a list of files from a given bucket (what happens to the files that don't exist?) diff --git a/shared/storage/minio.py b/shared/storage/minio.py index 58d51c61..f2ed67d3 100644 --- a/shared/storage/minio.py +++ b/shared/storage/minio.py @@ -233,21 +233,21 @@ def read_file( deletion, returns a ResponseError otherwise. """ - def delete_file(self, bucket_name, url): + def delete_file(self, bucket_name: str, path: str) -> bool: try: - # delete a file given a bucket name and a url - self.minio_client.remove_object(bucket_name, url) + # delete a file given a bucket name and a path + self.minio_client.remove_object(bucket_name, path) return True except MinioException: raise - def delete_files(self, bucket_name, urls=[]): + def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]: try: for del_err in self.minio_client.remove_objects( - bucket_name, [DeleteObject(url) for url in urls] + bucket_name, [DeleteObject(path) for path in paths] ): print("Deletion error: {}".format(del_err)) - return [True] * len(urls) + return [True] * len(paths) except MinioException: raise