Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove pulls trigger #450

Merged
merged 6 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions shared/api_archive/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class ArchiveService(object):
In s3 terms, this would be the name of the bucket
"""

storage_hash: str
storage_hash: str | None
"""
A hash key of the repo for internal storage
"""
Expand All @@ -64,7 +64,10 @@ def __init__(self, repository, ttl=None):
self.ttl = ttl or int(get_config("services", "minio", "ttl", default=self.ttl))

self.storage = StorageService()
self.storage_hash = self.get_archive_hash(repository)
if repository:
self.storage_hash = self.get_archive_hash(repository)
else:
self.storage_hash = None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making ArchiveService dependent on a repository was complicating my flare cleanup task.
It seems like repository is only needed to make the hash for reading and writing to the archive - you don't need it to delete.
To make my flare cleanup task much lighter, I removed the requirement for a repository in order to use ArchiveService. I added checks to the ArchiveStorage functions where the hash is used, so that you can't do something like write to storage without a hash.


@classmethod
def get_archive_hash(cls, repository) -> str:
Expand Down Expand Up @@ -98,6 +101,8 @@ def write_json_data_to_storage(
*,
encoder=ReportEncoder,
):
if not self.storage_hash:
raise ValueError("No hash key provided")
if commit_id is None:
# Some classes don't have a commit associated with them
# For example Pull belongs to multiple commits.
Expand Down Expand Up @@ -147,20 +152,29 @@ def read_file(self, path: str) -> str:
return contents.decode()

@sentry_sdk.trace
def delete_file(self, path):
def delete_file(self, path: str) -> None:
    """Remove the single file at ``path`` from archive storage."""
    bucket = self.root
    self.storage.delete_file(bucket, path)

@sentry_sdk.trace
def delete_files(self, paths: list[str]) -> None:
    """Bulk-delete the given list of archive paths from storage."""
    bucket = self.root
    self.storage.delete_files(bucket_name=bucket, paths=paths)

def read_chunks(self, commit_sha: str) -> str:
    """
    Convenience wrapper: fetch a commit's chunks file from the archive.

    Requires a storage hash, i.e. the service must have been constructed
    with a repository; raises ValueError otherwise.
    """
    if not self.storage_hash:
        raise ValueError("No hash key provided")
    chunks_path = MinioEndpoints.chunks.get_path(
        version="v4", repo_hash=self.storage_hash, commitid=commit_sha
    )
    return self.read_file(chunks_path)

def create_presigned_put(self, path):
def create_presigned_put(self, path: str) -> str:
return self.storage.create_presigned_put(self.root, path, self.ttl)
3 changes: 2 additions & 1 deletion shared/celery_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
brolly_stats_rollup_task_name = (
f"app.cron.{TaskConfigGroup.daily.value}.BrollyStatsRollupTask"
)
flare_cleanup_task_name = f"app.cron.{TaskConfigGroup.daily.value}.FlareCleanupTask"


def get_task_group(task_name: str) -> Optional[str]:
Expand Down Expand Up @@ -169,7 +170,7 @@ class BaseCeleryConfig(object):
worker_prefetch_multiplier = int(
get_config("setup", "tasks", "celery", "prefetch", default=1)
)
# !!! NEVER 0 !!! 0 == infinate
# !!! NEVER 0 !!! 0 == infinite

# http://celery.readthedocs.org/en/latest/configuration.html#celeryd-task-soft-time-limit
task_soft_time_limit = int(
Expand Down
8 changes: 7 additions & 1 deletion shared/django_apps/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,11 @@ def external_id(self):
return self.pullid

def should_write_to_storage(self) -> bool:
"""
This only applies to the flare field.
Flare is used to draw static graphs (see GraphHandler view in api) and can be large.
Flare cleanup is handled by FlareCleanupTask in worker.
"""
if self.repository is None or self.repository.author is None:
return False
is_codecov_repo = self.repository.author.username == "codecov"
Expand All @@ -430,7 +435,8 @@ def should_write_to_storage(self) -> bool:
_flare = models.JSONField(db_column="flare", null=True)
_flare_storage_path = models.URLField(db_column="flare_storage_path", null=True)
flare = ArchiveField(
should_write_to_storage_fn=should_write_to_storage, default_value_class=dict
should_write_to_storage_fn=should_write_to_storage,
default_value_class=dict,
)

def save(self, *args, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 4.2.16 on 2024-12-06 00:15

from django.db import migrations

from shared.django_apps.migration_utils import RiskyRunSQL


# Removes the legacy DB trigger that nulled out `pulls.flare` whenever a
# pull's state left 'open' (see pulls_before_update_drop_flare below in this
# PR's deleted SQL); flare cleanup is now handled by the FlareCleanupTask
# cron job registered in shared/celery_config.py.
class Migration(migrations.Migration):
dependencies = [
("legacy_migrations", "0007_delete_repo_trigger_stats"),
]

# RiskyRunSQL wraps raw SQL the migration framework cannot introspect.
# Forward: drop the trigger and its function if they still exist
# (IF EXISTS keeps this idempotent on databases already cleaned up).
# Reverse: deliberate no-op — the trigger is not meant to come back.
operations = [
RiskyRunSQL(
"""
DROP TRIGGER IF EXISTS pulls_before_update_drop_flare ON pulls;
DROP FUNCTION IF EXISTS pulls_drop_flare();
""",
reverse_sql=migrations.RunSQL.noop,
)
]
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from .commits import run_sql as commits_run_sql
from .owners import run_sql as owners_run_sql
from .pulls import run_sql as pulls_run_sql
from .repos import run_sql as repos_run_sql


def run_sql(schema_editor):
commits_run_sql(schema_editor)
owners_run_sql(schema_editor)
repos_run_sql(schema_editor)
pulls_run_sql(schema_editor)

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -57,24 +57,6 @@ def run_sql(schema_editor):
)
execute procedure owner_yaml_updated();


drop trigger pulls_before_insert on pulls;
drop function pulls_insert();
drop trigger pulls_before_update on pulls;
drop function pulls_update();

create or replace function pulls_drop_flare() returns trigger as $$
begin
new.flare = null;
return new;
end;
$$ language plpgsql;

create trigger pulls_before_update_drop_flare before update on pulls
for each row
when (new.state != 'open'::pull_state)
execute procedure pulls_drop_flare();

---- Function Changes -----
drop function if exists get_new_repos(int);
drop function if exists get_pull(int, int);
Expand Down
4 changes: 2 additions & 2 deletions shared/django_apps/utils/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_commitid(self) -> Optional[str]:

class ArchiveField:
"""This is a helper class that transparently handles models' fields that are saved in storage.
Classes that use the ArchiveField MUST implement ArchiveFieldInterface. It ill throw an error otherwise.
Classes that use the ArchiveField MUST implement ArchiveFieldInterface. It will throw an error otherwise.
It uses the Descriptor pattern: https://docs.python.org/3/howto/descriptor.html

Arguments:
Expand All @@ -58,7 +58,7 @@ class ArchiveField:
rehydrate_fn=rehidrate_data,
default_value='default'
)
For a full example check utils/tests/unit/test_model_utils.py
For a full example check utils/tests/unit/test_model_utils.py in worker
"""

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion shared/storage/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def delete_file(self, bucket_name, path):
except ClientError:
raise

def delete_files(self, bucket_name, paths=[]):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This mutable default value is dangerous and unnecessary. I fixed the occurrences I found across the project.

def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
"""Batch deletes a list of files from a given bucket

Note:
Expand Down
2 changes: 1 addition & 1 deletion shared/storage/fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def delete_file(self, bucket_name, path):
second_deletion = self.fallback_service.delete_file(bucket_name, path)
return first_deletion and second_deletion

def delete_files(self, bucket_name, paths=[]):
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
"""Batch deletes a list of files from a given bucket
(what happens to the files that don't exist?)

Expand Down
2 changes: 1 addition & 1 deletion shared/storage/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def delete_file(self, bucket_name, path):
raise FileNotInStorageError()
return True

def delete_files(self, bucket_name, paths=[]):
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
"""Batch deletes a list of files from a given bucket
(what happens to the files that don't exist?)

Expand Down
12 changes: 6 additions & 6 deletions shared/storage/minio.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,21 +233,21 @@ def read_file(
deletion, returns a ResponseError otherwise.
"""

def delete_file(self, bucket_name, url):
def delete_file(self, bucket_name: str, path: str) -> bool:
try:
# delete a file given a bucket name and a url
self.minio_client.remove_object(bucket_name, url)
# delete a file given a bucket name and a path
self.minio_client.remove_object(bucket_name, path)
return True
except MinioException:
raise

def delete_files(self, bucket_name, urls=[]):
def delete_files(self, bucket_name: str, paths: list[str]) -> list[bool]:
try:
for del_err in self.minio_client.remove_objects(
bucket_name, [DeleteObject(url) for url in urls]
bucket_name, [DeleteObject(path) for path in paths]
):
print("Deletion error: {}".format(del_err))
return [True] * len(urls)
return [True] * len(paths)
except MinioException:
raise

Expand Down
5 changes: 4 additions & 1 deletion tests/unit/django_apps/core/test_core_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from django.test import TestCase

from shared.django_apps.core.models import Commit
from shared.django_apps.core.tests.factories import CommitFactory, RepositoryFactory
from shared.django_apps.core.tests.factories import (
CommitFactory,
RepositoryFactory,
)
from shared.django_apps.reports.tests.factories import CommitReportFactory
from shared.storage.exceptions import FileNotInStorageError

Expand Down
Loading