Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(findings): Optimize findings endpoint #7019

3 changes: 3 additions & 0 deletions api/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ All notable changes to the **Prowler API** are documented in this file.
### Added
- Social login integration with Google and GitHub [(#6906)](https://github.com/prowler-cloud/prowler/pull/6906)

### Changed
- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019).

---

## [v1.4.0] (Prowler v5.3.0) - 2025-02-10
Expand Down
27 changes: 13 additions & 14 deletions api/src/backend/api/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,9 +447,7 @@
)

return (
queryset.filter(id__gte=start)
.filter(id__lt=end)
.filter(scan__id=value_uuid)
queryset.filter(id__gte=start).filter(id__lt=end).filter(scan_id=value_uuid)
)

def filter_scan_id_in(self, queryset, name, value):
Expand All @@ -474,31 +472,32 @@
]
)
if start == end:
return queryset.filter(id__gte=start).filter(scan__id__in=uuid_list)
return queryset.filter(id__gte=start).filter(scan_id__in=uuid_list)

Check warning on line 475 in api/src/backend/api/filters.py

View check run for this annotation

Codecov / codecov/patch

api/src/backend/api/filters.py#L475

Added line #L475 was not covered by tests
else:
return (
queryset.filter(id__gte=start)
.filter(id__lt=end)
.filter(scan__id__in=uuid_list)
.filter(scan_id__in=uuid_list)
)

def filter_inserted_at(self, queryset, name, value):
value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(value))
datetime_value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(datetime_value))
end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))

return queryset.filter(id__gte=start).filter(inserted_at__date=value)
return queryset.filter(id__gte=start, id__lt=end)

def filter_inserted_at_gte(self, queryset, name, value):
value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(value))
datetime_value = self.maybe_date_to_datetime(value)
start = uuid7_start(datetime_to_uuid7(datetime_value))

return queryset.filter(id__gte=start).filter(inserted_at__gte=value)
return queryset.filter(id__gte=start)

def filter_inserted_at_lte(self, queryset, name, value):
value = self.maybe_date_to_datetime(value)
end = uuid7_start(datetime_to_uuid7(value))
datetime_value = self.maybe_date_to_datetime(value)
end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))

return queryset.filter(id__lte=end).filter(inserted_at__lte=value)
return queryset.filter(id__lt=end)

def filter_resource_tag(self, queryset, name, value):
overall_query = Q()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from functools import partial

from django.db import connection, migrations


def create_index_on_partitions(
    apps, schema_editor, parent_table: str, index_name: str, index_details: str
):
    """Create an index on every partition of ``parent_table``.

    The partitions are read from ``pg_inherits`` and one
    ``CREATE INDEX CONCURRENTLY IF NOT EXISTS`` statement is executed per
    partition. Each index is named ``<partition>_<index_name>``, with any
    ``.`` in the partition name replaced by ``_``.

    Args:
        apps: Historical app registry passed in by ``RunPython`` (unused).
        schema_editor: Schema editor of the running migration. Both the
            partition lookup and the DDL go through it.
        parent_table: Name of the partitioned parent table.
        index_name: Suffix appended to the partition name to build the
            index name.
        index_details: SQL placed after ``ON <partition>``, e.g.
            ``"(tenant_id, id)"``, optionally followed by a ``where`` clause.
    """
    # Use the connection the migration is running on rather than the global
    # default connection, so the lookup and the DDL target the same database
    # (this also matches drop_index_on_partitions).
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]

    # Iterate over partitions and create index concurrently.
    # Note: PostgreSQL does not allow CONCURRENTLY inside a transaction,
    # so we need atomic = False for this migration.
    for partition in partitions:
        partition_index = f"{partition.replace('.', '_')}_{index_name}"
        sql = (
            f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {partition_index} ON {partition} "
            f"{index_details};"
        )
        schema_editor.execute(sql)


def drop_index_on_partitions(apps, schema_editor, parent_table: str, index_name: str):
    """Drop the ``<partition>_<index_name>`` index from every partition.

    Reverse operation of the per-partition index creation: the partitions of
    ``parent_table`` are read from ``pg_inherits`` and one
    ``DROP INDEX CONCURRENTLY IF EXISTS`` statement is executed per partition.

    Args:
        apps: Historical app registry passed in by ``RunPython`` (unused).
        schema_editor: Schema editor of the running migration; supplies the
            connection for the lookup and executes the DDL.
        parent_table: Name of the partitioned parent table.
        index_name: Suffix that was appended to the partition name when the
            index was created.
    """
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        rows = cursor.fetchall()

    # One DROP per partition; dots in a schema-qualified partition name are
    # flattened to underscores to rebuild the index name.
    for row in rows:
        child_table = row[0]
        partition_index = child_table.replace(".", "_") + "_" + index_name
        schema_editor.execute(
            f"DROP INDEX CONCURRENTLY IF EXISTS {partition_index};"
        )

Check warning on line 46 in api/src/backend/api/migrations/0010_findings_performance_indexes_partitions.py

View check run for this annotation

Codecov / codecov/patch

api/src/backend/api/migrations/0010_findings_performance_indexes_partitions.py#L43-L46

Added lines #L43 - L46 were not covered by tests


class Migration(migrations.Migration):
    """Create the findings performance indexes on each partition of ``findings``.

    Every operation pairs ``create_index_on_partitions`` with
    ``drop_index_on_partitions`` as its reverse, so the migration can be
    unapplied.
    """

    # The index statements use CONCURRENTLY, which PostgreSQL does not allow
    # inside a transaction.
    atomic = False

    dependencies = [
        ("api", "0009_increase_provider_uid_maximum_length"),
    ]

    # One RunPython operation per (index name suffix, index definition) pair,
    # in the order listed.
    operations = [
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name=idx_name,
                index_details=idx_definition,
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name=idx_name,
            ),
        )
        for idx_name, idx_definition in (
            ("findings_tenant_and_id_idx", "(tenant_id, id)"),
            ("find_tenant_scan_idx", "(tenant_id, scan_id)"),
            ("find_tenant_scan_id_idx", "(tenant_id, scan_id, id)"),
            ("find_delta_new_idx", "(tenant_id, id) where delta = 'new'"),
        )
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the new indexes for the finding, resourcetagmapping and resource models."""

    dependencies = [
        ("api", "0010_findings_performance_indexes_partitions"),
    ]

    # One AddIndex operation per (model name, index) pair, in the order listed.
    operations = [
        migrations.AddIndex(model_name=target_model, index=index_definition)
        for target_model, index_definition in (
            (
                "finding",
                models.Index(
                    fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"
                ),
            ),
            (
                "finding",
                models.Index(
                    fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"
                ),
            ),
            (
                "finding",
                models.Index(
                    fields=["tenant_id", "scan_id", "id"],
                    name="find_tenant_scan_id_idx",
                ),
            ),
            (
                "finding",
                # Partial index: only rows whose delta is "new".
                models.Index(
                    condition=models.Q(("delta", "new")),
                    fields=["tenant_id", "id"],
                    name="find_delta_new_idx",
                ),
            ),
            (
                "resourcetagmapping",
                models.Index(
                    fields=["tenant_id", "resource_id"],
                    name="resource_tag_tenant_idx",
                ),
            ),
            (
                "resource",
                models.Index(
                    fields=["tenant_id", "service", "region", "type"],
                    name="resource_tenant_metadata_idx",
                ),
            ),
        )
    ]
20 changes: 20 additions & 0 deletions api/src/backend/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,10 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
fields=["uid", "region", "service", "name"],
name="resource_uid_reg_serv_name_idx",
),
models.Index(
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
]

Expand Down Expand Up @@ -599,6 +603,12 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
),
]

indexes = [
models.Index(
fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
),
]


class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
"""
Expand Down Expand Up @@ -697,7 +707,17 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
],
name="findings_filter_idx",
),
models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
models.Index(
fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
),
models.Index(
fields=["tenant_id", "id"],
condition=Q(delta="new"),
name="find_delta_new_idx",
),
]

class JSONAPIMeta:
Expand Down
6 changes: 3 additions & 3 deletions api/src/backend/api/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2435,7 +2435,7 @@ def test_findings_list(self, authenticated_client, findings_fixture):
[
("resources", ["resources"]),
("scan", ["scans"]),
("resources.provider,scan", ["resources", "scans", "providers"]),
("resources,scan.provider", ["resources", "scans", "providers"]),
],
)
def test_findings_list_include(
Expand Down Expand Up @@ -2491,8 +2491,8 @@ def test_findings_list_include(
("search", "orange juice", 1),
# full text search on resource
("search", "ec2", 2),
# full text search on finding tags
("search", "value2", 2),
# full text search on finding tags (disabled for now)
# ("search", "value2", 2),
# Temporary disabled until we implement tag filtering in the UI
# ("resource_tag_key", "key", 2),
# ("resource_tag_key__in", "key,key2", 2),
Expand Down
2 changes: 1 addition & 1 deletion api/src/backend/api/uuid_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def uuid7_end(uuid_obj: UUID, offset_months: int = 1) -> UUID:
Args:
uuid_obj: A UUIDv7 object.
offset_days: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
offset_months: Number of months to offset from the given UUID's date. Defaults to 1 to cover the case where
partitions are not being used; when they are, the value will be the one set at FINDINGS_TABLE_PARTITION_MONTHS.
Returns:
Expand Down
Loading
Loading