feat(findings): Optimize findings endpoint (#7019)
vicferpoy authored and cesararroba committed Mar 3, 2025
1 parent 78877c4 commit 80e24b9
Showing 11 changed files with 362 additions and 62 deletions.
3 changes: 3 additions & 0 deletions api/CHANGELOG.md
@@ -10,6 +10,9 @@ All notable changes to the **Prowler API** are documented in this file.
 - Social login integration with Google and GitHub [(#6906)](https://github.com/prowler-cloud/prowler/pull/6906)
 - Configurable Sentry integration [(#6874)](https://github.com/prowler-cloud/prowler/pull/6874)
 
+### Changed
+- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019).
+
 ---
 
 ## [v1.4.0] (Prowler v5.3.0) - 2025-02-10
27 changes: 13 additions & 14 deletions api/src/backend/api/filters.py
@@ -447,9 +447,7 @@ def filter_scan_id(self, queryset, name, value):
         )
 
         return (
-            queryset.filter(id__gte=start)
-            .filter(id__lt=end)
-            .filter(scan__id=value_uuid)
+            queryset.filter(id__gte=start).filter(id__lt=end).filter(scan_id=value_uuid)
         )
 
     def filter_scan_id_in(self, queryset, name, value):
@@ -474,31 +472,32 @@ def filter_scan_id_in(self, queryset, name, value):
             ]
         )
         if start == end:
-            return queryset.filter(id__gte=start).filter(scan__id__in=uuid_list)
+            return queryset.filter(id__gte=start).filter(scan_id__in=uuid_list)
         else:
             return (
                 queryset.filter(id__gte=start)
                 .filter(id__lt=end)
-                .filter(scan__id__in=uuid_list)
+                .filter(scan_id__in=uuid_list)
             )
 
     def filter_inserted_at(self, queryset, name, value):
-        value = self.maybe_date_to_datetime(value)
-        start = uuid7_start(datetime_to_uuid7(value))
+        datetime_value = self.maybe_date_to_datetime(value)
+        start = uuid7_start(datetime_to_uuid7(datetime_value))
+        end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
 
-        return queryset.filter(id__gte=start).filter(inserted_at__date=value)
+        return queryset.filter(id__gte=start, id__lt=end)
 
     def filter_inserted_at_gte(self, queryset, name, value):
-        value = self.maybe_date_to_datetime(value)
-        start = uuid7_start(datetime_to_uuid7(value))
+        datetime_value = self.maybe_date_to_datetime(value)
+        start = uuid7_start(datetime_to_uuid7(datetime_value))
 
-        return queryset.filter(id__gte=start).filter(inserted_at__gte=value)
+        return queryset.filter(id__gte=start)
 
     def filter_inserted_at_lte(self, queryset, name, value):
-        value = self.maybe_date_to_datetime(value)
-        end = uuid7_start(datetime_to_uuid7(value))
+        datetime_value = self.maybe_date_to_datetime(value)
+        end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
 
-        return queryset.filter(id__lte=end).filter(inserted_at__lte=value)
+        return queryset.filter(id__lt=end)
 
     def filter_resource_tag(self, queryset, name, value):
         overall_query = Q()
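The change from `scan__id` to `scan_id` filters on the local foreign-key column instead of traversing the relation, avoiding a join against the scans table. The date filters lean on the fact that finding IDs are UUIDv7 values whose top 48 bits encode a Unix-millisecond timestamp, so a date predicate becomes a pure primary-key range. A minimal sketch of that boundary trick (the repo's helpers are `datetime_to_uuid7` and `uuid7_start`; `uuid7_lower_bound` below is an illustrative stand-in, not the actual implementation):

```python
from datetime import datetime, timedelta, timezone
from uuid import UUID


def uuid7_lower_bound(dt: datetime) -> UUID:
    """Smallest UUIDv7-style value for an instant: timestamp bits set,
    random bits zeroed. Only meant as a comparison boundary."""
    unix_ms = int(dt.timestamp() * 1000)  # top 48 bits of a UUIDv7
    return UUID(int=(unix_ms << 80) | (0x7 << 76))  # plus the version nibble


day = datetime(2025, 3, 1, tzinfo=timezone.utc)
start = uuid7_lower_bound(day)
end = uuid7_lower_bound(day + timedelta(days=1))
# Finding.objects.filter(id__gte=start, id__lt=end) then selects the whole
# day through the primary key alone, so the (tenant_id, id) index and
# partition pruning do the work instead of a scan over inserted_at.
```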
109 changes: 109 additions & 0 deletions api/src/backend/api/migrations/0010_findings_performance_indexes_partitions.py
@@ -0,0 +1,109 @@
from functools import partial

from django.db import connection, migrations


def create_index_on_partitions(
    apps, schema_editor, parent_table: str, index_name: str, index_details: str
):
    with connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]
        # Iterate over partitions and create index concurrently.
        # Note: PostgreSQL does not allow CONCURRENTLY inside a transaction,
        # so we need atomic = False for this migration.
        for partition in partitions:
            sql = (
                f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {partition.replace('.', '_')}_{index_name} ON {partition} "
                f"{index_details};"
            )
            schema_editor.execute(sql)


def drop_index_on_partitions(apps, schema_editor, parent_table: str, index_name: str):
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]

        # Iterate over partitions and drop index concurrently.
        for partition in partitions:
            partition_index = f"{partition.replace('.', '_')}_{index_name}"
            sql = f"DROP INDEX CONCURRENTLY IF EXISTS {partition_index};"
            schema_editor.execute(sql)


class Migration(migrations.Migration):
    atomic = False

    dependencies = [
        ("api", "0009_increase_provider_uid_maximum_length"),
    ]

    operations = [
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="findings_tenant_and_id_idx",
                index_details="(tenant_id, id)",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="findings_tenant_and_id_idx",
            ),
        ),
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_idx",
                index_details="(tenant_id, scan_id)",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_idx",
            ),
        ),
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_id_idx",
                index_details="(tenant_id, scan_id, id)",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_id_idx",
            ),
        ),
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="find_delta_new_idx",
                index_details="(tenant_id, id) where delta = 'new'",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="find_delta_new_idx",
            ),
        ),
    ]
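Since PostgreSQL refuses `CREATE INDEX CONCURRENTLY` inside a transaction block, the migration sets `atomic = False` and iterates over the partitions itself. A quick sanity check after migrating could reuse the same `pg_inherits` lookup (a sketch, not part of the commit):

```python
from django.db import connection


def partition_indexes(parent_table: str) -> dict[str, list[str]]:
    """Map each partition of parent_table to the names of its indexes."""
    with connection.cursor() as cursor:
        cursor.execute(
            "SELECT inhrelid::regclass::text FROM pg_inherits "
            "WHERE inhparent = %s::regclass;",
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]
        indexes = {}
        for partition in partitions:
            cursor.execute(
                "SELECT indexname FROM pg_indexes WHERE tablename = %s;",
                [partition.split(".")[-1]],  # strip any schema qualifier
            )
            indexes[partition] = [row[0] for row in cursor.fetchall()]
    return indexes


# partition_indexes("findings") should now list
# <partition>_findings_tenant_and_id_idx (and friends) for every partition.
```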
@@ -0,0 +1,49 @@
from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("api", "0010_findings_performance_indexes_partitions"),
    ]

    operations = [
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                condition=models.Q(("delta", "new")),
                fields=["tenant_id", "id"],
                name="find_delta_new_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="resourcetagmapping",
            index=models.Index(
                fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="resource",
            index=models.Index(
                fields=["tenant_id", "service", "region", "type"],
                name="resource_tenant_metadata_idx",
            ),
        ),
    ]
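This companion migration registers the same indexes in Django's migration state, so the model definitions in `models.py` below stay in sync with what the previous migration created on the partitions. To confirm a findings query actually uses them, `QuerySet.explain()` is enough (a sketch; the import path and placeholder IDs are assumptions, and it must run with the tenant's row-level-security context active):

```python
from api.models import Finding  # import path assumed

tenant_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUIDs
scan_id = "00000000-0000-0000-0000-000000000001"

plan = (
    Finding.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
    .order_by("id")
    .explain()  # returns the PostgreSQL plan for this queryset
)
print(plan)  # expect an index scan using find_tenant_scan_id_idx
```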
20 changes: 20 additions & 0 deletions api/src/backend/api/models.py
@@ -552,6 +552,10 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
fields=["uid", "region", "service", "name"],
name="resource_uid_reg_serv_name_idx",
),
models.Index(
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
]

@@ -599,6 +603,12 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
             ),
         ]
 
+        indexes = [
+            models.Index(
+                fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
+            ),
+        ]
+
 
 class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
     """
@@ -697,7 +707,17 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
                 ],
                 name="findings_filter_idx",
             ),
+            models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
             GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
+            models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
+            models.Index(
+                fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
+            ),
+            models.Index(
+                fields=["tenant_id", "id"],
+                condition=Q(delta="new"),
+                name="find_delta_new_idx",
+            ),
         ]
 
     class JSONAPIMeta:
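`find_delta_new_idx` is a partial index: it only contains rows with `delta = 'new'`, so it stays small while exactly matching the hot "newly detected findings" lookup. A sketch of a query shaped to use it (import path and tenant value are placeholders):

```python
from api.models import Finding  # import path assumed

tenant_id = "00000000-0000-0000-0000-000000000000"  # placeholder

# Equality on tenant_id plus delta='new', ordered by the indexed id
# column: the exact shape the partial index covers.
new_finding_ids = list(
    Finding.objects.filter(tenant_id=tenant_id, delta="new")
    .order_by("id")
    .values_list("id", flat=True)[:100]
)
```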
6 changes: 3 additions & 3 deletions api/src/backend/api/tests/test_views.py
@@ -2435,7 +2435,7 @@ def test_findings_list(self, authenticated_client, findings_fixture):
         [
             ("resources", ["resources"]),
             ("scan", ["scans"]),
-            ("resources.provider,scan", ["resources", "scans", "providers"]),
+            ("resources,scan.provider", ["resources", "scans", "providers"]),
         ],
     )
     def test_findings_list_include(
@@ -2491,8 +2491,8 @@ def test_findings_list_include(
("search", "orange juice", 1),
# full text search on resource
("search", "ec2", 2),
# full text search on finding tags
("search", "value2", 2),
# full text search on finding tags (disabled for now)
# ("search", "value2", 2),
# Temporary disabled until we implement tag filtering in the UI
# ("resource_tag_key", "key", 2),
# ("resource_tag_key__in", "key,key2", 2),
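The corrected parametrization reaches the provider through the scan relation (`scan.provider`) instead of through resources. A sketch of the request that case exercises (endpoint path and response shape assumed from the surrounding tests):

```python
response = authenticated_client.get(
    "/api/v1/findings", {"include": "resources,scan.provider"}
)
included_types = {item["type"] for item in response.json()["included"]}
assert included_types == {"resources", "scans", "providers"}
```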
2 changes: 1 addition & 1 deletion api/src/backend/api/uuid_utils.py
@@ -106,7 +106,7 @@ def uuid7_end(uuid_obj: UUID, offset_months: int = 1) -> UUID:
     Args:
         uuid_obj: A UUIDv7 object.
-        offset_days: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
+        offset_months: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
             partitions are not being used, if so the value will be the one set at FINDINGS_TABLE_PARTITION_MONTHS.
 
     Returns:
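For context, `uuid7_end` builds a primary-key upper bound whose embedded timestamp sits `offset_months` ahead of the given UUID's, so ranges still cover rows written past the last partition boundary. A sketch of those semantics (`add_months` and `uuid7_end_sketch` are illustrative stand-ins, not the repo's code):

```python
from datetime import datetime, timezone
from uuid import UUID


def add_months(dt: datetime, months: int) -> datetime:
    """First day of the month `months` after dt's month."""
    month_index = dt.month - 1 + months
    return dt.replace(year=dt.year + month_index // 12,
                      month=month_index % 12 + 1, day=1)


def uuid7_end_sketch(uuid_obj: UUID, offset_months: int = 1) -> UUID:
    ms = uuid_obj.int >> 80  # top 48 bits hold Unix milliseconds
    dt = datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
    end_ms = int(add_months(dt, offset_months).timestamp() * 1000)
    return UUID(int=(end_ms << 80) | (0x7 << 76))
```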