feat(findings): Optimize findings endpoint (#7019)
vicferpoy authored and cesararroba committed Mar 3, 2025
1 parent 78877c4 commit 80e24b9
Showing 11 changed files with 362 additions and 62 deletions.
3 changes: 3 additions & 0 deletions api/CHANGELOG.md
@@ -10,6 +10,9 @@ All notable changes to the **Prowler API** are documented in this file.
 - Social login integration with Google and GitHub [(#6906)](https://github.com/prowler-cloud/prowler/pull/6906)
 - Configurable Sentry integration [(#6874)](https://github.com/prowler-cloud/prowler/pull/6874)
 
+### Changed
+- Optimized `GET /findings` endpoint to improve response time and size [(#7019)](https://github.com/prowler-cloud/prowler/pull/7019).
+
 ---
 
 ## [v1.4.0] (Prowler v5.3.0) - 2025-02-10
27 changes: 13 additions & 14 deletions api/src/backend/api/filters.py
@@ -447,9 +447,7 @@ def filter_scan_id(self, queryset, name, value):
         )
 
         return (
-            queryset.filter(id__gte=start)
-            .filter(id__lt=end)
-            .filter(scan__id=value_uuid)
+            queryset.filter(id__gte=start).filter(id__lt=end).filter(scan_id=value_uuid)
         )
 
     def filter_scan_id_in(self, queryset, name, value):
@@ -474,31 +472,32 @@ def filter_scan_id_in(self, queryset, name, value):
             ]
         )
         if start == end:
-            return queryset.filter(id__gte=start).filter(scan__id__in=uuid_list)
+            return queryset.filter(id__gte=start).filter(scan_id__in=uuid_list)
         else:
             return (
                 queryset.filter(id__gte=start)
                 .filter(id__lt=end)
-                .filter(scan__id__in=uuid_list)
+                .filter(scan_id__in=uuid_list)
             )
 
     def filter_inserted_at(self, queryset, name, value):
-        value = self.maybe_date_to_datetime(value)
-        start = uuid7_start(datetime_to_uuid7(value))
+        datetime_value = self.maybe_date_to_datetime(value)
+        start = uuid7_start(datetime_to_uuid7(datetime_value))
+        end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
 
-        return queryset.filter(id__gte=start).filter(inserted_at__date=value)
+        return queryset.filter(id__gte=start, id__lt=end)
 
     def filter_inserted_at_gte(self, queryset, name, value):
-        value = self.maybe_date_to_datetime(value)
-        start = uuid7_start(datetime_to_uuid7(value))
+        datetime_value = self.maybe_date_to_datetime(value)
+        start = uuid7_start(datetime_to_uuid7(datetime_value))
 
-        return queryset.filter(id__gte=start).filter(inserted_at__gte=value)
+        return queryset.filter(id__gte=start)
 
     def filter_inserted_at_lte(self, queryset, name, value):
-        value = self.maybe_date_to_datetime(value)
-        end = uuid7_start(datetime_to_uuid7(value))
+        datetime_value = self.maybe_date_to_datetime(value)
+        end = uuid7_start(datetime_to_uuid7(datetime_value + timedelta(days=1)))
 
-        return queryset.filter(id__lte=end).filter(inserted_at__lte=value)
+        return queryset.filter(id__lt=end)
 
     def filter_resource_tag(self, queryset, name, value):
         overall_query = Q()
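The change from `scan__id` to `scan_id` filters on the local foreign-key column instead of traversing the relation, avoiding a join against the scans table. The date filters lean on the fact that finding IDs are UUIDv7 values whose top 48 bits encode a Unix-millisecond timestamp, so a date predicate becomes a pure primary-key range. A minimal sketch of that boundary trick (the repo's helpers are `datetime_to_uuid7` and `uuid7_start`; `uuid7_lower_bound` below is an illustrative stand-in, not the actual implementation):

```python
from datetime import datetime, timedelta, timezone
from uuid import UUID


def uuid7_lower_bound(dt: datetime) -> UUID:
    """Smallest UUIDv7-style value for an instant: timestamp bits set,
    random bits zeroed. Only meant as a comparison boundary."""
    unix_ms = int(dt.timestamp() * 1000)  # top 48 bits of a UUIDv7
    return UUID(int=(unix_ms << 80) | (0x7 << 76))  # plus the version nibble


day = datetime(2025, 3, 1, tzinfo=timezone.utc)
start = uuid7_lower_bound(day)
end = uuid7_lower_bound(day + timedelta(days=1))
# Finding.objects.filter(id__gte=start, id__lt=end) then selects the whole
# day through the primary key alone, so the (tenant_id, id) index and
# partition pruning do the work instead of a scan over inserted_at.
```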
109 changes: 109 additions & 0 deletions api/src/backend/api/migrations/0010_findings_performance_indexes_partitions.py
@@ -0,0 +1,109 @@
from functools import partial

from django.db import connection, migrations


def create_index_on_partitions(
    apps, schema_editor, parent_table: str, index_name: str, index_details: str
):
    with connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]
        # Iterate over partitions and create index concurrently.
        # Note: PostgreSQL does not allow CONCURRENTLY inside a transaction,
        # so we need atomic = False for this migration.
        for partition in partitions:
            sql = (
                f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {partition.replace('.', '_')}_{index_name} ON {partition} "
                f"{index_details};"
            )
            schema_editor.execute(sql)


def drop_index_on_partitions(apps, schema_editor, parent_table: str, index_name: str):
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT inhrelid::regclass::text
            FROM pg_inherits
            WHERE inhparent = %s::regclass;
            """,
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]

        # Iterate over partitions and drop index concurrently.
        for partition in partitions:
            partition_index = f"{partition.replace('.', '_')}_{index_name}"
            sql = f"DROP INDEX CONCURRENTLY IF EXISTS {partition_index};"
            schema_editor.execute(sql)


class Migration(migrations.Migration):
    atomic = False

    dependencies = [
        ("api", "0009_increase_provider_uid_maximum_length"),
    ]

    operations = [
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="findings_tenant_and_id_idx",
                index_details="(tenant_id, id)",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="findings_tenant_and_id_idx",
            ),
        ),
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_idx",
                index_details="(tenant_id, scan_id)",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_idx",
            ),
        ),
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_id_idx",
                index_details="(tenant_id, scan_id, id)",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_id_idx",
            ),
        ),
        migrations.RunPython(
            partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="find_delta_new_idx",
                index_details="(tenant_id, id) where delta = 'new'",
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="find_delta_new_idx",
            ),
        ),
    ]
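Since PostgreSQL refuses `CREATE INDEX CONCURRENTLY` inside a transaction block, the migration sets `atomic = False` and iterates over the partitions itself. A quick sanity check after migrating could reuse the same `pg_inherits` lookup (a sketch, not part of the commit):

```python
from django.db import connection


def partition_indexes(parent_table: str) -> dict[str, list[str]]:
    """Map each partition of parent_table to the names of its indexes."""
    with connection.cursor() as cursor:
        cursor.execute(
            "SELECT inhrelid::regclass::text FROM pg_inherits "
            "WHERE inhparent = %s::regclass;",
            [parent_table],
        )
        partitions = [row[0] for row in cursor.fetchall()]
        indexes = {}
        for partition in partitions:
            cursor.execute(
                "SELECT indexname FROM pg_indexes WHERE tablename = %s;",
                [partition.split(".")[-1]],  # strip any schema qualifier
            )
            indexes[partition] = [row[0] for row in cursor.fetchall()]
    return indexes


# partition_indexes("findings") should now list
# <partition>_findings_tenant_and_id_idx (and friends) for every partition.
```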
@@ -0,0 +1,49 @@
from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("api", "0010_findings_performance_indexes_partitions"),
    ]

    operations = [
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="finding",
            index=models.Index(
                condition=models.Q(("delta", "new")),
                fields=["tenant_id", "id"],
                name="find_delta_new_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="resourcetagmapping",
            index=models.Index(
                fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="resource",
            index=models.Index(
                fields=["tenant_id", "service", "region", "type"],
                name="resource_tenant_metadata_idx",
            ),
        ),
    ]
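This companion migration registers the same indexes in Django's migration state, so the model definitions in `models.py` below stay in sync with what the previous migration created on the partitions. To confirm a findings query actually uses them, `QuerySet.explain()` is enough (a sketch; the import path and placeholder IDs are assumptions, and it must run with the tenant's row-level-security context active):

```python
from api.models import Finding  # import path assumed

tenant_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUIDs
scan_id = "00000000-0000-0000-0000-000000000001"

plan = (
    Finding.objects.filter(tenant_id=tenant_id, scan_id=scan_id)
    .order_by("id")
    .explain()  # returns the PostgreSQL plan for this queryset
)
print(plan)  # expect an index scan using find_tenant_scan_id_idx
```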
20 changes: 20 additions & 0 deletions api/src/backend/api/models.py
@@ -552,6 +552,10 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
fields=["uid", "region", "service", "name"],
name="resource_uid_reg_serv_name_idx",
),
models.Index(
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
]

@@ -599,6 +603,12 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
             ),
         ]
 
+        indexes = [
+            models.Index(
+                fields=["tenant_id", "resource_id"], name="resource_tag_tenant_idx"
+            ),
+        ]
+
 
 class Finding(PostgresPartitionedModel, RowLevelSecurityProtectedModel):
     """
@@ -697,7 +707,17 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
                 ],
                 name="findings_filter_idx",
             ),
+            models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
             GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
+            models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
+            models.Index(
+                fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
+            ),
+            models.Index(
+                fields=["tenant_id", "id"],
+                condition=Q(delta="new"),
+                name="find_delta_new_idx",
+            ),
         ]
 
     class JSONAPIMeta:
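`find_delta_new_idx` is a partial index: it only contains rows with `delta = 'new'`, so it stays small while exactly matching the hot "newly detected findings" lookup. A sketch of a query shaped to use it (import path and tenant value are placeholders):

```python
from api.models import Finding  # import path assumed

tenant_id = "00000000-0000-0000-0000-000000000000"  # placeholder

# Equality on tenant_id plus delta='new', ordered by the indexed id
# column: the exact shape the partial index covers.
new_finding_ids = list(
    Finding.objects.filter(tenant_id=tenant_id, delta="new")
    .order_by("id")
    .values_list("id", flat=True)[:100]
)
```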
6 changes: 3 additions & 3 deletions api/src/backend/api/tests/test_views.py
@@ -2435,7 +2435,7 @@ def test_findings_list(self, authenticated_client, findings_fixture):
         [
             ("resources", ["resources"]),
             ("scan", ["scans"]),
-            ("resources.provider,scan", ["resources", "scans", "providers"]),
+            ("resources,scan.provider", ["resources", "scans", "providers"]),
         ],
     )
     def test_findings_list_include(
@@ -2491,8 +2491,8 @@ def test_findings_list_include(
("search", "orange juice", 1),
# full text search on resource
("search", "ec2", 2),
# full text search on finding tags
("search", "value2", 2),
# full text search on finding tags (disabled for now)
# ("search", "value2", 2),
# Temporary disabled until we implement tag filtering in the UI
# ("resource_tag_key", "key", 2),
# ("resource_tag_key__in", "key,key2", 2),
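The corrected parametrization reaches the provider through the scan relation (`scan.provider`) instead of through resources. A sketch of the request that case exercises (endpoint path and response shape assumed from the surrounding tests):

```python
response = authenticated_client.get(
    "/api/v1/findings", {"include": "resources,scan.provider"}
)
included_types = {item["type"] for item in response.json()["included"]}
assert included_types == {"resources", "scans", "providers"}
```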
2 changes: 1 addition & 1 deletion api/src/backend/api/uuid_utils.py
@@ -106,7 +106,7 @@ def uuid7_end(uuid_obj: UUID, offset_months: int = 1) -> UUID:
     Args:
         uuid_obj: A UUIDv7 object.
-        offset_days: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
+        offset_months: Number of months to offset from the given UUID's date. Defaults to 1 to handle if
             partitions are not being used, if so the value will be the one set at FINDINGS_TABLE_PARTITION_MONTHS.
 
     Returns:
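For context, `uuid7_end` builds a primary-key upper bound whose embedded timestamp sits `offset_months` ahead of the given UUID's, so ranges still cover rows written past the last partition boundary. A sketch of those semantics (`add_months` and `uuid7_end_sketch` are illustrative stand-ins, not the repo's code):

```python
from datetime import datetime, timezone
from uuid import UUID


def add_months(dt: datetime, months: int) -> datetime:
    """First day of the month `months` after dt's month."""
    month_index = dt.month - 1 + months
    return dt.replace(year=dt.year + month_index // 12,
                      month=month_index % 12 + 1, day=1)


def uuid7_end_sketch(uuid_obj: UUID, offset_months: int = 1) -> UUID:
    ms = uuid_obj.int >> 80  # top 48 bits hold Unix milliseconds
    dt = datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
    end_ms = int(add_months(dt, offset_months).timestamp() * 1000)
    return UUID(int=(end_ms << 80) | (0x7 << 76))
```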