From d91024d8fca0e60dcd53ba870a842b3aba9905d3 Mon Sep 17 00:00:00 2001 From: Daniel Levi-Minzi <51272568+dleviminzi@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:00:02 -0500 Subject: [PATCH] Fix: Use indexed stub_id for task query instead of external id (#754) Resolve BE-2086 With these changes the query from the `listTaskWithRelatedQueryBuilder` should come out looking like this: ```sql SELECT t.*, w.external_id AS "workspace.external_id", w.name AS "workspace.name", s.external_id AS "stub.external_id", s.name AS "stub.name", s.config AS "stub.config", s.type AS "stub.type", d.external_id AS "deployment.external_id", d.name AS "deployment.name", d.version AS "deployment.version" FROM task t JOIN workspace w ON t.workspace_id = w.id JOIN stub s ON t.stub_id = s.id LEFT JOIN deployment d ON s.id = d.stub_id WHERE t.workspace_id = $1 AND t.stub_id IN (SELECT id FROM stub WHERE external_id IN ($2)) ``` instead of looking like this ```sql SELECT t.*, w.external_id AS "workspace.external_id", w.name AS "workspace.name", s.external_id AS "stub.external_id", s.name AS "stub.name", s.config AS "stub.config", s.type AS "stub.type", d.external_id AS "deployment.external_id", d.name AS "deployment.name", d.version AS "deployment.version" FROM task t JOIN workspace w ON t.workspace_id = w.id JOIN stub s ON t.stub_id = s.id LEFT JOIN deployment d ON s.id = d.stub_id WHERE t.workspace_id = $1 AND s.external_id IN ($2) ``` The difference is in how we are adding the filter for stub ids. The combination of using a subquery to de-reference external ids into stub ids and indexing the task table on stub id yields a large performance improvement (13s -> ~0.02s on my machine). --- pkg/repository/backend_postgres.go | 4 +- .../018_add_stubid_index_task.go | 22 +++++++++ pkg/repository/backend_postgres_test.go | 49 +++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 pkg/repository/backend_postgres_migrations/018_add_stubid_index_task.go create mode 100644 pkg/repository/backend_postgres_test.go diff --git a/pkg/repository/backend_postgres.go b/pkg/repository/backend_postgres.go index c3b45dcf8..080e52f48 100644 --- a/pkg/repository/backend_postgres.go +++ b/pkg/repository/backend_postgres.go @@ -548,7 +548,9 @@ func (c *PostgresBackendRepository) listTaskWithRelatedQueryBuilder(filters type } if len(filters.StubIds) > 0 { - qb = qb.Where(squirrel.Eq{"s.external_id": filters.StubIds}) + // Subquery to get the stub ids from the external ids + stubIdsSubquery := squirrel.Select("id").From("stub").Where(squirrel.Eq{"external_id": filters.StubIds}) + qb = qb.Where(squirrel.Expr("s.id in (?)", stubIdsSubquery)) } if len(filters.StubNames) > 0 { diff --git a/pkg/repository/backend_postgres_migrations/018_add_stubid_index_task.go b/pkg/repository/backend_postgres_migrations/018_add_stubid_index_task.go new file mode 100644 index 000000000..dc2834806 --- /dev/null +++ b/pkg/repository/backend_postgres_migrations/018_add_stubid_index_task.go @@ -0,0 +1,22 @@ +package backend_postgres_migrations + +import ( + "context" + "database/sql" + + "github.com/pressly/goose/v3" +) + +func init() { + goose.AddMigrationNoTxContext(upAddStubIdIndexTask, downAddStubIdIndexTask) +} + +func upAddStubIdIndexTask(ctx context.Context, db *sql.DB) error { + _, err := db.ExecContext(ctx, `CREATE INDEX CONCURRENTLY IF NOT EXISTS task_stub_id on task(stub_id);`) + return err +} + +func downAddStubIdIndexTask(ctx context.Context, db *sql.DB) error { + _, err := db.ExecContext(ctx, `DROP INDEX CONCURRENTLY IF EXISTS task_stub_id `) + return err +} diff --git a/pkg/repository/backend_postgres_test.go b/pkg/repository/backend_postgres_test.go new file mode 100644 index 000000000..b5247ba6b --- /dev/null +++ b/pkg/repository/backend_postgres_test.go @@ -0,0 +1,49 @@ +package repository + +import ( + "context" + "database/sql" + "fmt" + "testing" + + "github.com/beam-cloud/beta9/pkg/types" + "github.com/google/uuid" + "github.com/jmoiron/sqlx" + "github.com/stretchr/testify/require" +) + +func TestListTaskWithRelated(t *testing.T) { + // Remove this skip if you are testing on local data + t.Skip() + ctx := context.Background() + + dbName := "control_plane" + dbUser := "pguser" + connStr := fmt.Sprintf("postgres://%s@localhost:5433/%s?sslmode=disable", dbUser, dbName) + db, err := sql.Open("postgres", connStr) + require.NoError(t, err) + client := sqlx.NewDb(db, "postgres") + require.NotNil(t, client) + r := PostgresBackendRepository{ + client: client, + } + + workspaceId := uint(0) + stubId := uuid.New().String() + firstPageId := uint(0) + secondPageId := uint(0) + + res, err := r.ListTasksWithRelatedPaginated(ctx, types.TaskFilter{WorkspaceID: workspaceId, StubIds: []string{stubId}}) + require.NoError(t, err) + require.NotNil(t, res) + require.Equal(t, 10, len(res.Data)) + require.Equal(t, firstPageId, res.Data[0].Id) + require.NotNil(t, res.Next) + + res, err = r.ListTasksWithRelatedPaginated(ctx, types.TaskFilter{WorkspaceID: workspaceId, StubIds: []string{stubId}, Cursor: res.Next}) + require.NoError(t, err) + require.NotNil(t, res) + require.Equal(t, 10, len(res.Data)) + require.Equal(t, secondPageId, res.Data[0].Id) + require.NotNil(t, res.Next) +}