Skip to content

Commit

Permalink
Fix: Use indexed stub_id for task query instead of external id (#754)
Browse files Browse the repository at this point in the history
Resolve BE-2086

With these changes the query from the `listTaskWithRelatedQueryBuilder`
should come out looking like this:
```sql
SELECT
  t.*,
  w.external_id AS "workspace.external_id",
  w.name AS "workspace.name",
  s.external_id AS "stub.external_id",
  s.name AS "stub.name",
  s.config AS "stub.config",
  s.type AS "stub.type",
  d.external_id AS "deployment.external_id",
  d.name AS "deployment.name",
  d.version AS "deployment.version"
FROM
  task t
  JOIN workspace w ON t.workspace_id = w.id
  JOIN stub s ON t.stub_id = s.id
  LEFT JOIN deployment d ON s.id = d.stub_id
WHERE
  t.workspace_id = $1
  AND t.stub_id IN (SELECT id FROM stub WHERE external_id IN ($2))
```
instead of looking like this
```sql
SELECT
  t.*,
  w.external_id AS "workspace.external_id",
  w.name AS "workspace.name",
  s.external_id AS "stub.external_id",
  s.name AS "stub.name",
  s.config AS "stub.config",
  s.type AS "stub.type",
  d.external_id AS "deployment.external_id",
  d.name AS "deployment.name",
  d.version AS "deployment.version"
FROM
  task t
  JOIN workspace w ON t.workspace_id = w.id
  JOIN stub s ON t.stub_id = s.id
  LEFT JOIN deployment d ON s.id = d.stub_id
WHERE
  t.workspace_id = $1
  AND s.external_id IN ($2)
```
The difference is in how we are adding the filter for stub ids. The
combination of using a subquery to de-reference external ids into stub
ids and indexing the task table on stub id yields a large performance
improvement (13s -> ~0.02s on my machine).
  • Loading branch information
dleviminzi authored Dec 4, 2024
1 parent ef2f07b commit d91024d
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pkg/repository/backend_postgres.go
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,9 @@ func (c *PostgresBackendRepository) listTaskWithRelatedQueryBuilder(filters type
}

if len(filters.StubIds) > 0 {
qb = qb.Where(squirrel.Eq{"s.external_id": filters.StubIds})
// Subquery to get the stub ids from the external ids
stubIdsSubquery := squirrel.Select("id").From("stub").Where(squirrel.Eq{"external_id": filters.StubIds})
qb = qb.Where(squirrel.Expr("s.id in (?)", stubIdsSubquery))
}

if len(filters.StubNames) > 0 {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package backend_postgres_migrations

import (
"context"
"database/sql"

"github.com/pressly/goose/v3"
)

func init() {
goose.AddMigrationNoTxContext(upAddStubIdIndexTask, downAddStubIdIndexTask)
}

func upAddStubIdIndexTask(ctx context.Context, db *sql.DB) error {
_, err := db.ExecContext(ctx, `CREATE INDEX CONCURRENTLY IF NOT EXISTS task_stub_id on task(stub_id);`)
return err
}

func downAddStubIdIndexTask(ctx context.Context, db *sql.DB) error {
_, err := db.ExecContext(ctx, `DROP INDEX CONCURRENTLY IF EXISTS task_stub_id `)
return err
}
49 changes: 49 additions & 0 deletions pkg/repository/backend_postgres_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package repository

import (
"context"
"database/sql"
"fmt"
"testing"

"github.com/beam-cloud/beta9/pkg/types"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
"github.com/stretchr/testify/require"
)

func TestListTaskWithRelated(t *testing.T) {
// Remove this skip if you are testing on local data
t.Skip()
ctx := context.Background()

dbName := "control_plane"
dbUser := "pguser"
connStr := fmt.Sprintf("postgres://%s@localhost:5433/%s?sslmode=disable", dbUser, dbName)
db, err := sql.Open("postgres", connStr)
require.NoError(t, err)
client := sqlx.NewDb(db, "postgres")
require.NotNil(t, client)
r := PostgresBackendRepository{
client: client,
}

workspaceId := uint(0)
stubId := uuid.New().String()
firstPageId := uint(0)
secondPageId := uint(0)

res, err := r.ListTasksWithRelatedPaginated(ctx, types.TaskFilter{WorkspaceID: workspaceId, StubIds: []string{stubId}})
require.NoError(t, err)
require.NotNil(t, res)
require.Equal(t, 10, len(res.Data))
require.Equal(t, firstPageId, res.Data[0].Id)
require.NotNil(t, res.Next)

res, err = r.ListTasksWithRelatedPaginated(ctx, types.TaskFilter{WorkspaceID: workspaceId, StubIds: []string{stubId}, Cursor: res.Next})
require.NoError(t, err)
require.NotNil(t, res)
require.Equal(t, 10, len(res.Data))
require.Equal(t, secondPageId, res.Data[0].Id)
require.NotNil(t, res.Next)
}

0 comments on commit d91024d

Please sign in to comment.