Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Upgrades] Support rolling back a prepared upgrade #7731

Merged
merged 1 commit into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion .github/workflows.src/tests-inplace.tpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ jobs:
test:
runs-on: ubuntu-latest
needs: build
strategy:
fail-fast: false
matrix:
include:
- flags:
tests:
- flags: --rollback-and-test
tests:
# Do the reapply test on a smaller selection of tests, since
# it is slower.
- flags: --rollback-and-reapply
tests: -k test_link_on_target_delete -k test_edgeql_select -k test_dump

steps:
<<- restore_cache() >>
Expand All @@ -33,7 +45,7 @@ jobs:
# TODO: Would it be better to split this up into multiple jobs?
- name: Test performing in-place upgrades
run: |
./tests/inplace-testing/test.sh vt
./tests/inplace-testing/test.sh ${{ matrix.flags }} vt ${{ matrix.tests }}

workflow-notifications:
if: failure() && github.event_name != 'pull_request'
Expand Down
14 changes: 13 additions & 1 deletion .github/workflows/tests-inplace.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions edb/server/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ class ServerConfig(NamedTuple):
bootstrap_only: bool
inplace_upgrade_prepare: Optional[pathlib.Path]
inplace_upgrade_finalize: bool
inplace_upgrade_rollback: bool
bootstrap_command: str
bootstrap_command_file: pathlib.Path
default_branch: Optional[str]
Expand Down Expand Up @@ -679,12 +680,17 @@ def resolve_envvar_value(self, ctx: click.Context):
click.option(
'--inplace-upgrade-prepare', type=PathPath(),
envvar="EDGEDB_SERVER_INPLACE_UPGRADE_PREPARE",
cls=EnvvarResolver, # XXX?
cls=EnvvarResolver,
help='try to do an in-place upgrade with the specified dump file'),
click.option(
'--inplace-upgrade-rollback', type=bool, is_flag=True,
envvar="EDGEDB_SERVER_INPLACE_UPGRADE_ROLLBACK",
cls=EnvvarResolver,
help='rollback a prepared upgrade'),
click.option(
'--inplace-upgrade-finalize', type=bool, is_flag=True,
envvar="EDGEDB_SERVER_INPLACE_UPGRADE_FINALIZE",
cls=EnvvarResolver, # XXX?
cls=EnvvarResolver,
help='finalize an in-place upgrade'),
click.option(
'--default-branch', type=str,
Expand Down
111 changes: 80 additions & 31 deletions edb/server/inplace_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from typing import (
Any,
Optional,
Sequence,
)

import dataclasses
Expand Down Expand Up @@ -51,6 +52,7 @@
from edb.server import defines as edbdef
from edb.server import instdata
from edb.server import pgcluster
from edb.server import pgcon

from edb.pgsql import common as pg_common
from edb.pgsql import dbops
Expand All @@ -59,6 +61,8 @@

logger = logging.getLogger('edb.server')

PGCon = bootstrap.PGConnectionProxy | pgcon.PGConnection


async def _load_schema(
ctx: bootstrap.BootstrapContext, state: edbcompiler.CompilerState
Expand Down Expand Up @@ -343,35 +347,10 @@ async def _upgrade_one(
'''


async def _cleanup_one(
ctx: bootstrap.BootstrapContext,
async def _delete_schemas(
conn: PGCon,
to_delete: Sequence[str]
) -> None:
conn = ctx.conn

# If the upgrade is already finalized, skip it. This lets us be
# resilient to crashes during the finalization process, which may
# leave some databases upgraded but not all.
if (await instdata.get_instdata(conn, 'upgrade_finalized', 'text')) == b'1':
logger.info(f"Database already pivoted")
return

trampoline_query = await instdata.get_instdata(
conn, 'trampoline_pivot_query', 'text')
fixup_query = await instdata.get_instdata(
conn, 'schema_fixup_query', 'text')

await conn.sql_execute(trampoline_query)
if fixup_query:
await conn.sql_execute(fixup_query)

namespaces = json.loads(await conn.sql_fetch_val("""
select json_agg(nspname) from pg_namespace
where nspname like 'edgedb%\\_v%'
""".encode('utf-8')))

cur_suffix = pg_common.versioned_schema("")
to_delete = [x for x in namespaces if not x.endswith(cur_suffix)]

# To add a bit more safety, check whether there are any
# dependencies on the modules we want to delete from outside those
# modules since the only way to delete non-empty schemas in
Expand Down Expand Up @@ -404,6 +383,44 @@ async def _cleanup_one(
drop schema {', '.join(to_delete)} cascade
""".encode('utf-8'))


async def _get_namespaces(
conn: PGCon,
) -> list[str]:
return json.loads(await conn.sql_fetch_val("""
select json_agg(nspname) from pg_namespace
where nspname like 'edgedb%\\_v%'
""".encode('utf-8')))


async def _finalize_one(
ctx: bootstrap.BootstrapContext,
) -> None:
conn = ctx.conn

# If the upgrade is already finalized, skip it. This lets us be
# resilient to crashes during the finalization process, which may
# leave some databases upgraded but not all.
if (await instdata.get_instdata(conn, 'upgrade_finalized', 'text')) == b'1':
logger.info(f"Database upgrade already finalized")
return

trampoline_query = await instdata.get_instdata(
conn, 'trampoline_pivot_query', 'text')
fixup_query = await instdata.get_instdata(
conn, 'schema_fixup_query', 'text')

await conn.sql_execute(trampoline_query)
if fixup_query:
await conn.sql_execute(fixup_query)

namespaces = await _get_namespaces(ctx.conn)

cur_suffix = pg_common.versioned_schema("")
to_delete = [x for x in namespaces if not x.endswith(cur_suffix)]

await _delete_schemas(conn, to_delete)

await bootstrap._store_static_text_cache(
ctx,
f'upgrade_finalized',
Expand Down Expand Up @@ -437,6 +454,35 @@ async def _get_databases(
return databases


async def _rollback_one(
ctx: bootstrap.BootstrapContext,
) -> None:
namespaces = await _get_namespaces(ctx.conn)

cur_suffix = pg_common.versioned_schema("")
to_delete = [x for x in namespaces if x.endswith(cur_suffix)]

await _delete_schemas(ctx.conn, to_delete)


async def _rollback_all(
ctx: bootstrap.BootstrapContext,
) -> None:
cluster = ctx.cluster
databases = await _get_databases(ctx)

for database in databases:
conn = bootstrap.PGConnectionProxy(
cluster, cluster.get_db_name(database))
try:
subctx = dataclasses.replace(ctx, conn=conn)

logger.info(f"Rolling back preparation of database '{database}'")
await _rollback_one(ctx=subctx)
finally:
conn.terminate()


async def _upgrade_all(
ctx: bootstrap.BootstrapContext,
) -> None:
Expand Down Expand Up @@ -471,7 +517,7 @@ async def _upgrade_all(
conn.terminate()


async def _finish_all(
async def _finalize_all(
ctx: bootstrap.BootstrapContext,
) -> None:
cluster = ctx.cluster
Expand All @@ -493,7 +539,7 @@ async def go(
if database == inject_failure_on:
raise AssertionError(f'failure injected on {database}')

await _cleanup_one(subctx)
await _finalize_one(subctx)
await conn.sql_execute(final_command)
finally:
conn.terminate()
Expand Down Expand Up @@ -529,11 +575,14 @@ async def inplace_upgrade(
mode = await bootstrap._get_cluster_mode(ctx)
ctx = dataclasses.replace(ctx, mode=mode)

if args.inplace_upgrade_rollback:
await _rollback_all(ctx)

if args.inplace_upgrade_prepare:
await _upgrade_all(ctx)

if args.inplace_upgrade_finalize:
await _finish_all(ctx)
await _finalize_all(ctx)

finally:
pgconn.terminate()
1 change: 1 addition & 0 deletions edb/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ async def run_server(
if (
args.inplace_upgrade_prepare
or args.inplace_upgrade_finalize
or args.inplace_upgrade_rollback
):
from . import inplace_upgrade
await inplace_upgrade.inplace_upgrade(cluster, args)
Expand Down
40 changes: 40 additions & 0 deletions tests/inplace-testing/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
#!/bin/bash -ex

while [[ $# -gt 0 ]]; do
case $1 in
--rollback-and-test)
ROLLBACK=1
shift
;;
--rollback-and-reapply)
REAPPLY=1
shift
;;
*)
break
;;
esac
done


DIR="$1"
shift

Expand Down Expand Up @@ -44,6 +61,29 @@ edb server --inplace-upgrade-prepare "$DIR"/upgrade.json --backend-dsn="$DSN"
# Check the server is still working
edgedb -H localhost -P $PORT --tls-security insecure -b select query 'select count(User)' | grep 2

if [ $ROLLBACK = 1 ]; then
edb server --inplace-upgrade-rollback --backend-dsn="$DSN"

# Rollback and then run the tests on the old database
kill $SPID
wait $SPID
SPID=
patch -R -f -p1 < tests/inplace-testing/upgrade.patch
make parsers
edb test --data-dir "$DIR" --use-data-dir-dbs -v "$@"
exit 0
fi

if [ $REAPPLY = 1 ]; then
# Rollback and then reapply
edb server --inplace-upgrade-rollback --backend-dsn="$DSN"

edb server --inplace-upgrade-prepare "$DIR"/upgrade.json --backend-dsn="$DSN"
fi

# Check the server is still working
edgedb -H localhost -P $PORT --tls-security insecure -b select query 'select count(User)' | grep 2

# Kill the old version so we can finalize the upgrade
kill $SPID
wait $SPID
Expand Down