diff --git a/.github/Makefile b/.github/Makefile index 990613304dc..9090781f58e 100644 --- a/.github/Makefile +++ b/.github/Makefile @@ -12,7 +12,8 @@ all: workflows/nightly.yml \ workflows/tests-managed-pg.yml \ workflows/tests-ha.yml \ workflows/tests-pg-versions.yml \ - workflows/tests-patches.yml + workflows/tests-patches.yml \ + workflows/tests-inplace.yml workflows/%.yml: workflows.src/%.tpl.yml workflows.src/%.targets.yml workflows.src/build.inc.yml workflows.src/ls-build.inc.yml $(ROOT)/workflows.src/render.py $* $*.targets.yml @@ -34,3 +35,6 @@ workflows.src/tests-pg-versions.tpl.yml: workflows.src/tests.inc.yml workflows.src/tests-patches.tpl.yml: workflows.src/tests.inc.yml touch $(ROOT)/workflows.src/tests-patches.tpl.yml + +workflows.src/tests-inplace.tpl.yml: workflows.src/tests.inc.yml + touch $(ROOT)/workflows.src/tests-inplace.tpl.yml diff --git a/.github/workflows.src/tests-inplace.targets.yml b/.github/workflows.src/tests-inplace.targets.yml new file mode 100644 index 00000000000..99d4a714ac7 --- /dev/null +++ b/.github/workflows.src/tests-inplace.targets.yml @@ -0,0 +1 @@ +data: diff --git a/.github/workflows.src/tests-inplace.tpl.yml b/.github/workflows.src/tests-inplace.tpl.yml new file mode 100644 index 00000000000..8cd1225dafd --- /dev/null +++ b/.github/workflows.src/tests-inplace.tpl.yml @@ -0,0 +1,55 @@ +<% from "tests.inc.yml" import build, calc_cache_key, restore_cache -%> + +name: Tests of in-place upgrades + +on: + schedule: + - cron: "0 3 * * *" + workflow_dispatch: + inputs: {} + push: + branches: + - "A-inplace*" + +jobs: + build: + runs-on: ubuntu-latest + + steps: + <%- call build() -%> + - name: Compute cache keys + run: | + << calc_cache_key()|indent >> + <%- endcall %> + + test: + runs-on: ubuntu-latest + needs: build + + steps: + <<- restore_cache() >> + + # Run the test + # TODO: Would it be better to split this up into multiple jobs? + - name: Test performing in-place upgrades + run: | + ./tests/inplace-testing/test.sh vt + + workflow-notifications: + if: failure() && github.event_name != 'pull_request' + name: Notify in Slack on failures + needs: + - build + - test + runs-on: ubuntu-latest + permissions: + actions: 'read' + steps: + - name: Slack Workflow Notification + uses: Gamesight/slack-workflow-status@26a36836c887f260477432e4314ec3490a84f309 + with: + repo_token: ${{secrets.GITHUB_TOKEN}} + slack_webhook_url: ${{secrets.ACTIONS_SLACK_WEBHOOK_URL}} + name: 'Workflow notifications' + icon_emoji: ':hammer:' + include_jobs: 'on-failure' diff --git a/.github/workflows/tests-inplace.yml b/.github/workflows/tests-inplace.yml new file mode 100644 index 00000000000..bf17d4aa864 --- /dev/null +++ b/.github/workflows/tests-inplace.yml @@ -0,0 +1,496 @@ +name: Tests of in-place upgrades + +on: + schedule: + - cron: "0 3 * * *" + workflow_dispatch: + inputs: {} + push: + branches: + - "A-inplace*" + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: false + + - uses: actions/checkout@v4 + with: + fetch-depth: 50 + submodules: true + + - name: Set up Python + uses: actions/setup-python@v5 + id: setup-python + with: + python-version: '3.12.2' + cache: 'pip' + cache-dependency-path: | + pyproject.toml + + # The below is technically a lie as we are technically not + # inside a virtual env, but there is really no reason to bother + # actually creating and activating one as below works just fine. 
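+    # (Pointing VIRTUAL_ENV at sys.prefix lets the `uv pip install`
+    # steps further down treat the setup-python interpreter as the
+    # target environment without creating a real venv.)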
+ - name: Export $VIRTUAL_ENV + run: | + venv="$(python -c 'import sys; sys.stdout.write(sys.prefix)')" + echo "VIRTUAL_ENV=${venv}" >> $GITHUB_ENV + + - name: Set up uv cache + uses: actions/cache@v4 + with: + path: ~/.cache/uv + key: uv-cache-${{ runner.os }}-py-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('pyproject.toml') }} + + - name: Cached requirements.txt + uses: actions/cache@v4 + id: requirements-cache + with: + path: requirements.txt + key: edb-requirements-${{ hashFiles('pyproject.toml') }} + + - name: Compute requirements.txt + if: steps.requirements-cache.outputs.cache-hit != 'true' + run: | + python -m pip install pip-tools + pip-compile --no-strip-extras --all-build-deps \ + --extra test,language-server \ + --output-file requirements.txt pyproject.toml + + - name: Install Python dependencies + run: | + python -c "import sys; print(sys.prefix)" + python -m pip install uv~=0.1.0 && uv pip install -U -r requirements.txt + + - name: Compute cache keys + run: | + mkdir -p .tmp + python setup.py -q ci_helper --type cli > .tmp/edgedbcli_git_rev.txt + python setup.py -q ci_helper --type rust >.tmp/rust_cache_key.txt + python setup.py -q ci_helper --type ext >.tmp/ext_cache_key.txt + python setup.py -q ci_helper --type parsers >.tmp/parsers_cache_key.txt + python setup.py -q ci_helper --type postgres >.tmp/postgres_git_rev.txt + python setup.py -q ci_helper --type libpg_query >.tmp/libpg_query_git_rev.txt + echo 'f8cd94309eaccbfba5dea7835b88c78377608a37' >.tmp/stolon_git_rev.txt + python setup.py -q ci_helper --type bootstrap >.tmp/bootstrap_cache_key.txt + echo EDGEDBCLI_GIT_REV=$(cat .tmp/edgedbcli_git_rev.txt) >> $GITHUB_ENV + echo POSTGRES_GIT_REV=$(cat .tmp/postgres_git_rev.txt) >> $GITHUB_ENV + echo LIBPG_QUERY_GIT_REV=$(cat .tmp/libpg_query_git_rev.txt) >> $GITHUB_ENV + echo STOLON_GIT_REV=$(cat .tmp/stolon_git_rev.txt) >> $GITHUB_ENV + echo BUILD_LIB=$(python setup.py -q ci_helper --type build_lib) >> $GITHUB_ENV + echo BUILD_TEMP=$(python setup.py -q ci_helper --type build_temp) >> $GITHUB_ENV + + - name: Upload shared artifacts + uses: actions/upload-artifact@v4 + with: + name: shared-artifacts + path: .tmp + retention-days: 1 + + # Restore binary cache + + - name: Handle cached EdgeDB CLI binaries + uses: actions/cache@v4 + id: cli-cache + with: + path: build/cli + key: edb-cli-v3-${{ env.EDGEDBCLI_GIT_REV }} + + - name: Handle cached Rust extensions + uses: actions/cache@v4 + id: rust-cache + with: + path: build/rust_extensions + key: edb-rust-v4-${{ hashFiles('.tmp/rust_cache_key.txt') }} + restore-keys: | + edb-rust-v4- + + - name: Handle cached Cython extensions + uses: actions/cache@v4 + id: ext-cache + with: + path: build/extensions + key: edb-ext-v5-${{ hashFiles('.tmp/ext_cache_key.txt') }} + + - name: Handle cached PostgreSQL build + uses: actions/cache@v4 + id: postgres-cache + with: + path: build/postgres/install + key: edb-postgres-v2-${{ env.POSTGRES_GIT_REV }} + + - name: Handle cached Stolon build + uses: actions/cache@v4 + id: stolon-cache + with: + path: build/stolon/bin + key: edb-stolon-v2-${{ env.STOLON_GIT_REV }} + + - name: Handle cached libpg_query build + uses: actions/cache@v4 + id: libpg-query-cache + with: + path: edb/pgsql/parser/libpg_query/libpg_query.a + key: edb-libpg_query-v1-${{ env.LIBPG_QUERY_GIT_REV }} + + # Install system dependencies for building + + - name: Install system deps + if: | + steps.cli-cache.outputs.cache-hit != 'true' || + steps.rust-cache.outputs.cache-hit != 'true' || + 
steps.ext-cache.outputs.cache-hit != 'true' || + steps.stolon-cache.outputs.cache-hit != 'true' || + steps.postgres-cache.outputs.cache-hit != 'true' + run: | + sudo apt-get update + sudo apt-get install -y uuid-dev libreadline-dev bison flex + + - name: Install Rust toolchain + if: | + steps.cli-cache.outputs.cache-hit != 'true' || + steps.rust-cache.outputs.cache-hit != 'true' + uses: dsherret/rust-toolchain-file@v1 + + # Build EdgeDB CLI + + - name: Handle EdgeDB CLI build cache + uses: actions/cache@v4 + if: steps.cli-cache.outputs.cache-hit != 'true' + with: + path: ${{ env.BUILD_TEMP }}/rust/cli + key: edb-cli-build-v7-${{ env.EDGEDBCLI_GIT_REV }} + restore-keys: | + edb-cli-build-v7- + + - name: Build EdgeDB CLI + env: + CARGO_HOME: ${{ env.BUILD_TEMP }}/rust/cli/cargo_home + CACHE_HIT: ${{ steps.cli-cache.outputs.cache-hit }} + run: | + if [[ "$CACHE_HIT" == "true" ]]; then + cp -v build/cli/bin/edgedb edb/cli/edgedb + else + python setup.py -v build_cli + fi + + # Build Rust extensions + + - name: Handle Rust extensions build cache + uses: actions/cache@v4 + if: steps.rust-cache.outputs.cache-hit != 'true' + with: + path: ${{ env.BUILD_TEMP }}/rust/extensions + key: edb-rust-build-v1-${{ hashFiles('.tmp/rust_cache_key.txt') }} + restore-keys: | + edb-rust-build-v1- + + - name: Build Rust extensions + env: + CARGO_HOME: ${{ env.BUILD_TEMP }}/rust/extensions/cargo_home + CACHE_HIT: ${{ steps.rust-cache.outputs.cache-hit }} + run: | + if [[ "$CACHE_HIT" != "true" ]]; then + rm -rf ${BUILD_LIB} + mkdir -p build/rust_extensions + rsync -av ./build/rust_extensions/ ${BUILD_LIB}/ + python setup.py -v build_rust + rsync -av ${BUILD_LIB}/ build/rust_extensions/ + rm -rf ${BUILD_LIB} + fi + rsync -av ./build/rust_extensions/edb/ ./edb/ + + # Build libpg_query + + - name: Build libpg_query + if: | + steps.libpg-query-cache.outputs.cache-hit != 'true' && + steps.ext-cache.outputs.cache-hit != 'true' + run: | + python setup.py build_libpg_query + + # Build extensions + + - name: Handle Cython extensions build cache + uses: actions/cache@v4 + if: steps.ext-cache.outputs.cache-hit != 'true' + with: + path: ${{ env.BUILD_TEMP }}/edb + key: edb-ext-build-v3-${{ hashFiles('.tmp/ext_cache_key.txt') }} + + - name: Build Cython extensions + env: + CACHE_HIT: ${{ steps.ext-cache.outputs.cache-hit }} + BUILD_EXT_MODE: py-only + run: | + if [[ "$CACHE_HIT" != "true" ]]; then + rm -rf ${BUILD_LIB} + mkdir -p ./build/extensions + rsync -av ./build/extensions/ ${BUILD_LIB}/ + BUILD_EXT_MODE=py-only python setup.py -v build_ext + rsync -av ${BUILD_LIB}/ ./build/extensions/ + rm -rf ${BUILD_LIB} + fi + rsync -av ./build/extensions/edb/ ./edb/ + + # Build parsers + + - name: Handle compiled parsers cache + uses: actions/cache@v4 + id: parsers-cache + with: + path: build/lib + key: edb-parsers-v3-${{ hashFiles('.tmp/parsers_cache_key.txt') }} + restore-keys: | + edb-parsers-v3- + + - name: Build parsers + env: + CACHE_HIT: ${{ steps.parsers-cache.outputs.cache-hit }} + run: | + if [[ "$CACHE_HIT" != "true" ]]; then + rm -rf ${BUILD_LIB} + mkdir -p ./build/lib + rsync -av ./build/lib/ ${BUILD_LIB}/ + python setup.py -v build_parsers + rsync -av ${BUILD_LIB}/ ./build/lib/ + rm -rf ${BUILD_LIB} + fi + rsync -av ./build/lib/edb/ ./edb/ + + # Build PostgreSQL + + - name: Build PostgreSQL + env: + CACHE_HIT: ${{ steps.postgres-cache.outputs.cache-hit }} + run: | + if [[ "$CACHE_HIT" == "true" ]]; then + cp build/postgres/install/stamp build/postgres/ + else + python setup.py build_postgres + cp build/postgres/stamp 
build/postgres/install/ + fi + + # Build Stolon + + - name: Set up Go + if: steps.stolon-cache.outputs.cache-hit != 'true' + uses: actions/setup-go@v2 + with: + go-version: 1.16 + + - uses: actions/checkout@v4 + if: steps.stolon-cache.outputs.cache-hit != 'true' + with: + repository: edgedb/stolon + path: build/stolon + ref: ${{ env.STOLON_GIT_REV }} + fetch-depth: 0 + submodules: false + + - name: Build Stolon + if: steps.stolon-cache.outputs.cache-hit != 'true' + run: | + mkdir -p build/stolon/bin/ + curl -fsSL https://releases.hashicorp.com/consul/1.10.1/consul_1.10.1_linux_amd64.zip | zcat > build/stolon/bin/consul + chmod +x build/stolon/bin/consul + cd build/stolon && make + + # Install edgedb-server and populate egg-info + + - name: Install edgedb-server + env: + BUILD_EXT_MODE: skip + run: | + # --no-build-isolation because we have explicitly installed all deps + # and don't want them to be reinstalled in an "isolated env". + pip install --no-build-isolation --no-deps -e .[test,docs] + + # Refresh the bootstrap cache + + - name: Handle bootstrap cache + uses: actions/cache@v4 + id: bootstrap-cache + with: + path: build/cache + key: edb-bootstrap-v2-${{ hashFiles('.tmp/bootstrap_cache_key.txt') }} + restore-keys: | + edb-bootstrap-v2- + + - name: Bootstrap EdgeDB Server + if: steps.bootstrap-cache.outputs.cache-hit != 'true' + run: | + edb server --bootstrap-only + + test: + runs-on: ubuntu-latest + needs: build + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: false + + - uses: actions/checkout@v4 + with: + fetch-depth: 50 + submodules: true + + - name: Set up Python + uses: actions/setup-python@v5 + id: setup-python + with: + python-version: '3.12.2' + cache: 'pip' + cache-dependency-path: | + pyproject.toml + + # The below is technically a lie as we are technically not + # inside a virtual env, but there is really no reason to bother + # actually creating and activating one as below works just fine. 
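+    # (Same trick as in the build job: VIRTUAL_ENV is pointed at
+    # sys.prefix so that `uv pip install` targets this interpreter.)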
+ - name: Export $VIRTUAL_ENV + run: | + venv="$(python -c 'import sys; sys.stdout.write(sys.prefix)')" + echo "VIRTUAL_ENV=${venv}" >> $GITHUB_ENV + + - name: Set up uv cache + uses: actions/cache@v4 + with: + path: ~/.cache/uv + key: uv-cache-${{ runner.os }}-py-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('pyproject.toml') }} + + - name: Download requirements.txt + uses: actions/cache@v4 + with: + path: requirements.txt + key: edb-requirements-${{ hashFiles('pyproject.toml') }} + + - name: Install Python dependencies + run: python -m pip install uv~=0.1.0 && uv pip install -U -r requirements.txt + + # Restore the artifacts and environment variables + + - name: Download shared artifacts + uses: actions/download-artifact@v4 + with: + name: shared-artifacts + path: .tmp + + - name: Set environment variables + run: | + echo EDGEDBCLI_GIT_REV=$(cat .tmp/edgedbcli_git_rev.txt) >> $GITHUB_ENV + echo POSTGRES_GIT_REV=$(cat .tmp/postgres_git_rev.txt) >> $GITHUB_ENV + echo STOLON_GIT_REV=$(cat .tmp/stolon_git_rev.txt) >> $GITHUB_ENV + echo BUILD_LIB=$(python setup.py -q ci_helper --type build_lib) >> $GITHUB_ENV + echo BUILD_TEMP=$(python setup.py -q ci_helper --type build_temp) >> $GITHUB_ENV + + # Restore build cache + + - name: Restore cached EdgeDB CLI binaries + uses: actions/cache@v4 + id: cli-cache + with: + path: build/cli + key: edb-cli-v3-${{ env.EDGEDBCLI_GIT_REV }} + + - name: Restore cached Rust extensions + uses: actions/cache@v4 + id: rust-cache + with: + path: build/rust_extensions + key: edb-rust-v4-${{ hashFiles('.tmp/rust_cache_key.txt') }} + + - name: Restore cached Cython extensions + uses: actions/cache@v4 + id: ext-cache + with: + path: build/extensions + key: edb-ext-v5-${{ hashFiles('.tmp/ext_cache_key.txt') }} + + - name: Restore compiled parsers cache + uses: actions/cache@v4 + id: parsers-cache + with: + path: build/lib + key: edb-parsers-v3-${{ hashFiles('.tmp/parsers_cache_key.txt') }} + + - name: Restore cached PostgreSQL build + uses: actions/cache@v4 + id: postgres-cache + with: + path: build/postgres/install + key: edb-postgres-v2-${{ env.POSTGRES_GIT_REV }} + + - name: Restore cached Stolon build + uses: actions/cache@v4 + id: stolon-cache + with: + path: build/stolon/bin + key: edb-stolon-v2-${{ env.STOLON_GIT_REV }} + + - name: Restore bootstrap cache + uses: actions/cache@v4 + id: bootstrap-cache + with: + path: build/cache + key: edb-bootstrap-v2-${{ hashFiles('.tmp/bootstrap_cache_key.txt') }} + + - name: Stop if we cannot retrieve the cache + if: | + steps.cli-cache.outputs.cache-hit != 'true' || + steps.rust-cache.outputs.cache-hit != 'true' || + steps.ext-cache.outputs.cache-hit != 'true' || + steps.parsers-cache.outputs.cache-hit != 'true' || + steps.postgres-cache.outputs.cache-hit != 'true' || + steps.stolon-cache.outputs.cache-hit != 'true' || + steps.bootstrap-cache.outputs.cache-hit != 'true' + run: | + echo ::error::Cannot retrieve build cache. + exit 1 + + - name: Restore cache into the source tree + run: | + cp -v build/cli/bin/edgedb edb/cli/edgedb + rsync -av ./build/rust_extensions/edb/ ./edb/ + rsync -av ./build/extensions/edb/ ./edb/ + rsync -av ./build/lib/edb/ ./edb/ + cp build/postgres/install/stamp build/postgres/ + + - name: Install edgedb-server + env: + BUILD_EXT_MODE: skip + run: | + # --no-build-isolation because we have explicitly installed all deps + # and don't want them to be reinstalled in an "isolated env". 
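+        # --no-deps for a similar reason: everything the editable install
+        # needs should already be present from requirements.txt above.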
+ pip install --no-build-isolation --no-deps -e .[test,docs] + + # Run the test + # TODO: Would it be better to split this up into multiple jobs? + - name: Test performing in-place upgrades + run: | + ./tests/inplace-testing/test.sh vt + + workflow-notifications: + if: failure() && github.event_name != 'pull_request' + name: Notify in Slack on failures + needs: + - build + - test + runs-on: ubuntu-latest + permissions: + actions: 'read' + steps: + - name: Slack Workflow Notification + uses: Gamesight/slack-workflow-status@26a36836c887f260477432e4314ec3490a84f309 + with: + repo_token: ${{secrets.GITHUB_TOKEN}} + slack_webhook_url: ${{secrets.ACTIONS_SLACK_WEBHOOK_URL}} + name: 'Workflow notifications' + icon_emoji: ':hammer:' + include_jobs: 'on-failure' diff --git a/edb/pgsql/delta.py b/edb/pgsql/delta.py index 89b10166c1f..0555dd420c9 100644 --- a/edb/pgsql/delta.py +++ b/edb/pgsql/delta.py @@ -586,11 +586,13 @@ def create_tuple( cls, tup: s_types.Tuple, schema: s_schema.Schema, + conditional: bool=False, ) -> dbops.Command: elements = tup.get_element_types(schema).items(schema) + name = common.get_backend_name(schema, tup, catenate=False) ctype = dbops.CompositeType( - name=common.get_backend_name(schema, tup, catenate=False), + name=name, columns=[ dbops.Column( name=n, @@ -601,7 +603,12 @@ def create_tuple( ] ) - return dbops.CreateCompositeType(type=ctype) + neg_conditions = [] + if conditional: + neg_conditions.append(dbops.TypeExists(name=name)) + + return dbops.CreateCompositeType( + type=ctype, neg_conditions=neg_conditions) def apply( self, @@ -613,7 +620,12 @@ def apply( if self.scls.is_polymorphic(schema): return schema - self.pgops.add(self.create_tuple(self.scls, schema)) + self.pgops.add(self.create_tuple( + self.scls, + schema, + # XXX: WHY + conditional=context.stdmode, + )) return schema @@ -2422,8 +2434,15 @@ def create_enum( new_enum_name = common.get_backend_name(schema, scalar, catenate=False) + neg_conditions = [] + if context.stdmode: + neg_conditions.append(dbops.EnumExists(name=new_enum_name)) + ops.add_command( - dbops.CreateEnum(dbops.Enum(name=new_enum_name, values=values)) + dbops.CreateEnum( + dbops.Enum(name=new_enum_name, values=values), + neg_conditions=neg_conditions, + ) ) fcls = cls.get_function_type(new_enum_name) diff --git a/edb/pgsql/metaschema.py b/edb/pgsql/metaschema.py index d6b07af1baf..76e38aabaed 100644 --- a/edb/pgsql/metaschema.py +++ b/edb/pgsql/metaschema.py @@ -257,6 +257,79 @@ def __init__(self) -> None: ) +class CreateTrampolineViewFunction(trampoline.VersionedFunction): + text = f''' + DECLARE + cols text; + tgt text; + dummy text; + BEGIN + tgt := quote_ident(tgt_schema) || '.' || quote_ident(tgt_name); + + -- Check if the view already exists. + select viewname into dummy + from pg_catalog.pg_views + where schemaname = tgt_schema + and viewname = tgt_name; + + IF FOUND THEN + -- If the view already existed, we need to generate a column + -- list that maintains the order of anything that was present in + -- the old view, and that doesn't remove any columns that were + -- dropped. 
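+            -- Illustrative example (hypothetical column names and types,
+            -- not taken from real output): if the old view had (a, b, c)
+            -- and the new source relation has (a, c, d), the generated
+            -- list is roughly:  a, NULL::text AS b, c, d
+            -- Dropped columns keep their old position as typed NULLs and
+            -- newly added source columns are appended at the end.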
+ select + string_agg( + COALESCE( + quote_ident(tname), + 'NULL::' || vtypname || ' AS ' || quote_ident(vname) + ), + ',' + ) + from ( + select + a1.attname as tname, + a2.attname as vname, + pg_catalog.format_type(a2.atttypid, NULL) as vtypname + from ( + select * from pg_catalog.pg_attribute + where attrelid = src::regclass::oid + and attnum >= 0 + ) a1 + full outer join ( + select * from pg_catalog.pg_attribute + where attrelid = tgt::regclass::oid + ) a2 + on a1.attname = a2.attname + order by a2.attnum, a1.attnum + ) + INTO cols; + + END IF; + + -- If it doesn't exist or has no columns, create it with SELECT * + cols := COALESCE(cols, '*'); + + EXECUTE 'CREATE OR REPLACE VIEW ' || tgt || ' AS ' || + 'SELECT ' || cols || ' FROM ' || src; + + END; + ''' + + def __init__(self) -> None: + super().__init__( + name=('edgedb', '_create_trampoline_view'), + args=[ + ('src', ('text',)), + ('tgt_schema', ('text',)), + ('tgt_name', ('text',)), + ], + returns=('void',), + language='plpgsql', + volatility='volatile', + text=self.text, + ) + + class BigintDomain(dbops.Domain): """Bigint: a variant of numeric that enforces zero digits after the dot. @@ -4804,28 +4877,63 @@ def go(cmd: dbops.Command) -> None: return ncmds -def get_bootstrap_commands( - config_spec: edbconfig.Spec, -) -> tuple[dbops.CommandGroup, list[trampoline.Trampoline]]: +def get_fixed_bootstrap_commands() -> dbops.CommandGroup: + """Create metaschema objects that are truly global""" + cmds = [ dbops.CreateSchema(name='edgedb'), dbops.CreateSchema(name='edgedbt'), dbops.CreateSchema(name='edgedbpub'), dbops.CreateSchema(name='edgedbstd'), - dbops.CreateSchema(name='edgedbsql'), + dbops.CreateTable( + DBConfigTable(), + ), + # TODO: SHOULD THIS BE VERSIONED? + dbops.CreateTable(DMLDummyTable()), + # TODO: SHOULD THIS BE VERSIONED? 
+ dbops.CreateTable(QueryCacheTable()), + + dbops.Query(DMLDummyTable.SETUP_QUERY), + + dbops.CreateDomain(BigintDomain()), + dbops.CreateDomain(ConfigMemoryDomain()), + dbops.CreateDomain(TimestampTzDomain()), + dbops.CreateDomain(TimestampDomain()), + dbops.CreateDomain(DateDomain()), + dbops.CreateDomain(DurationDomain()), + dbops.CreateDomain(RelativeDurationDomain()), + dbops.CreateDomain(DateDurationDomain()), + + dbops.CreateEnum(SysConfigSourceType()), + dbops.CreateEnum(SysConfigScopeType()), + + dbops.CreateCompositeType(SysConfigValueType()), + dbops.CreateCompositeType(SysConfigEntryType()), + dbops.CreateRange(Float32Range()), + dbops.CreateRange(Float64Range()), + dbops.CreateRange(DatetimeRange()), + dbops.CreateRange(LocalDatetimeRange()), + ] + + commands = dbops.CommandGroup() + commands.add_commands(cmds) + return commands + + +def get_bootstrap_commands( + config_spec: edbconfig.Spec, +) -> tuple[dbops.CommandGroup, list[trampoline.Trampoline]]: + cmds = [ dbops.CreateSchema(name=V('edgedb')), dbops.CreateSchema(name=V('edgedbpub')), dbops.CreateSchema(name=V('edgedbstd')), dbops.CreateSchema(name=V('edgedbsql')), dbops.CreateView(NormalizedPgSettingsView()), - dbops.CreateTable(DBConfigTable()), - dbops.CreateTable(DMLDummyTable()), - dbops.CreateTable(QueryCacheTable()), - dbops.Query(DMLDummyTable.SETUP_QUERY), dbops.CreateFunction(EvictQueryCacheFunction()), dbops.CreateFunction(ClearQueryCacheFunction()), + dbops.CreateFunction(CreateTrampolineViewFunction()), dbops.CreateFunction(UuidGenerateV1mcFunction('edgedbext')), dbops.CreateFunction(UuidGenerateV4Function('edgedbext')), dbops.CreateFunction(UuidGenerateV5Function('edgedbext')), @@ -4859,14 +4967,6 @@ def get_bootstrap_commands( dbops.CreateFunction(NormalizeNameFunction()), dbops.CreateFunction(GetNameModuleFunction()), dbops.CreateFunction(NullIfArrayNullsFunction()), - dbops.CreateDomain(BigintDomain()), - dbops.CreateDomain(ConfigMemoryDomain()), - dbops.CreateDomain(TimestampTzDomain()), - dbops.CreateDomain(TimestampDomain()), - dbops.CreateDomain(DateDomain()), - dbops.CreateDomain(DurationDomain()), - dbops.CreateDomain(RelativeDurationDomain()), - dbops.CreateDomain(DateDurationDomain()), dbops.CreateFunction(StrToConfigMemoryFunction()), dbops.CreateFunction(ConfigMemoryToStrFunction()), dbops.CreateFunction(StrToBigint()), @@ -4902,10 +5002,6 @@ def get_bootstrap_commands( dbops.CreateFunction(ToLocalDatetimeFunction()), dbops.CreateFunction(StrToBool()), dbops.CreateFunction(BytesIndexWithBoundsFunction()), - dbops.CreateEnum(SysConfigSourceType()), - dbops.CreateEnum(SysConfigScopeType()), - dbops.CreateCompositeType(SysConfigValueType()), - dbops.CreateCompositeType(SysConfigEntryType()), dbops.CreateFunction(TypeIDToConfigType()), dbops.CreateFunction(ConvertPostgresConfigUnitsFunction()), dbops.CreateFunction(InterpretConfigValueToJsonFunction()), @@ -4924,10 +5020,6 @@ def get_bootstrap_commands( dbops.CreateFunction(GetTypeToMultiRangeNameMap()), dbops.CreateFunction(GetPgTypeForEdgeDBTypeFunction()), dbops.CreateFunction(DescribeRolesAsDDLFunctionForwardDecl()), - dbops.CreateRange(Float32Range()), - dbops.CreateRange(Float64Range()), - dbops.CreateRange(DatetimeRange()), - dbops.CreateRange(LocalDatetimeRange()), dbops.CreateFunction(RangeToJsonFunction()), dbops.CreateFunction(MultiRangeToJsonFunction()), dbops.CreateFunction(RangeValidateFunction()), diff --git a/edb/pgsql/trampoline.py b/edb/pgsql/trampoline.py index e0119d2a92d..4983d9f3f41 100644 --- a/edb/pgsql/trampoline.py +++ 
b/edb/pgsql/trampoline.py @@ -49,6 +49,7 @@ q = common.qname qi = common.quote_ident +ql = common.quote_literal V = common.versioned_schema @@ -107,10 +108,13 @@ def make(self) -> dbops.Command: @dataclasses.dataclass class TrampolineView(Trampoline): - view: dbops.View + old_name: tuple[str, str] def make(self) -> dbops.Command: - return dbops.CreateView(self.view, or_replace=True) + return dbops.Query(f''' + PERFORM {V('edgedb')}._create_trampoline_view( + {ql(q(*self.old_name))}, {ql(self.name[0])}, {ql(self.name[1])}) + ''') def make_trampoline(func: dbops.Function) -> TrampolineFunction: @@ -143,13 +147,7 @@ def make_table_trampoline(fullname: tuple[str, str]) -> TrampolineView: assert schema.endswith(namespace), schema new_name = (schema[:-len(namespace)], name) - view = dbops.View( - name=new_name, - query=f''' - SELECT * FROM {q(*fullname)} - ''', - ) - return TrampolineView(new_name, view) + return TrampolineView(new_name, fullname) def make_view_trampoline(view: dbops.View) -> TrampolineView: diff --git a/edb/schema/delta.py b/edb/schema/delta.py index 627cb2fefc3..1365a94d579 100644 --- a/edb/schema/delta.py +++ b/edb/schema/delta.py @@ -3128,10 +3128,16 @@ def _create_begin( metaclass = self.get_schema_metaclass() props = self.get_resolved_attributes(schema, context) + if not props.get('id'): + if context.schema_object_ids is not None: + specified_id = self.get_prespecified_id(context) + if specified_id is not None: + props['id'] = specified_id + schema, self.scls = metaclass.create_in_schema( schema, stable_ids=context.stable_ids, **props) - if not props.get('id'): + if not self.get_attribute_value('id'): # Record the generated ID. self.set_attribute_value('id', self.scls.id) @@ -3172,11 +3178,6 @@ def canonicalize_attributes( ) -> s_schema.Schema: schema = super().canonicalize_attributes(schema, context) - if context.schema_object_ids is not None: - specified_id = self.get_prespecified_id(context) - if specified_id is not None: - self.set_attribute_value('id', specified_id) - self.set_attribute_value('builtin', context.stdmode) if not self.has_attribute_value('internal'): self.set_attribute_value('internal', context.internal_schema_mode) diff --git a/edb/server/args.py b/edb/server/args.py index af4d0e8f91a..b5c3431d650 100644 --- a/edb/server/args.py +++ b/edb/server/args.py @@ -227,6 +227,7 @@ class ServerConfig(NamedTuple): log_level: str log_to: str bootstrap_only: bool + inplace_upgrade: Optional[pathlib.Path] bootstrap_command: str bootstrap_command_file: pathlib.Path default_branch: Optional[str] @@ -674,6 +675,11 @@ def resolve_envvar_value(self, ctx: click.Context): '--bootstrap-only', is_flag=True, envvar="EDGEDB_SERVER_BOOTSTRAP_ONLY", cls=EnvvarResolver, help='bootstrap the database cluster and exit'), + click.option( + '--inplace-upgrade', type=PathPath(), + envvar="EDGEDB_SERVER_INPLACE_UPGRADE", + cls=EnvvarResolver, # XXX? 
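+        # (EnvvarResolver lets EDGEDB_SERVER_INPLACE_UPGRADE supply the
+        # path, mirroring how --bootstrap-only is declared above.)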
+ help='try to do an in-place upgrade with the specified dump file'), click.option( '--default-branch', type=str, help='the name of the default branch to create'), @@ -1451,6 +1457,7 @@ def parse_args(**kwargs: Any): "backend_dsn", "backend_adaptive_ha", "bootstrap_only", + "inplace_upgrade", "bootstrap_command", "bootstrap_command_file", "instance_name", diff --git a/edb/server/bootstrap.py b/edb/server/bootstrap.py index d8ef7d67c4c..26431d58462 100644 --- a/edb/server/bootstrap.py +++ b/edb/server/bootstrap.py @@ -543,7 +543,11 @@ async def _store_static_json_cache( await _execute(ctx.conn, text) -def _process_delta_params(delta, schema: s_schema.Schema, params) -> tuple[ +def _process_delta_params( + delta, schema: s_schema.Schema, params, + stdmode: bool=True, + **kwargs, +) -> tuple[ s_schema.ChainedSchema, delta_cmds.MetaCommand, delta_cmds.CreateTrampolines, @@ -562,8 +566,9 @@ def _process_delta_params(delta, schema: s_schema.Schema, params) -> tuple[ delta = delta_cmds.CommandMeta.adapt(delta) context = sd.CommandContext( - stdmode=True, + stdmode=stdmode, backend_runtime_params=params, + **kwargs, ) schema = sd.apply(delta, schema=schema, context=context) @@ -1133,15 +1138,18 @@ async def create_branch( # because the original template has the stdschema in it, and so we # use --on-conflict-do-nothing to avoid conflicts since the dump # will have that in it too. That works, except for multi properties - # where it won't conflict. - multi_properties = { + # where it won't conflict, and modules, which might have a different + # 'default' module on each side. (Since it isn't in the stdschema, + # and could have an old id persisted from an in-place upgrade.) + to_delete: set[s_obj.Object] = { prop for prop in schema.get_objects(type=s_props.Property) if prop.get_cardinality(schema).is_multi() and prop.get_name(schema).module not in irtyputils.VIEW_MODULES } + to_delete.add(schema.get('schema::Module')) - for mprop in multi_properties: - name = pg_common.get_backend_name(schema, mprop, catenate=True) + for target in to_delete: + name = pg_common.get_backend_name(schema, target, catenate=True) await conn.sql_execute(f'delete from {name}'.encode('utf-8')) await conn.sql_execute(trampoline.fixup_query(f''' @@ -1491,14 +1499,44 @@ def read_data_cache( src_hash, file_name, source_dir=cache_dir, pickled=pickled) +def cleanup_tpldbdump(tpldbdump: bytes) -> bytes: + # Excluding the "edgedbext" schema above apparently + # doesn't apply to extensions created in that schema, + # so we have to resort to commenting out extension + # statements in the dump. + tpldbdump = re.sub( + rb'^(CREATE|COMMENT ON) EXTENSION.*$', + rb'-- \g<0>', + tpldbdump, + flags=re.MULTILINE, + ) + + # PostgreSQL 14 emits multirange_type_name in RANGE definitions, + # elide these to preserve compatibility with earlier servers. 
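+    # For instance, a dumped "CREATE TYPE ... AS RANGE (subtype = ...,
+    # multirange_type_name = ...)" loses only the multirange_type_name
+    # clause; the rest of the statement is left untouched.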
+ tpldbdump = re.sub( + rb',\s*multirange_type_name\s*=[^,\n]+', + rb'', + tpldbdump, + flags=re.MULTILINE, + ) + + return tpldbdump + + async def _init_stdlib( ctx: BootstrapContext, testmode: bool, global_ids: Mapping[str, uuid.UUID], -) -> Tuple[StdlibBits, config.Spec, edbcompiler.Compiler]: +) -> tuple[ + StdlibBits, + config.Spec, + edbcompiler.Compiler, + Optional[dbops.SQLBlock], # trampoline block +]: in_dev_mode = devmode.is_in_dev_mode() conn = ctx.conn cluster = ctx.cluster + args = ctx.args tpldbdump_cache = 'backend-tpldbdump.sql' src_hash = _calculate_src_hash() @@ -1510,12 +1548,18 @@ async def _init_stdlib( src_hash=src_hash, cache_dir=cache_dir, ) - tpldbdump = read_data_cache( + tpldbdump_package = read_data_cache( tpldbdump_cache, - pickled=False, + pickled=True, src_hash=src_hash, cache_dir=cache_dir, ) + if args.inplace_upgrade: + tpldbdump = None + + tpldbdump, tpldbdump_inplace = None, None + if tpldbdump_package: + tpldbdump, tpldbdump_inplace = tpldbdump_package stdlib_was_none = stdlib is None if stdlib is None: @@ -1538,17 +1582,24 @@ async def _init_stdlib( trampolines=bootstrap_trampolines + stdlib.trampolines ) - logger.info('Creating the necessary PostgreSQL extensions...') backend_params = cluster.get_runtime_params() - await metaschema.create_pg_extensions(conn, backend_params) + if not args.inplace_upgrade: + logger.info('Creating the necessary PostgreSQL extensions...') + await metaschema.create_pg_extensions(conn, backend_params) trampolines = [] trampolines.extend(stdlib.trampolines) - if tpldbdump is None: + eff_tpldbdump = tpldbdump_inplace if args.inplace_upgrade else tpldbdump + if eff_tpldbdump is None: logger.info('Populating internal SQL structures...') assert bootstrap_commands is not None block = dbops.PLTopBlock() + + if not args.inplace_upgrade: + fixed_bootstrap_commands = metaschema.get_fixed_bootstrap_commands() + fixed_bootstrap_commands.generate(block) + bootstrap_commands.generate(block) await _execute_block(conn, block) logger.info('Executing the standard library...') @@ -1567,25 +1618,7 @@ async def _init_stdlib( dump_object_owners=False, ) - # Excluding the "edgedbext" schema above apparently - # doesn't apply to extensions created in that schema, - # so we have to resort to commenting out extension - # statements in the dump. - tpldbdump = re.sub( - rb'^(CREATE|COMMENT ON) EXTENSION.*$', - rb'-- \g<0>', - tpldbdump, - flags=re.MULTILINE, - ) - - # PostgreSQL 14 emits multirange_type_name in RANGE definitions, - # elide these to preserve compatibility with earlier servers. - tpldbdump = re.sub( - rb',\s*multirange_type_name\s*=[^,\n]+', - rb'', - tpldbdump, - flags=re.MULTILINE, - ) + tpldbdump = cleanup_tpldbdump(tpldbdump) # The instance metadata doesn't go in the dump, so collect # it ourselves. @@ -1622,11 +1655,28 @@ async def _init_stdlib( tpldbdump += b'\n' + text.encode('utf-8') + # XXX: TODO: We are going to need to deal with tuple types + # in edgedbpub... 
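+            # (The include_schemas list below only covers the versioned
+            # edgedb/edgedbstd/edgedbsql namespaces, so composite types
+            # living in edgedbpub are not captured by this dump yet.)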
+ tpldbdump_inplace = await cluster.dump_database( + tpl_pg_db_name, + include_schemas=[ + pg_common.versioned_schema('edgedb'), + pg_common.versioned_schema('edgedbstd'), + pg_common.versioned_schema('edgedbsql'), + ], + dump_object_owners=False, + ) + tpldbdump_inplace = cleanup_tpldbdump(tpldbdump_inplace) + + # XXX: BE SMARTER ABOUT THIS, DON'T DO ALL THAT WORK + if args.inplace_upgrade: + tpldbdump = None + buildmeta.write_data_cache( - tpldbdump, + (tpldbdump, tpldbdump_inplace), src_hash, tpldbdump_cache, - pickled=False, + pickled=True, target_dir=cache_dir, ) @@ -1638,7 +1688,7 @@ async def _init_stdlib( ) else: logger.info('Initializing the standard library...') - await _execute(conn, tpldbdump.decode('utf-8')) + await _execute(conn, eff_tpldbdump.decode('utf-8')) # Restore the search_path as the dump might have altered it. await conn.sql_execute( b"SELECT pg_catalog.set_config('search_path', 'edgedb', false)") @@ -1657,6 +1707,8 @@ async def _init_stdlib( # those are picked up. config_spec = config.load_spec_from_schema(stdlib.stdschema) + logger.info('Finalizing database setup...') + # Make sure that schema backend_id properties are in sync with # the database. # XXX: is ScalarType sufficient here? @@ -1765,9 +1817,14 @@ async def _init_stdlib( tramps.add_commands([t.make() for t in trampolines]) block = dbops.PLTopBlock() tramps.generate(block) - await _execute_block(conn, block) - return stdlib, config_spec, compiler + if args.inplace_upgrade: + trampoline_block = block + else: + await _execute_block(conn, block) + trampoline_block = None + + return stdlib, config_spec, compiler, trampoline_block async def _init_defaults(schema, compiler, conn): @@ -2376,7 +2433,10 @@ async def _bootstrap_edgedb_super_roles(ctx: BootstrapContext) -> uuid.UUID: return superuser_uid -async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: +async def _bootstrap( + ctx: BootstrapContext, + no_template: bool=False, +) -> tuple[edbcompiler.CompilerState, Optional[dbops.SQLBlock]]: args = ctx.args cluster = ctx.cluster backend_params = cluster.get_runtime_params() @@ -2399,16 +2459,20 @@ async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: else: superuser_uid = uuidgen.uuid1mc() - if backend_params.has_create_database: - new_template_db_id = await _create_edgedb_template_database(ctx) + using_template = backend_params.has_create_database and not no_template + + if using_template: + if not args.inplace_upgrade: + new_template_db_id = await _create_edgedb_template_database(ctx) + # XXX: THIS IS WRONG, RIGHT? 
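+            # (For an in-place upgrade the template database already
+            # exists, so nothing is created here and the fresh id generated
+            # below is a stand-in rather than the id of a new template.)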
+ else: + new_template_db_id = uuidgen.uuid1mc() tpl_db = cluster.get_db_name(edbdef.EDGEDB_TEMPLATE_DB) conn = PGConnectionProxy(cluster, tpl_db) - else: - new_template_db_id = uuidgen.uuid1mc() - if backend_params.has_create_database: tpl_ctx = dataclasses.replace(ctx, conn=conn) else: + new_template_db_id = uuidgen.uuid1mc() tpl_ctx = ctx in_dev_mode = devmode.is_in_dev_mode() @@ -2426,7 +2490,7 @@ async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: await _populate_misc_instance_data(tpl_ctx) - stdlib, config_spec, compiler = await _init_stdlib( + stdlib, config_spec, compiler, trampoline_block = await _init_stdlib( tpl_ctx, testmode=args.testmode, global_ids={ @@ -2482,7 +2546,8 @@ async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: ) schema = s_schema.EMPTY_SCHEMA - schema = await _init_defaults(schema, compiler, tpl_ctx.conn) + if not no_template: + schema = await _init_defaults(schema, compiler, tpl_ctx.conn) # Run analyze on the template database, so that new dbs start # with up-to-date statistics. @@ -2494,7 +2559,7 @@ async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: b"SELECT pg_advisory_unlock(3987734529)", ) - if backend_params.has_create_database: + if using_template: # Close the connection to the template database # and wait until it goes away on the server side # so that we can safely use the template for new @@ -2514,7 +2579,9 @@ async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: async with iteration: await _pg_ensure_database_not_connected(ctx.conn, tpl_db) - if backend_params.has_create_database: + if args.inplace_upgrade: + pass + elif backend_params.has_create_database: await _create_edgedb_database( ctx, edbdef.EDGEDB_SYSTEM_DB, @@ -2544,7 +2611,9 @@ async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: compiler=compiler, ) - if backend_params.has_create_database: + if args.inplace_upgrade: + pass + elif backend_params.has_create_database: await _create_edgedb_database( ctx, default_branch, @@ -2581,7 +2650,211 @@ async def _bootstrap(ctx: BootstrapContext) -> edbcompiler.CompilerState: args.default_database_user or edbdef.EDGEDB_SUPERUSER, ) - return compiler.state + return compiler.state, trampoline_block + + +async def _load_schema( + ctx: BootstrapContext, state: edbcompiler.CompilerState +) -> s_schema.ChainedSchema: + assert state.global_intro_query + json_data = await ctx.conn.sql_fetch_val( + state.global_intro_query.encode('utf-8')) + global_schema = s_refl.parse_into( + base_schema=state.std_schema, + schema=s_schema.EMPTY_SCHEMA, + data=json_data, + schema_class_layout=state.schema_class_layout, + ) + + return s_schema.ChainedSchema( + state.std_schema, + s_schema.EMPTY_SCHEMA, + global_schema, + ) + + +async def _upgrade_one( + ctx: BootstrapContext, + state: edbcompiler.CompilerState, + upgrade_data: Optional[Any], +) -> None: + if not upgrade_data: + return + + backend_params = ctx.cluster.get_runtime_params() + assert backend_params.has_create_database + + ddl = upgrade_data['ddl'] + # ids: + schema_object_ids = { + ( + sn.name_from_string(name), qltype if qltype else None + ): uuidgen.UUID(objid) + for name, qltype, objid in upgrade_data['ids'] + } + + logger.info('Populating schema tables...') + + # Load the schemas + schema = await _load_schema(ctx, state) + + compilerctx = edbcompiler.new_compiler_context( + compiler_state=state, + user_schema=schema.get_top_schema(), + bootstrap_mode=False, # MAYBE? 
+ ) + + # Apply the DDL, but *only* execute the schema storage part!! + for ddl_cmd in edgeql.parse_block(ddl): + current_block = dbops.PLTopBlock() + + keys: dict[str, Any] = dict( + testmode=True, + allow_dml_in_functions=True, + ) + + if debug.flags.sdl_loading: + ddl_cmd.dump_edgeql() + + assert isinstance(ddl_cmd, qlast.DDLCommand) + delta_command = s_ddl.delta_from_ddl( + ddl_cmd, modaliases={}, schema=schema, + schema_object_ids=schema_object_ids, + **keys, + ) + schema, plan, _ = _process_delta_params( + delta_command, + schema, + backend_params, + stdmode=False, + **keys, + ) + + compilerctx.state.current_tx().update_schema(schema) + + context = sd.CommandContext(**keys) + edbcompiler.compile_schema_storage_in_delta( + ctx=compilerctx, + delta=plan, + block=current_block, + context=context, + ) + + # TODO: Should we batch them all up? + patch = current_block.to_string() + + if debug.flags.delta_execute: + debug.header('Patch Script') + debug.dump_code(patch, lexer='sql') + + try: + await ctx.conn.sql_execute(patch.encode('utf-8')) + except Exception: + raise + + new_local_spec = config.load_spec_from_schema( + schema, + only_exts=True, + # suppress validation because we might be in an intermediate state + validate=False, + ) + spec_json = config.spec_to_json(new_local_spec) + await ctx.conn.sql_execute(trampoline.fixup_query(f'''\ + UPDATE + edgedbinstdata_VER.instdata + SET + json = {pg_common.quote_literal(spec_json)} + WHERE + key = 'configspec_ext'; + ''').encode('utf-8')) + + +async def _cleanup_one( + ctx: BootstrapContext, + state: edbcompiler.CompilerState, + trampoline_block: dbops.SQLBlock, +) -> None: + conn = ctx.conn + + await _execute_block(conn, trampoline_block) + + namespaces = json.loads(await conn.sql_fetch_val(""" + select json_agg(nspname) from pg_namespace + where nspname like 'edgedb%\\_v%' + """.encode('utf-8'))) + + cur_suffix = pg_common.versioned_schema("") + to_delete = [x for x in namespaces if not x.endswith(cur_suffix)] + + # TODO: Should we try to query functions/tables/views/types and + # pg_depend to make sure that nothing wrong is going to get + # cascaded?? + # It is *really* dumb the way that CASCADE works in postgres. + await conn.sql_execute(f""" + drop schema {', '.join(to_delete)} cascade + """.encode('utf-8')) + + +async def _upgrade_all( + ctx: BootstrapContext, + state: edbcompiler.CompilerState, + trampoline_block: dbops.SQLBlock, +) -> None: + cluster = ctx.cluster + + tpl_db = cluster.get_db_name(edbdef.EDGEDB_TEMPLATE_DB) + conn = PGConnectionProxy(cluster, tpl_db) + + # FIXME: Use the sys query instead? + try: + databases = json.loads(await conn.sql_fetch_val( + trampoline.fixup_query(""" + SELECT json_agg(name) FROM edgedb_VER."_SysBranch"; + """).encode('utf-8'), + )) + finally: + conn.terminate() + + assert ctx.args.inplace_upgrade + with open(ctx.args.inplace_upgrade) as f: + upgrade_data = json.load(f) + + # DEBUG VELOCITY HACK: You can add a failing database to EARLY + # when trying to upgrade the whole suite. + EARLY: tuple[str, ...] 
= () + databases.sort(key=lambda k: (k not in EARLY, k)) + + for database in databases: + if database == edbdef.EDGEDB_TEMPLATE_DB: + continue + + conn = PGConnectionProxy(cluster, ctx.cluster.get_db_name(database)) + try: + subctx = dataclasses.replace(ctx, conn=conn) + + logger.info(f"Upgrading database '{database}'") + await _bootstrap(ctx=subctx, no_template=True) + + logger.info(f"Populating schema tables for '{database}'") + await _upgrade_one( + ctx=subctx, + state=state, + upgrade_data=upgrade_data.get(database), + ) + + # XXX: This is not the right place to do this. We only + # want to do this if everything succeeds. + await _cleanup_one(subctx, state, trampoline_block) + finally: + conn.terminate() + + conn = PGConnectionProxy( + cluster, ctx.cluster.get_db_name(edbdef.EDGEDB_TEMPLATE_DB)) + try: + subctx = dataclasses.replace(ctx, conn=conn) + await _cleanup_one(subctx, state, trampoline_block) + finally: + conn.terminate() async def ensure_bootstrapped( @@ -2599,8 +2872,13 @@ async def ensure_bootstrapped( try: mode = await _get_cluster_mode(ctx) ctx = dataclasses.replace(ctx, mode=mode) - if mode == ClusterMode.pristine: - state = await _bootstrap(ctx) + if mode == ClusterMode.pristine or args.inplace_upgrade: + state, trampoline_block = await _bootstrap(ctx) + + if args.inplace_upgrade: + assert trampoline_block + await _upgrade_all(ctx, state, trampoline_block) + return True, state else: state = await _start(ctx) diff --git a/edb/server/compiler/compiler.py b/edb/server/compiler/compiler.py index 629ca91111d..b6ce3d3346a 100644 --- a/edb/server/compiler/compiler.py +++ b/edb/server/compiler/compiler.py @@ -1153,26 +1153,8 @@ def describe_database_dump( schema_ddl = s_ddl.ddl_text_from_schema( schema, include_migrations=True) - all_objects: Iterable[s_obj.Object] = schema.get_objects( - exclude_stdlib=True, - exclude_global=True, - ) - ids = [] - sequences = [] - for obj in all_objects: - if isinstance(obj, s_obj.QualifiedObject): - ql_class = '' - else: - ql_class = str(type(obj).get_ql_class_or_die()) - - ids.append(( - str(obj.get_name(schema)), - ql_class, - obj.id.bytes, - )) - - if isinstance(obj, s_types.Type) and obj.is_sequence(schema): - sequences.append(obj.id) + ids, sequences = get_obj_ids(schema) + raw_ids = [(name, cls, id.bytes) for name, cls, id in ids] objtypes = schema.get_objects( type=s_objtypes.ObjectType, @@ -1202,7 +1184,7 @@ def describe_database_dump( return DumpDescriptor( schema_ddl='\n'.join([sys_config_ddl, schema_ddl, user_config_ddl]), schema_dynamic_ddl=tuple(dynamic_ddl), - schema_ids=ids, + schema_ids=raw_ids, blocks=descriptors, ) @@ -1814,6 +1796,8 @@ def _compile_ql_administer( return ddl.administer_reindex(ctx, ql) elif ql.expr.func == 'vacuum': return ddl.administer_vacuum(ctx, ql) + elif ql.expr.func == 'prepare_upgrade': + return ddl.administer_prepare_upgrade(ctx, ql) else: raise errors.QueryError( 'Unknown ADMINISTER function', @@ -3063,6 +3047,45 @@ def _extract_params( return oparams, in_type_args # type: ignore[return-value] +def get_obj_ids( + schema: s_schema.Schema, + *, + include_extras: bool=False, +) -> tuple[list[tuple[str, str, uuid.UUID]], list[uuid.UUID]]: + all_objects: Iterable[s_obj.Object] = schema.get_objects( + exclude_stdlib=True, + exclude_global=True, + ) + ids = [] + sequences = [] + for obj in all_objects: + if isinstance(obj, s_obj.QualifiedObject): + ql_class = '' + else: + ql_class = str(type(obj).get_ql_class_or_die()) + + name = str(obj.get_name(schema)) + ids.append(( + name, + ql_class, + obj.id, + )) 
+ + if isinstance(obj, s_types.Type) and obj.is_sequence(schema): + sequences.append(obj.id) + + if include_extras and isinstance(obj, s_func.Function): + backend_name = obj.get_backend_name(schema) + if backend_name: + ids.append(( + name, + f'{ql_class or None}-backend_name', + backend_name, + )) + + return ids, sequences + + def _describe_object( schema: s_schema.Schema, source: s_obj.Object, diff --git a/edb/server/compiler/ddl.py b/edb/server/compiler/ddl.py index 93dea044ced..ec67fd4b2cd 100644 --- a/edb/server/compiler/ddl.py +++ b/edb/server/compiler/ddl.py @@ -1541,3 +1541,37 @@ def administer_vacuum( sql=(command.encode('utf-8'),), is_transactional=False, ) + + +def administer_prepare_upgrade( + ctx: compiler.CompileContext, + ql: qlast.AdministerStmt, +) -> dbstate.BaseQuery: + + user_schema = ctx.state.current_tx().get_user_schema() + global_schema = ctx.state.current_tx().get_global_schema() + + schema = s_schema.ChainedSchema( + ctx.compiler_state.std_schema, + user_schema, + global_schema + ) + + schema_ddl = s_ddl.ddl_text_from_schema( + schema, include_migrations=True) + ids, _ = compiler.get_obj_ids(schema, include_extras=True) + json_ids = [(name, cls, str(id)) for name, cls, id in ids] + + obj = dict( + ddl=schema_ddl, ids=json_ids + ) + + desc_ql = edgeql.parse_query( + f'SELECT to_json({qlquote.quote_literal(json.dumps(obj))})' + ) + return compiler._compile_ql_query( + ctx, + desc_ql, + cacheable=False, + migration_block_query=True, + ) diff --git a/tests/inplace-testing/make-and-prep.sh b/tests/inplace-testing/make-and-prep.sh new file mode 100755 index 00000000000..7378a22cea7 --- /dev/null +++ b/tests/inplace-testing/make-and-prep.sh @@ -0,0 +1,14 @@ +#!/bin/bash -ex + +DIR="$1" +shift +PORT=12346 + +edb inittestdb -D "$DIR" "$@" +edb server -D "$DIR" -P $PORT & +SPID=$! + +EDGEDB_PORT=$PORT EDGEDB_CLIENT_TLS_SECURITY=insecure python3 tests/inplace-testing/prep-upgrades.py > "${DIR}/upgrade.json" + +kill $SPID +wait $SPID diff --git a/tests/inplace-testing/prep-upgrades.py b/tests/inplace-testing/prep-upgrades.py new file mode 100644 index 00000000000..484be16dd40 --- /dev/null +++ b/tests/inplace-testing/prep-upgrades.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 + +import edgedb +import json +import sys + + +def main(argv): + con = edgedb.create_client() + + dbs = con.query(''' + select sys::Database.name + ''') + + con.close() + + datas = {} + for db in dbs: + con = edgedb.create_client(database=db) + output = json.loads(con.query_single(''' + administer prepare_upgrade() + ''')) + datas[db] = output + + print(json.dumps(datas)) + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/tests/inplace-testing/test.sh b/tests/inplace-testing/test.sh new file mode 100755 index 00000000000..877305cb60d --- /dev/null +++ b/tests/inplace-testing/test.sh @@ -0,0 +1,23 @@ +#!/bin/bash -ex + +DIR="$1" +shift + +if ! git diff-index --quiet HEAD --; then + set +x + echo Refusing to run in-place upgrade test with dirty git state. 
+ echo "(The test makes local modifications.)" + exit 1 +fi + +./tests/inplace-testing/make-and-prep.sh "$DIR" "$@" + +tar cf "$DIR".tar "$DIR" + +patch -f -p1 < tests/inplace-testing/upgrade.patch + +edb server --bootstrap-only --inplace-upgrade "$DIR"/upgrade.json --data-dir "$DIR" + +tar cf "$DIR"-cooked.tar "$DIR" + +edb test --data-dir "$DIR" --use-data-dir-dbs -v diff --git a/tests/inplace-testing/upgrade.patch b/tests/inplace-testing/upgrade.patch new file mode 100644 index 00000000000..ce5ab53a8a8 --- /dev/null +++ b/tests/inplace-testing/upgrade.patch @@ -0,0 +1,111 @@ +diff --git a/edb/buildmeta.py b/edb/buildmeta.py +index a56a33964..855e7730f 100644 +--- a/edb/buildmeta.py ++++ b/edb/buildmeta.py +@@ -68,6 +68,12 @@ class MetadataError(Exception): + pass + + ++# HACK: Put this down here so it overrides the above version without ++# merge conflicting with them. ++EDGEDB_CATALOG_VERSION = 2030_01_01_00_00 ++EDGEDB_MAJOR_VERSION = 1000 ++ ++ + class BackendVersion(NamedTuple): + major: int + minor: int +diff --git a/edb/lib/_testmode.edgeql b/edb/lib/_testmode.edgeql +index 508382ecd..addd01b4f 100644 +--- a/edb/lib/_testmode.edgeql ++++ b/edb/lib/_testmode.edgeql +@@ -214,6 +214,15 @@ create extension package _conf VERSION '1.0' { + + # std::_gen_series + ++CREATE FUNCTION ++std::_upgrade_test( ++) -> std::str ++{ ++ SET volatility := 'Immutable'; ++ USING ('asdf'); ++}; ++ ++ + CREATE FUNCTION + std::_gen_series( + `start`: std::int64, +diff --git a/edb/lib/schema.edgeql b/edb/lib/schema.edgeql +index 1cd386fc0..9e515f5d0 100644 +--- a/edb/lib/schema.edgeql ++++ b/edb/lib/schema.edgeql +@@ -536,6 +536,9 @@ CREATE TYPE schema::Global EXTENDING schema::AnnotationSubject { + CREATE TYPE schema::Function + EXTENDING schema::CallableObject, schema::VolatilitySubject + { ++ CREATE PROPERTY test_field_a -> std::str; ++ CREATE PROPERTY test_nativecode_size -> std::int64; ++ + CREATE PROPERTY preserves_optionality -> std::bool { + SET default := false; + }; +diff --git a/edb/schema/functions.py b/edb/schema/functions.py +index 9cca3de6d..c22d3f414 100644 +--- a/edb/schema/functions.py ++++ b/edb/schema/functions.py +@@ -1237,6 +1237,27 @@ class Function( + data_safe=True, + ): + ++ ## ++ test_field_a = so.SchemaField( ++ str, ++ default=None, ++ compcoef=0.4, ++ allow_ddl_set=True, ++ ) ++ ++ test_field_b = so.SchemaField( ++ str, ++ default=None, ++ compcoef=0.4, ++ allow_ddl_set=True, ++ ) ++ ++ test_nativecode_size = so.SchemaField( ++ int, ++ default=None, ++ ) ++ ## ++ + used_globals = so.SchemaField( + so.ObjectSet[s_globals.Global], + coerce=True, default=so.DEFAULT_CONSTRUCTOR, +@@ -1608,6 +1629,10 @@ class FunctionCommand( + nativecode.not_compiled() + ) + ++ if self.has_attribute_value('nativecode'): ++ code = self.get_attribute_value('nativecode') ++ self.set_attribute_value('test_nativecode_size', len(code.text)) ++ + # Resolving 'nativecode' has side effects on has_dml and + # volatility, so force it to happen as part of + # canonicalization of attributes. +diff --git a/edb/schema/operators.py b/edb/schema/operators.py +index a98cd3fec..bc9a3aea7 100644 +--- a/edb/schema/operators.py ++++ b/edb/schema/operators.py +@@ -68,12 +68,6 @@ class Operator( + code = so.SchemaField( + str, default=None, compcoef=0.4) + +- # An unused dummy field. We have this here to make it easier to +- # test the *removal* of internal schema fields during in-place +- # upgrades. 
+- _dummy_field = so.SchemaField( +- str, default=None) +- + # If this is a derivative operator, *derivative_of* would + # contain the name of the origin operator. + # For example, the `std::IN` operator has `std::=`