From 44e9781e1fa850a560a81edf733764f89457a584 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Tue, 27 Aug 2024 22:23:51 +0300 Subject: [PATCH 1/3] fix: Dump/Restore fails when masking tables with a generated column * Added attributes for metadata handling. * The restore command now retrieves table definitions from metadata in storage. * Added OVERRIDING SYSTEM VALUE to the INSERT statements for columns with GENERATED ALWAYS AS IDENTITY, now set by default. * Generated columns in toolkit.Table are now excluded from Columns list * Grouped imports Closes #183 Co-authored-by: Vincent Wilson vinnyjth@users.noreply.github.com --- internal/db/postgres/cmd/dump.go | 6 ++- internal/db/postgres/cmd/restore.go | 28 ++++++++++++-- internal/db/postgres/context/pg_catalog.go | 2 +- internal/db/postgres/context/schema.go | 3 +- internal/db/postgres/context/table.go | 8 +++- internal/db/postgres/entries/table.go | 2 + .../postgres/restorers/table_insert_format.go | 37 +++++++++++++++---- internal/db/postgres/storage/metadata_json.go | 15 ++++++-- 8 files changed, 81 insertions(+), 20 deletions(-) diff --git a/internal/db/postgres/cmd/dump.go b/internal/db/postgres/cmd/dump.go index 0a4f6ae6..bc4ccd0b 100644 --- a/internal/db/postgres/cmd/dump.go +++ b/internal/db/postgres/cmd/dump.go @@ -40,6 +40,7 @@ import ( "github.com/greenmaskio/greenmask/internal/db/postgres/transformers/utils" "github.com/greenmaskio/greenmask/internal/domains" "github.com/greenmaskio/greenmask/internal/storages" + "github.com/greenmaskio/greenmask/pkg/toolkit" ) const MetadataJsonFileName = "metadata.json" @@ -58,6 +59,7 @@ type Dump struct { schemaToc *toc.Toc resultToc *toc.Toc dumpedObjectSizes map[int32]storageDto.ObjectSizeStat + tableOidToDumpId map[toolkit.Oid]int32 tocFileSize int64 version int blobs *entries.Blobs @@ -81,6 +83,7 @@ func NewDump(cfg *domains.Config, st storages.Storager, registry *utils.Transfor tmpDir: path.Join(cfg.Common.TempDirectory, fmt.Sprintf("%d", time.Now().UnixNano())), dumpedObjectSizes: map[int32]storageDto.ObjectSizeStat{}, registry: registry, + tableOidToDumpId: make(map[toolkit.Oid]int32), } } @@ -300,6 +303,7 @@ func (d *Dump) createTocEntries() error { } switch v := obj.(type) { case *entries.Table: + d.tableOidToDumpId[v.Oid] = entry.DumpId d.dumpedObjectSizes[entry.DumpId] = storageDto.ObjectSizeStat{ Original: v.OriginalSize, Compressed: v.CompressedSize, @@ -414,7 +418,7 @@ func (d *Dump) writeMetaData(ctx context.Context, startedAt, completedAt time.Ti cycles := d.context.Graph.GetCycledTables() metadata, err := storageDto.NewMetadata( d.resultToc, d.tocFileSize, startedAt, completedAt, d.config.Dump.Transformation, d.dumpedObjectSizes, - d.context.DatabaseSchema, d.dumpDependenciesGraph, d.sortedTablesDumpIds, cycles, + d.context.DatabaseSchema, d.dumpDependenciesGraph, d.sortedTablesDumpIds, cycles, d.tableOidToDumpId, ) if err != nil { return fmt.Errorf("unable build metadata: %w", err) diff --git a/internal/db/postgres/cmd/restore.go b/internal/db/postgres/cmd/restore.go index d95c62d6..a9bb8d2f 100644 --- a/internal/db/postgres/cmd/restore.go +++ b/internal/db/postgres/cmd/restore.go @@ -36,13 +36,13 @@ import ( "golang.org/x/sync/errgroup" "gopkg.in/yaml.v3" - "github.com/greenmaskio/greenmask/internal/domains" - "github.com/greenmaskio/greenmask/internal/db/postgres/pgrestore" "github.com/greenmaskio/greenmask/internal/db/postgres/restorers" "github.com/greenmaskio/greenmask/internal/db/postgres/storage" "github.com/greenmaskio/greenmask/internal/db/postgres/toc" + "github.com/greenmaskio/greenmask/internal/domains" "github.com/greenmaskio/greenmask/internal/storages" + "github.com/greenmaskio/greenmask/pkg/toolkit" ) const ( @@ -72,6 +72,10 @@ const metadataObjectName = "metadata.json" const dependenciesCheckInterval = 15 * time.Millisecond +var ( + ErrTableDefinitionIsEmtpy = errors.New("table definition is empty: please re-dump the data using the latest version of greenmask if you want to use --inserts") +) + type Restore struct { binPath string dsn string @@ -641,8 +645,12 @@ func (r *Restore) taskPusher(ctx context.Context, tasks chan restorers.RestoreTa switch *entry.Desc { case toc.TableDataDesc: if r.restoreOpt.Inserts || r.restoreOpt.OnConflictDoNothing { + t, err := r.getTableDefinitionFromMeta(entry.DumpId) + if err != nil { + return fmt.Errorf("cannot get table definition from meta: %w", err) + } task = restorers.NewTableRestorerInsertFormat( - entry, r.st, r.restoreOpt.ExitOnError, r.restoreOpt.OnConflictDoNothing, + entry, t, r.st, r.restoreOpt.ExitOnError, r.restoreOpt.OnConflictDoNothing, r.cfg.ErrorExclusions, r.restoreOpt.Pgzip, ) } else { @@ -671,6 +679,20 @@ func (r *Restore) taskPusher(ctx context.Context, tasks chan restorers.RestoreTa } } +func (r *Restore) getTableDefinitionFromMeta(dumpId int32) (*toolkit.Table, error) { + tableOid, ok := r.metadata.DumpIdsToTableOid[dumpId] + if !ok { + return nil, ErrTableDefinitionIsEmtpy + } + idx := slices.IndexFunc(r.metadata.DatabaseSchema, func(t *toolkit.Table) bool { + return t.Oid == tableOid + }) + if idx == -1 { + panic(fmt.Sprintf("table with oid %d is not found in metadata", tableOid)) + } + return r.metadata.DatabaseSchema[idx], nil +} + func (r *Restore) restoreWorker(ctx context.Context, tasks <-chan restorers.RestoreTask, id int) error { // TODO: You should execute TX for each COPY stmt conn, err := pgx.Connect(ctx, r.dsn) diff --git a/internal/db/postgres/context/pg_catalog.go b/internal/db/postgres/context/pg_catalog.go index bf43aed2..3733954b 100644 --- a/internal/db/postgres/context/pg_catalog.go +++ b/internal/db/postgres/context/pg_catalog.go @@ -158,7 +158,7 @@ func getTables( // Columns were already initialized during the transformer initialization continue } - columns, err := getColumnsConfig(ctx, tx, t.Oid, version) + columns, err := getColumnsConfig(ctx, tx, t.Oid, version, true) if err != nil { return nil, nil, fmt.Errorf("unable to collect table columns: %w", err) } diff --git a/internal/db/postgres/context/schema.go b/internal/db/postgres/context/schema.go index 50e76fe9..f263b1fd 100644 --- a/internal/db/postgres/context/schema.go +++ b/internal/db/postgres/context/schema.go @@ -41,7 +41,8 @@ func getDatabaseSchema( // fill columns for _, table := range res { - columns, err := getColumnsConfig(ctx, tx, table.Oid, version) + // We do not exclude generated columns here, because the schema must be compared with the original + columns, err := getColumnsConfig(ctx, tx, table.Oid, version, false) if err != nil { return nil, err } diff --git a/internal/db/postgres/context/table.go b/internal/db/postgres/context/table.go index ad14a4b9..321f5260 100644 --- a/internal/db/postgres/context/table.go +++ b/internal/db/postgres/context/table.go @@ -83,7 +83,7 @@ func validateAndBuildTablesConfig( table.Constraints = constraints // Assign columns and transformersMap if were found - columns, err := getColumnsConfig(ctx, tx, table.Oid, version) + columns, err := getColumnsConfig(ctx, tx, table.Oid, version, true) if err != nil { return nil, nil, err } @@ -230,7 +230,7 @@ func getTable(ctx context.Context, tx pgx.Tx, t *domains.Table) ([]*entries.Tabl return tables, warnings, nil } -func getColumnsConfig(ctx context.Context, tx pgx.Tx, oid toolkit.Oid, version int) ([]*toolkit.Column, error) { +func getColumnsConfig(ctx context.Context, tx pgx.Tx, oid toolkit.Oid, version int, excludeGenerated bool) ([]*toolkit.Column, error) { defaultTypeMap := pgtype.NewMap() var res []*toolkit.Column buf := bytes.NewBuffer(nil) @@ -260,6 +260,10 @@ func getColumnsConfig(ctx context.Context, tx pgx.Tx, oid toolkit.Oid, version i if err != nil { return nil, fmt.Errorf("cannot scan tableColumnQuery: %w", err) } + // Skipping generated columns as they do not contain a real data + if excludeGenerated && column.IsGenerated { + continue + } column.CanonicalTypeName = column.TypeName // Getting canonical type name if exists. For instance - PostgreSQL type Integer is alias for int4 // (int4 - canonical type name) diff --git a/internal/db/postgres/entries/table.go b/internal/db/postgres/entries/table.go index 4eef88ad..629fb4a9 100644 --- a/internal/db/postgres/entries/table.go +++ b/internal/db/postgres/entries/table.go @@ -130,6 +130,8 @@ func (t *Table) Entry() (*toc.Entry, error) { } func (t *Table) GetCopyFromStatement() (string, error) { + // We could generate an explicit column list for the COPY statement, but it’s not necessary because, by default, + // generated columns are excluded from the COPY operation. query := fmt.Sprintf("COPY \"%s\".\"%s\" TO STDOUT", t.Schema, t.Name) if t.Query != "" { query = fmt.Sprintf("COPY (%s) TO STDOUT", t.Query) diff --git a/internal/db/postgres/restorers/table_insert_format.go b/internal/db/postgres/restorers/table_insert_format.go index 5700493e..449f9973 100644 --- a/internal/db/postgres/restorers/table_insert_format.go +++ b/internal/db/postgres/restorers/table_insert_format.go @@ -23,19 +23,22 @@ import ( "slices" "strings" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" + "github.com/rs/zerolog/log" + "github.com/greenmaskio/greenmask/internal/db/postgres/pgcopy" "github.com/greenmaskio/greenmask/internal/db/postgres/toc" "github.com/greenmaskio/greenmask/internal/domains" "github.com/greenmaskio/greenmask/internal/storages" "github.com/greenmaskio/greenmask/internal/utils/ioutils" "github.com/greenmaskio/greenmask/internal/utils/reader" - "github.com/jackc/pgx/v5" - "github.com/jackc/pgx/v5/pgconn" - "github.com/rs/zerolog/log" + "github.com/greenmaskio/greenmask/pkg/toolkit" ) type TableRestorerInsertFormat struct { Entry *toc.Entry + Table *toolkit.Table St storages.Storager doNothing bool exitOnError bool @@ -46,7 +49,7 @@ type TableRestorerInsertFormat struct { } func NewTableRestorerInsertFormat( - entry *toc.Entry, st storages.Storager, exitOnError bool, + entry *toc.Entry, t *toolkit.Table, st storages.Storager, exitOnError bool, doNothing bool, exclusions *domains.DataRestorationErrorExclusions, usePgzip bool, ) *TableRestorerInsertFormat { @@ -75,6 +78,7 @@ func NewTableRestorerInsertFormat( } return &TableRestorerInsertFormat{ + Table: t, Entry: entry, St: st, exitOnError: exitOnError, @@ -166,9 +170,13 @@ func (td *TableRestorerInsertFormat) streamInsertData(ctx context.Context, conn return nil } -func (td *TableRestorerInsertFormat) generateInsertStmt(row *pgcopy.Row, onConflictDoNothing bool) string { +func (td *TableRestorerInsertFormat) generateInsertStmt(onConflictDoNothing bool) string { var placeholders []string - for i := 0; i < row.Length(); i++ { + var columnNames []string + columns := getRealColumns(td.Table.Columns) + for i := 0; i < len(columns); i++ { + column := fmt.Sprintf(`"%s"`, columns[i].Name) + columnNames = append(columnNames, column) placeholders = append(placeholders, fmt.Sprintf("$%d", i+1)) } var onConflict string @@ -177,9 +185,10 @@ func (td *TableRestorerInsertFormat) generateInsertStmt(row *pgcopy.Row, onConfl } res := fmt.Sprintf( - `INSERT INTO %s.%s VALUES (%s)%s`, + `INSERT INTO %s.%s (%s) OVERRIDING SYSTEM VALUE VALUES (%s)%s`, *td.Entry.Namespace, *td.Entry.Tag, + strings.Join(columnNames, ", "), strings.Join(placeholders, ", "), onConflict, ) @@ -190,7 +199,7 @@ func (td *TableRestorerInsertFormat) insertDataOnConflictDoNothing( ctx context.Context, conn *pgx.Conn, row *pgcopy.Row, ) error { if td.query == "" { - td.query = td.generateInsertStmt(row, td.doNothing) + td.query = td.generateInsertStmt(td.doNothing) } // TODO: The implementation based on pgx.Conn.Exec is not efficient for bulk inserts. @@ -257,3 +266,15 @@ func isTerminationSeq(data []byte) bool { } return false } + +// GetRealColumns - returns only real columns (not generated) +func getRealColumns(columns []*toolkit.Column) []*toolkit.Column { + res := make([]*toolkit.Column, 0, len(columns)) + for _, col := range columns { + if col.IsGenerated { + continue + } + res = append(res, col) + } + return res +} diff --git a/internal/db/postgres/storage/metadata_json.go b/internal/db/postgres/storage/metadata_json.go index 15fc49bc..a49093f2 100644 --- a/internal/db/postgres/storage/metadata_json.go +++ b/internal/db/postgres/storage/metadata_json.go @@ -20,10 +20,9 @@ import ( "github.com/rs/zerolog/log" - "github.com/greenmaskio/greenmask/pkg/toolkit" - "github.com/greenmaskio/greenmask/internal/db/postgres/toc" "github.com/greenmaskio/greenmask/internal/domains" + "github.com/greenmaskio/greenmask/pkg/toolkit" ) type ObjectSizeStat struct { @@ -72,6 +71,8 @@ type Metadata struct { DependenciesGraph map[int32][]int32 `yaml:"dependencies_graph" json:"dependencies_graph"` DumpIdsOrder []int32 `yaml:"dump_ids_order" json:"dump_ids_order"` Cycles [][]string `yaml:"cycles" json:"cycles"` + TableOidToDumpId map[toolkit.Oid]int32 `yaml:"table_dump_id" json:"table_dump_id"` + DumpIdsToTableOid map[int32]toolkit.Oid `yaml:"dump_id_table" json:"dump_id_table"` } func NewMetadata( @@ -79,7 +80,7 @@ func NewMetadata( completedAt time.Time, transformers []*domains.Table, stats map[int32]ObjectSizeStat, databaseSchema []*toolkit.Table, dependenciesGraph map[int32][]int32, dumpIdsOrder []int32, - cycles [][]string, + cycles [][]string, tableOidToDumpId map[toolkit.Oid]int32, ) (*Metadata, error) { var format string @@ -160,6 +161,10 @@ func NewMetadata( totalOriginalSize += tocFileSize totalCompressedSize += tocFileSize + var dumpIdsToTableOid = make(map[int32]toolkit.Oid) + for oid, dumpId := range tableOidToDumpId { + dumpIdsToTableOid[dumpId] = oid + } return &Metadata{ OriginalSize: totalOriginalSize, @@ -184,6 +189,8 @@ func NewMetadata( TocFileSize: tocFileSize, Compression: tocObj.Header.CompressionSpec.Level, }, - Entries: entriesDto, + Entries: entriesDto, + TableOidToDumpId: tableOidToDumpId, + DumpIdsToTableOid: dumpIdsToTableOid, }, nil } From 268b8c8081f3e4794f244add6b7ebd129be1246e Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Wed, 28 Aug 2024 10:15:36 +0300 Subject: [PATCH 2/3] fix: Added integration test for generated columns Closes #183 --- docker/integration/filldb/.dockerignore | 1 + docker/integration/filldb/Dockerfile | 2 +- docker/integration/filldb/filldb.sh | 5 ++ docker/integration/filldb/generated.sql | 9 ++++ .../greenmask/backward_compatibility_test.go | 54 +++++++++++++------ 5 files changed, 55 insertions(+), 16 deletions(-) create mode 100644 docker/integration/filldb/.dockerignore create mode 100644 docker/integration/filldb/generated.sql diff --git a/docker/integration/filldb/.dockerignore b/docker/integration/filldb/.dockerignore new file mode 100644 index 00000000..94143827 --- /dev/null +++ b/docker/integration/filldb/.dockerignore @@ -0,0 +1 @@ +Dockerfile diff --git a/docker/integration/filldb/Dockerfile b/docker/integration/filldb/Dockerfile index 47110c7a..682c3735 100644 --- a/docker/integration/filldb/Dockerfile +++ b/docker/integration/filldb/Dockerfile @@ -8,7 +8,7 @@ ENV TMP_DIR=/tmp/schema RUN apt-get update && apt-get install -y wget && mkdir /tmp/schema -COPY filldb.sh /filldb.sh +COPY . / RUN chmod +x ./filldb.sh diff --git a/docker/integration/filldb/filldb.sh b/docker/integration/filldb/filldb.sh index 6b2b9ccf..ab2f1666 100644 --- a/docker/integration/filldb/filldb.sh +++ b/docker/integration/filldb/filldb.sh @@ -15,13 +15,18 @@ cd $TMP_DIR if [ ! -f $FILE_DUMP ]; then + echo "Downloading dump file" wget https://edu.postgrespro.com/$FILE_DUMP fi IFS="," read -ra PG_VERSIONS_CHECK <<< "${PG_VERSIONS_CHECK}" for pgver in ${PG_VERSIONS_CHECK[@]}; do + echo "Restoring database for PostgreSQL $pgver" if psql -p 5432 -h db-$pgver -U postgres -c 'CREATE DATABASE demo;'; then psql -p 5432 -h db-$pgver -U postgres -c 'DROP DATABASE demo_restore;' psql -p 5432 -h db-$pgver -U postgres -c 'CREATE DATABASE demo_restore;' gzip -dc $FILE_DUMP | psql -p 5432 -h db-$pgver -U postgres -d demo + if [ $pgver -ne '11' ]; then + psql -p 5432 -h db-$pgver -U postgres -d demo -f /generated.sql + fi fi done diff --git a/docker/integration/filldb/generated.sql b/docker/integration/filldb/generated.sql new file mode 100644 index 00000000..3815ea39 --- /dev/null +++ b/docker/integration/filldb/generated.sql @@ -0,0 +1,9 @@ +CREATE TABLE public.people +( + id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + generated text GENERATED ALWAYS AS (id || first_name) STORED, + first_name text +); + +INSERT INTO public.people("first_name") +VALUES ('bob'); diff --git a/tests/integration/greenmask/backward_compatibility_test.go b/tests/integration/greenmask/backward_compatibility_test.go index f37c5164..88fd418a 100644 --- a/tests/integration/greenmask/backward_compatibility_test.go +++ b/tests/integration/greenmask/backward_compatibility_test.go @@ -51,6 +51,16 @@ dump: schema: public transformation: +{{ if ge .version 120000 }} + - schema: public + name: "people" + transformers: + - name: "Masking" + params: + column: "first_name" + type: "name" +{{ end }} + - schema: "bookings" name: "flights" transformers: @@ -76,6 +86,7 @@ type BackwardCompatibilitySuite struct { configFilePath string conn *pgx.Conn restorationDbName string + pgVersionNum int } func (suite *BackwardCompatibilitySuite) SetupSuite() { @@ -100,20 +111,6 @@ func (suite *BackwardCompatibilitySuite) SetupSuite() { err = os.Mkdir(suite.runtimeTmpDir, 0700) suite.Require().NoError(err, "error creating tmp dir") - suite.configFilePath = path.Join(suite.tmpDir, "config.yaml") - confFile, err := os.Create(suite.configFilePath) - suite.Require().NoError(err, "error creating config.yaml file") - defer confFile.Close() - err = configStr.Execute( - confFile, - map[string]string{ - "pgBinPath": pgBinPath, - "tmpDir": suite.tmpDir, - "uri": uri, - "storageDir": suite.storageDir, - }) - suite.Require().NoError(err, "error encoding config into yaml") - suite.conn, err = pgx.Connect(context.Background(), uri) suite.Require().NoError(err, "error connecting to db") @@ -128,6 +125,33 @@ func (suite *BackwardCompatibilitySuite) SetupSuite() { defer restoreDbConn.Close(context.Background()) _, err = restoreDbConn.Exec(context.Background(), "drop schema public;") suite.Require().NoError(err, "error creating database") + + getVersionQuery := ` + select + setting::INT + from pg_settings + where name = 'server_version_num' + ` + + row := suite.conn.QueryRow(context.Background(), getVersionQuery) + err = row.Scan(&suite.pgVersionNum) + suite.Require().NoError(err, "error getting pg version") + + suite.configFilePath = path.Join(suite.tmpDir, "config.yaml") + confFile, err := os.Create(suite.configFilePath) + suite.Require().NoError(err, "error creating config.yaml file") + defer confFile.Close() + err = configStr.Execute( + confFile, + map[string]any{ + "pgBinPath": pgBinPath, + "tmpDir": suite.tmpDir, + "uri": uri, + "storageDir": suite.storageDir, + "version": suite.pgVersionNum, + }) + suite.Require().NoError(err, "error encoding config into yaml") + } func (suite *BackwardCompatibilitySuite) TestGreenmaskCompatibility() { @@ -171,7 +195,7 @@ func (suite *BackwardCompatibilitySuite) TestGreenmaskCompatibility() { } }) - suite.Run("testing pg_restore to the db", func() { + suite.Run("testing pg_restore restoration", func() { entry, err := os.ReadDir(suite.storageDir) suite.Require().NoError(err, "error reading storage directory") From 65f9684623b9189569bf39a3760223b935f13c22 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Wed, 28 Aug 2024 12:09:07 +0300 Subject: [PATCH 3/3] feat: Added `overriding-system-value` * introduced a new option for `restore` command `--overriding-system-value` * if `--inserts` option is provided generates `INSERT` statements with ` OVERRIDING SYSTEM VALUE VALUES` * Added doc description for `--overriding-system-value` Closes #183 --- cmd/greenmask/cmd/restore/restore.go | 6 ++- docs/commands/restore.md | 36 +++++++++++---- internal/db/postgres/cmd/restore.go | 2 +- internal/db/postgres/pgrestore/pgrestore.go | 2 + .../postgres/restorers/table_insert_format.go | 46 +++++++++++-------- 5 files changed, 63 insertions(+), 29 deletions(-) diff --git a/cmd/greenmask/cmd/restore/restore.go b/cmd/greenmask/cmd/restore/restore.go index 302b2916..a381094d 100644 --- a/cmd/greenmask/cmd/restore/restore.go +++ b/cmd/greenmask/cmd/restore/restore.go @@ -175,6 +175,10 @@ func init() { "batch-size", "", 0, "the number of rows to insert in a single batch during the COPY command (0 - all rows will be inserted in a single batch)", ) + Cmd.Flags().BoolP( + "overriding-system-value", "", false, + "use OVERRIDING SYSTEM VALUE clause for INSERTs", + ) // Connection options: Cmd.Flags().StringP("host", "h", "/var/run/postgres", "database server host or socket directory") @@ -189,7 +193,7 @@ func init() { "disable-triggers", "enable-row-security", "if-exists", "no-comments", "no-data-for-failed-tables", "no-security-labels", "no-subscriptions", "no-table-access-method", "no-tablespaces", "section", "strict-names", "use-set-session-authorization", "inserts", "on-conflict-do-nothing", "restore-in-order", - "pgzip", "batch-size", + "pgzip", "batch-size", "overriding-system-value", "host", "port", "username", } { diff --git a/docs/commands/restore.md b/docs/commands/restore.md index 3103da91..04fb7dea 100644 --- a/docs/commands/restore.md +++ b/docs/commands/restore.md @@ -45,6 +45,7 @@ Mostly it supports the same flags as the `pg_restore` utility, with some extra f --no-table-access-method do not restore table access methods --no-tablespaces do not restore tablespace assignments --on-conflict-do-nothing add ON CONFLICT DO NOTHING to INSERT commands + --overriding-system-value use OVERRIDING SYSTEM VALUE clause for INSERTs --pgzip use pgzip decompression instead of gzip -p, --port int database server port number (default 5432) --restore-in-order restore tables in topological order, ensuring that dependent tables are not restored until the tables they depend on have been restored @@ -70,28 +71,47 @@ Mostly it supports the same flags as the `pg_restore` utility, with some extra f Insert commands are a lot slower than `COPY` commands. Use this feature only when necessary. -By default, Greenmask restores data using the `COPY` command. If you prefer to restore data using `INSERT` commands, you can +By default, Greenmask restores data using the `COPY` command. If you prefer to restore data using `INSERT` commands, you +can use the `--inserts` flag. This flag allows you to manage errors that occur during the execution of INSERT commands. By -configuring an error and constraint [exclusion list in the config](../configuration.md#restoration-error-exclusion), +configuring an error and constraint [exclusion list in the config](../configuration.md#restoration-error-exclusion), you can skip certain errors and continue inserting subsequent rows from the dump. This can be useful when adding new records to an existing dump, but you don't want the process to stop if some records already exist in the database or violate certain constraints. -By adding the `--on-conflict-do-nothing` flag, it generates `INSERT` statements with the ON `CONFLICT DO NOTHING` -clause, similar to the original pg_dump option. However, this approach only works for unique or exclusion constraints. +By adding the `--on-conflict-do-nothing` flag, it generates `INSERT` statements with the ON `CONFLICT DO NOTHING` +clause, similar to the original pg_dump option. However, this approach only works for unique or exclusion constraints. If a foreign key is missing in the referenced table or any other constraint is violated, the insertion will still fail. To handle these issues, you can define an[exclusion list in the config](../configuration.md#restoration-error-exclusion). +```shell title="example with inserts and error handling" + ```shell title="example with inserts and on conflict do nothing" greenmask --config=config.yml restore DUMP_ID --inserts --on-conflict-do-nothing ``` +By adding the `--overriding-system-value` flag, it generates `INSERT` statements with the `OVERRIDING SYSTEM VALUE` +clause, which allows you to insert data into identity columns. + +```postgresql title="example of GENERATED ALWAYS AS IDENTITY column" +CREATE TABLE people ( + id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + generated text GENERATED ALWAYS AS (id || first_name) STORED, + first_name text +); +``` + +```shell title="example with inserts" +greenmask --config=config.yml restore DUMP_ID --inserts --overriding-system-value +``` + ### Restoration in topological order By default, Greenmask restores tables in the order they are listed in the dump file. To restore tables in topological -order, use the `--restore-in-order` flag. This is particularly useful when your schema includes foreign key references and +order, use the `--restore-in-order` flag. This is particularly useful when your schema includes foreign key references +and you need to insert data in the correct order. Without this flag, you may encounter errors when inserting data into tables with foreign key constraints. @@ -101,7 +121,6 @@ tables with foreign key constraints. tables with cyclic dependencies is to temporarily remove the foreign key constraint (to break the cycle), restore the data, and then re-add the foreign key constraint once the data restoration is complete. - If your database has cyclic dependencies you will be notified about it but the restoration will continue. ```text @@ -123,8 +142,9 @@ greenmask --config=config.yml restore latest --pgzip The COPY command returns the error only on transaction commit. This means that if you have a large dump and an error occurs, you will have to wait until the end of the transaction to see the error message. To avoid this, you can use the -`--batch-size` flag to specify the number of rows to insert in a single batch during the COPY command. If an error occurs -during the batch insertion, the error message will be displayed immediately. The data will be committed **only if all +`--batch-size` flag to specify the number of rows to insert in a single batch during the COPY command. If an error +occurs +during the batch insertion, the error message will be displayed immediately. The data will be committed **only if all batches are inserted successfully**. !!! warning diff --git a/internal/db/postgres/cmd/restore.go b/internal/db/postgres/cmd/restore.go index a9bb8d2f..31153d63 100644 --- a/internal/db/postgres/cmd/restore.go +++ b/internal/db/postgres/cmd/restore.go @@ -651,7 +651,7 @@ func (r *Restore) taskPusher(ctx context.Context, tasks chan restorers.RestoreTa } task = restorers.NewTableRestorerInsertFormat( entry, t, r.st, r.restoreOpt.ExitOnError, r.restoreOpt.OnConflictDoNothing, - r.cfg.ErrorExclusions, r.restoreOpt.Pgzip, + r.cfg.ErrorExclusions, r.restoreOpt.Pgzip, r.restoreOpt.OverridingSystemValue, ) } else { task = restorers.NewTableRestorer( diff --git a/internal/db/postgres/pgrestore/pgrestore.go b/internal/db/postgres/pgrestore/pgrestore.go index 4396054d..dde641fe 100644 --- a/internal/db/postgres/pgrestore/pgrestore.go +++ b/internal/db/postgres/pgrestore/pgrestore.go @@ -96,6 +96,8 @@ type Options struct { OnConflictDoNothing bool `mapstructure:"on-conflict-do-nothing"` Inserts bool `mapstructure:"inserts"` RestoreInOrder bool `mapstructure:"restore-in-order"` + // OverridingSystemValue is a custom option that allows to use OVERRIDING SYSTEM VALUE for INSERTs + OverridingSystemValue bool `mapstructure:"overriding-system-value"` // Use pgzip decompression instead of gzip Pgzip bool `mapstructure:"pgzip"` BatchSize int64 `mapstructure:"batch-size"` diff --git a/internal/db/postgres/restorers/table_insert_format.go b/internal/db/postgres/restorers/table_insert_format.go index 449f9973..06fe09ff 100644 --- a/internal/db/postgres/restorers/table_insert_format.go +++ b/internal/db/postgres/restorers/table_insert_format.go @@ -37,21 +37,22 @@ import ( ) type TableRestorerInsertFormat struct { - Entry *toc.Entry - Table *toolkit.Table - St storages.Storager - doNothing bool - exitOnError bool - query string - globalExclusions *domains.GlobalDataRestorationErrorExclusions - tableExclusion *domains.TablesDataRestorationErrorExclusions - usePgzip bool + Entry *toc.Entry + Table *toolkit.Table + St storages.Storager + doNothing bool + exitOnError bool + query string + globalExclusions *domains.GlobalDataRestorationErrorExclusions + tableExclusion *domains.TablesDataRestorationErrorExclusions + usePgzip bool + overridingSystemValue bool } func NewTableRestorerInsertFormat( entry *toc.Entry, t *toolkit.Table, st storages.Storager, exitOnError bool, doNothing bool, exclusions *domains.DataRestorationErrorExclusions, - usePgzip bool, + usePgzip bool, overridingSystemValue bool, ) *TableRestorerInsertFormat { var ( @@ -78,14 +79,15 @@ func NewTableRestorerInsertFormat( } return &TableRestorerInsertFormat{ - Table: t, - Entry: entry, - St: st, - exitOnError: exitOnError, - doNothing: doNothing, - globalExclusions: globalExclusion, - tableExclusion: tableExclusion, - usePgzip: usePgzip, + Table: t, + Entry: entry, + St: st, + exitOnError: exitOnError, + doNothing: doNothing, + globalExclusions: globalExclusion, + tableExclusion: tableExclusion, + usePgzip: usePgzip, + overridingSystemValue: overridingSystemValue, } } @@ -184,11 +186,17 @@ func (td *TableRestorerInsertFormat) generateInsertStmt(onConflictDoNothing bool onConflict = " ON CONFLICT DO NOTHING" } + overridingSystemValue := "" + if td.overridingSystemValue { + overridingSystemValue = "OVERRIDING SYSTEM VALUE " + } + res := fmt.Sprintf( - `INSERT INTO %s.%s (%s) OVERRIDING SYSTEM VALUE VALUES (%s)%s`, + `INSERT INTO %s.%s (%s) %sVALUES(%s)%s`, *td.Entry.Namespace, *td.Entry.Tag, strings.Join(columnNames, ", "), + overridingSystemValue, strings.Join(placeholders, ", "), onConflict, )