From 26816341054edc33775cd5faa7b9477228d860b6 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 8 Apr 2024 15:56:00 +0200 Subject: [PATCH 1/2] Unify check attempt data type to uint32 already used somewhere A float isn't necessary as in Icinga 2 Checkable#max_check_attempts and check_attempt are ints. But uint8 isn't enough for e.g. 1 check/s to get HARD after 5m (300s > 255). --- cmd/icingadb-migrate/convert.go | 8 ++++---- pkg/icingadb/db.go | 4 ++-- pkg/icingadb/v1/checkable.go | 2 +- pkg/icingadb/v1/history/state.go | 2 +- pkg/icingadb/v1/state.go | 2 +- schema/mysql/schema.sql | 8 ++++---- schema/mysql/upgrades/1.1.2.sql | 9 +++++++++ schema/mysql/upgrades/optional/1.1.2-history.sql | 1 + schema/pgsql/schema.sql | 8 ++++---- schema/pgsql/upgrades/1.1.2.sql | 9 +++++++++ schema/pgsql/upgrades/optional/1.1.2-history.sql | 3 +++ 11 files changed, 39 insertions(+), 17 deletions(-) create mode 100644 schema/mysql/upgrades/optional/1.1.2-history.sql create mode 100644 schema/pgsql/upgrades/optional/1.1.2-history.sql diff --git a/cmd/icingadb-migrate/convert.go b/cmd/icingadb-migrate/convert.go index 64a6b3498..5cfa7bdcb 100644 --- a/cmd/icingadb-migrate/convert.go +++ b/cmd/icingadb-migrate/convert.go @@ -724,8 +724,8 @@ type stateRow = struct { StateTimeUsec uint32 State uint8 StateType uint8 - CurrentCheckAttempt uint16 - MaxCheckAttempts uint16 + CurrentCheckAttempt uint32 + MaxCheckAttempts uint32 LastState uint8 LastHardState uint8 Output sql.NullString @@ -798,10 +798,10 @@ func convertStateRows( HardState: row.LastHardState, PreviousSoftState: row.LastState, PreviousHardState: previousHardState, - CheckAttempt: uint8(row.CurrentCheckAttempt), + CheckAttempt: row.CurrentCheckAttempt, Output: icingadbTypes.String{NullString: row.Output}, LongOutput: icingadbTypes.String{NullString: row.LongOutput}, - MaxCheckAttempts: uint32(row.MaxCheckAttempts), + MaxCheckAttempts: row.MaxCheckAttempts, CheckSource: icingadbTypes.String{NullString: 
row.CheckSource}, }) diff --git a/pkg/icingadb/db.go b/pkg/icingadb/db.go index 1bff6a6c4..9bb480ba2 100644 --- a/pkg/icingadb/db.go +++ b/pkg/icingadb/db.go @@ -93,8 +93,8 @@ func NewDb(db *sqlx.DB, logger *logging.Logger, options *Options) *DB { } const ( - expectedMysqlSchemaVersion = 4 - expectedPostgresSchemaVersion = 2 + expectedMysqlSchemaVersion = 5 + expectedPostgresSchemaVersion = 3 ) // CheckSchema asserts the database schema of the expected version being present. diff --git a/pkg/icingadb/v1/checkable.go b/pkg/icingadb/v1/checkable.go index dbb114cbc..4b1efeb9c 100644 --- a/pkg/icingadb/v1/checkable.go +++ b/pkg/icingadb/v1/checkable.go @@ -30,7 +30,7 @@ type Checkable struct { IconImageAlt string `json:"icon_image_alt"` IconImageId types.Binary `json:"icon_image_id"` IsVolatile types.Bool `json:"is_volatile"` - MaxCheckAttempts float64 `json:"max_check_attempts"` + MaxCheckAttempts uint32 `json:"max_check_attempts"` Notes string `json:"notes"` NotesUrlId types.Binary `json:"notes_url_id"` NotificationsEnabled types.Bool `json:"notifications_enabled"` diff --git a/pkg/icingadb/v1/history/state.go b/pkg/icingadb/v1/history/state.go index dec13b042..6320b738a 100644 --- a/pkg/icingadb/v1/history/state.go +++ b/pkg/icingadb/v1/history/state.go @@ -14,7 +14,7 @@ type StateHistory struct { HardState uint8 `json:"hard_state"` PreviousSoftState uint8 `json:"previous_soft_state"` PreviousHardState uint8 `json:"previous_hard_state"` - CheckAttempt uint8 `json:"check_attempt"` + CheckAttempt uint32 `json:"check_attempt"` Output types.String `json:"output"` LongOutput types.String `json:"long_output"` MaxCheckAttempts uint32 `json:"max_check_attempts"` diff --git a/pkg/icingadb/v1/state.go b/pkg/icingadb/v1/state.go index bad8f28c5..983b14d5a 100644 --- a/pkg/icingadb/v1/state.go +++ b/pkg/icingadb/v1/state.go @@ -9,7 +9,7 @@ type State struct { EnvironmentMeta `json:",inline"` AcknowledgementCommentId types.Binary `json:"acknowledgement_comment_id"` LastCommentId 
types.Binary `json:"last_comment_id"` - CheckAttempt uint8 `json:"check_attempt"` + CheckAttempt uint32 `json:"check_attempt"` CheckCommandline types.String `json:"check_commandline"` CheckSource types.String `json:"check_source"` SchedulingSource types.String `json:"scheduling_source"` diff --git a/schema/mysql/schema.sql b/schema/mysql/schema.sql index 2cc10cc81..745a5e620 100644 --- a/schema/mysql/schema.sql +++ b/schema/mysql/schema.sql @@ -292,7 +292,7 @@ CREATE TABLE host_state ( hard_state tinyint unsigned NOT NULL, previous_soft_state tinyint unsigned NOT NULL, previous_hard_state tinyint unsigned NOT NULL, - check_attempt tinyint unsigned NOT NULL, + check_attempt int unsigned NOT NULL, severity smallint unsigned NOT NULL, output longtext DEFAULT NULL, @@ -460,7 +460,7 @@ CREATE TABLE service_state ( hard_state tinyint unsigned NOT NULL, previous_soft_state tinyint unsigned NOT NULL, previous_hard_state tinyint unsigned NOT NULL, - check_attempt tinyint unsigned NOT NULL, + check_attempt int unsigned NOT NULL, severity smallint unsigned NOT NULL, output longtext DEFAULT NULL, @@ -1147,7 +1147,7 @@ CREATE TABLE state_history ( hard_state tinyint unsigned NOT NULL, previous_soft_state tinyint unsigned NOT NULL, previous_hard_state tinyint unsigned NOT NULL, - check_attempt tinyint unsigned NOT NULL, + check_attempt int unsigned NOT NULL, -- may be a tinyint unsigned, see https://icinga.com/docs/icinga-db/latest/doc/04-Upgrading/#upgrading-to-icinga-db-v112 output longtext DEFAULT NULL, long_output longtext DEFAULT NULL, max_check_attempts int unsigned NOT NULL, @@ -1343,4 +1343,4 @@ CREATE TABLE icingadb_schema ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin ROW_FORMAT=DYNAMIC; INSERT INTO icingadb_schema (version, timestamp) - VALUES (4, UNIX_TIMESTAMP() * 1000); + VALUES (5, UNIX_TIMESTAMP() * 1000); diff --git a/schema/mysql/upgrades/1.1.2.sql b/schema/mysql/upgrades/1.1.2.sql index 80f4af249..74cbcb15d 100644 --- 
a/schema/mysql/upgrades/1.1.2.sql +++ b/schema/mysql/upgrades/1.1.2.sql @@ -2,3 +2,12 @@ UPDATE icingadb_schema SET timestamp = UNIX_TIMESTAMP(timestamp / 1000) * 1000 W ALTER TABLE history ADD INDEX idx_history_event_time_event_type (event_time, event_type) COMMENT 'History filtered/ordered by event_time/event_type'; ALTER TABLE history DROP INDEX idx_history_event_time; + +ALTER TABLE host_state MODIFY COLUMN check_attempt int unsigned NOT NULL; + +ALTER TABLE service_state MODIFY COLUMN check_attempt int unsigned NOT NULL; + +ALTER TABLE state_history MODIFY COLUMN check_attempt tinyint unsigned NOT NULL COMMENT 'optional schema upgrade not applied yet, see https://icinga.com/docs/icinga-db/latest/doc/04-Upgrading/#upgrading-to-icinga-db-v112'; + +INSERT INTO icingadb_schema (version, timestamp) + VALUES (5, UNIX_TIMESTAMP() * 1000); diff --git a/schema/mysql/upgrades/optional/1.1.2-history.sql b/schema/mysql/upgrades/optional/1.1.2-history.sql new file mode 100644 index 000000000..4081fcb21 --- /dev/null +++ b/schema/mysql/upgrades/optional/1.1.2-history.sql @@ -0,0 +1 @@ +ALTER TABLE state_history MODIFY COLUMN check_attempt int unsigned NOT NULL; diff --git a/schema/pgsql/schema.sql b/schema/pgsql/schema.sql index ea87325f5..708c9143c 100644 --- a/schema/pgsql/schema.sql +++ b/schema/pgsql/schema.sql @@ -405,7 +405,7 @@ CREATE TABLE host_state ( hard_state tinyuint NOT NULL, previous_soft_state tinyuint NOT NULL, previous_hard_state tinyuint NOT NULL, - check_attempt tinyuint NOT NULL, + check_attempt uint NOT NULL, severity smalluint NOT NULL, output text DEFAULT NULL, @@ -675,7 +675,7 @@ CREATE TABLE service_state ( hard_state tinyuint NOT NULL, previous_soft_state tinyuint NOT NULL, previous_hard_state tinyuint NOT NULL, - check_attempt tinyuint NOT NULL, + check_attempt uint NOT NULL, severity smalluint NOT NULL, output text DEFAULT NULL, @@ -1846,7 +1846,7 @@ CREATE TABLE state_history ( hard_state tinyuint NOT NULL, previous_soft_state tinyuint NOT 
NULL, previous_hard_state tinyuint NOT NULL, - check_attempt tinyuint NOT NULL, + check_attempt uint NOT NULL, -- may be a tinyuint, see https://icinga.com/docs/icinga-db/latest/doc/04-Upgrading/#upgrading-to-icinga-db-v112 output text DEFAULT NULL, long_output text DEFAULT NULL, max_check_attempts uint NOT NULL, @@ -2181,4 +2181,4 @@ CREATE TABLE icingadb_schema ( ALTER SEQUENCE icingadb_schema_id_seq OWNED BY icingadb_schema.id; INSERT INTO icingadb_schema (version, timestamp) - VALUES (2, extract(epoch from now()) * 1000); + VALUES (3, extract(epoch from now()) * 1000); diff --git a/schema/pgsql/upgrades/1.1.2.sql b/schema/pgsql/upgrades/1.1.2.sql index 701fea00e..2203ffb80 100644 --- a/schema/pgsql/upgrades/1.1.2.sql +++ b/schema/pgsql/upgrades/1.1.2.sql @@ -142,3 +142,12 @@ CREATE INDEX CONCURRENTLY idx_history_event_time_event_type ON history(event_tim COMMENT ON INDEX idx_history_event_time_event_type IS 'History filtered/ordered by event_time/event_type'; DROP INDEX idx_history_event_time; + +ALTER TABLE host_state ALTER COLUMN check_attempt TYPE uint; + +ALTER TABLE service_state ALTER COLUMN check_attempt TYPE uint; + +COMMENT ON COLUMN state_history.check_attempt IS 'optional schema upgrade not applied yet, see https://icinga.com/docs/icinga-db/latest/doc/04-Upgrading/#upgrading-to-icinga-db-v112'; + +INSERT INTO icingadb_schema (version, timestamp) + VALUES (3, extract(epoch from now()) * 1000); diff --git a/schema/pgsql/upgrades/optional/1.1.2-history.sql b/schema/pgsql/upgrades/optional/1.1.2-history.sql new file mode 100644 index 000000000..ea95765a0 --- /dev/null +++ b/schema/pgsql/upgrades/optional/1.1.2-history.sql @@ -0,0 +1,3 @@ +ALTER TABLE state_history ALTER COLUMN check_attempt TYPE uint; + +COMMENT ON COLUMN state_history.check_attempt IS NULL; From ac85b529515c1b4979201e4490b3a14d9828bd3d Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Mon, 8 Apr 2024 13:15:24 +0200 Subject: [PATCH 2/2] Upgrading docs for state_history schema migration 
Co-authored-by: Alexander A. Klimov --- doc/04-Upgrading.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/04-Upgrading.md b/doc/04-Upgrading.md index 7a1412d26..f5a1bead0 100644 --- a/doc/04-Upgrading.md +++ b/doc/04-Upgrading.md @@ -9,12 +9,40 @@ Please apply the `1.1.2.sql` upgrade script to your database. For package instal `/usr/share/icingadb/schema/mysql/upgrades/` or `/usr/share/icingadb/schema/pgsql/upgrades/`, depending on your database vendor. +As the daemon checks the schema version, the recommended way to perform the upgrade is to stop the daemon, apply the +schema upgrade and then start the new daemon version. If you want to minimize downtime as much as possible, it is safe +to apply this schema upgrade while the Icinga DB v1.1.1 daemon is still running and then restart the daemon with the +new version. Please keep in mind that depending on the distribution, your package manager may automatically attempt to +restart the daemon when upgrading the package. + !!! warning With MySQL and MariaDB, a locking issue can occur if the schema upgrade is applied while the history view is accessed in Icinga DB Web. This can result in the upgrade being delayed unnecessarily and blocking other queries. Please see [unblock history tables](#unblock-history-tables) for how to detect and resolve this situation. +### Upgrading the state_history Table + +This release includes fixes for hosts and services reaching check attempt 256. However, on existing installations, +the schema upgrade required to fix the history tables isn't automatically applied by `1.1.2.sql` as a rewrite of the +whole `state_history` table is required. This can take a lot of time depending on the history size and the performance +of the database. During this time that table will be locked exclusively and can't be accessed otherwise. This means that +the existing history can't be viewed in Icinga Web and new history entries will be buffered in Redis. 
+ +There is a separate upgrade script `optional/1.1.2-history.sql` to perform the rewrite of the `state_history` table. +This allows you to postpone part of the upgrade to a longer maintenance window in the future, or skip it entirely +if you deem this safe for your installation. + +!!! warning + + Until `optional/1.1.2-history.sql` is applied, you'll have to lower `max_check_attempts` to 255 or less, otherwise + Icinga DB will crash with a database error if hosts/services reach check attempt 256. If you need to lower + `max_check_attempts` but want to keep the same timespan from an outage to a hard state, you can raise + `retry_interval` instead so that `max_check_attempts * retry_interval` stays the same. + +If you apply `optional/1.1.2-history.sql`, make sure that `1.1.2.sql` has already been applied. Do not interrupt the +script while it is running! Ideally, run it inside tmux or screen so that a lost SSH session does not interrupt it. + ### Unblock History Tables !!! info