From a64507c0d0013987a3e5c24eff028f17bff1fd75 Mon Sep 17 00:00:00 2001 From: Matt Spilchen Date: Mon, 1 Apr 2024 11:09:36 -0300 Subject: [PATCH] Sync from server repo (99aa32b4094) --- commands/cluster_command_launcher.go | 4 + commands/cmd_add_node.go | 2 +- commands/cmd_add_subcluster.go | 4 +- commands/cmd_remove_subcluster.go | 4 +- commands/cmd_sandbox.go | 6 +- commands/cmd_stop_db.go | 2 +- commands/cmd_stop_subcluster.go | 163 +++++++++++++++++++ commands/cmd_unsandbox.go | 4 +- vclusterops/add_node.go | 2 +- vclusterops/add_subcluster.go | 2 +- vclusterops/cluster_op.go | 1 + vclusterops/create_db.go | 2 +- vclusterops/https_check_db_running_op.go | 36 ++++- vclusterops/https_get_up_nodes_op.go | 23 ++- vclusterops/https_stop_subcluster_op.go | 155 ++++++++++++++++++ vclusterops/https_sync_catalog_op.go | 37 ++++- vclusterops/remove_node.go | 2 +- vclusterops/start_db.go | 2 +- vclusterops/start_node.go | 2 +- vclusterops/stop_db.go | 2 +- vclusterops/stop_subcluster.go | 196 +++++++++++++++++++++++ vclusterops/vcluster_database_options.go | 1 + 22 files changed, 618 insertions(+), 34 deletions(-) create mode 100644 commands/cmd_stop_subcluster.go create mode 100644 vclusterops/https_stop_subcluster_op.go create mode 100644 vclusterops/stop_subcluster.go diff --git a/commands/cluster_command_launcher.go b/commands/cluster_command_launcher.go index c3b563b..35b0b10 100644 --- a/commands/cluster_command_launcher.go +++ b/commands/cluster_command_launcher.go @@ -75,6 +75,8 @@ const ( verboseKey = "verbose" outputFileFlag = "output-file" outputFileKey = "outputFile" + subclusterFlag = "subcluster" + sandboxFlag = "sandbox" ) // flags to viper key map @@ -111,6 +113,7 @@ const ( dropDBSubCmd = "drop_db" addSCSubCmd = "db_add_subcluster" removeSCSubCmd = "db_remove_subcluster" + stopSCSubCmd = "stop_subcluster" addNodeSubCmd = "db_add_node" removeNodeSubCmd = "db_remove_node" restartNodeSubCmd = "restart_node" @@ -403,6 +406,7 @@ func constructCmds() 
[]*cobra.Command { // sc-scope cmds makeCmdAddSubcluster(), makeCmdRemoveSubcluster(), + makeCmdStopSubcluster(), makeCmdSandboxSubcluster(), makeCmdUnsandboxSubcluster(), // node-scope cmds diff --git a/commands/cmd_add_node.go b/commands/cmd_add_node.go index 7f0452f..28cc66b 100644 --- a/commands/cmd_add_node.go +++ b/commands/cmd_add_node.go @@ -109,7 +109,7 @@ func (c *CmdAddNode) setLocalFlags(cmd *cobra.Command) { ) cmd.Flags().StringVar( c.addNodeOptions.SCName, - "subcluster", + subclusterFlag, "", util.GetEonFlagMsg("The Name of subcluster"+ " to which the host(s) must be added. If empty default subcluster is considered"), diff --git a/commands/cmd_add_subcluster.go b/commands/cmd_add_subcluster.go index d7036ef..0436bc6 100644 --- a/commands/cmd_add_subcluster.go +++ b/commands/cmd_add_subcluster.go @@ -77,7 +77,7 @@ Examples: newCmd.setHiddenFlags(cmd) // require name of subcluster to add - markFlagsRequired(cmd, []string{"subcluster"}) + markFlagsRequired(cmd, []string{subclusterFlag}) return cmd } @@ -86,7 +86,7 @@ Examples: func (c *CmdAddSubcluster) setLocalFlags(cmd *cobra.Command) { cmd.Flags().StringVar( c.addSubclusterOptions.SCName, - "subcluster", + subclusterFlag, "", "The name of the new subcluster", ) diff --git a/commands/cmd_remove_subcluster.go b/commands/cmd_remove_subcluster.go index 7c9e9e0..bcbccf1 100644 --- a/commands/cmd_remove_subcluster.go +++ b/commands/cmd_remove_subcluster.go @@ -68,7 +68,7 @@ Examples: newCmd.setLocalFlags(cmd) // require name of subcluster to remove - markFlagsRequired(cmd, []string{"subcluster"}) + markFlagsRequired(cmd, []string{subclusterFlag}) return cmd } @@ -77,7 +77,7 @@ Examples: func (c *CmdRemoveSubcluster) setLocalFlags(cmd *cobra.Command) { cmd.Flags().StringVar( c.removeScOptions.SubclusterToRemove, - "subcluster", + subclusterFlag, "", "Name of subcluster to be removed", ) diff --git a/commands/cmd_sandbox.go b/commands/cmd_sandbox.go index c51bea6..a372276 100644 --- a/commands/cmd_sandbox.go 
+++ b/commands/cmd_sandbox.go @@ -79,7 +79,7 @@ Examples: newCmd.setLocalFlags(cmd) // require name of subcluster to sandbox as well as the sandbox name - markFlagsRequired(cmd, []string{"subcluster", "sandbox"}) + markFlagsRequired(cmd, []string{subclusterFlag, sandboxFlag}) return cmd } @@ -88,13 +88,13 @@ Examples: func (c *CmdSandboxSubcluster) setLocalFlags(cmd *cobra.Command) { cmd.Flags().StringVar( c.sbOptions.SCName, - "subcluster", + subclusterFlag, "", "The name of the subcluster to be sandboxed", ) cmd.Flags().StringVar( c.sbOptions.SandboxName, - "sandbox", + sandboxFlag, "", "The name of the sandbox", ) diff --git a/commands/cmd_stop_db.go b/commands/cmd_stop_db.go index af6a34a..0194556 100644 --- a/commands/cmd_stop_db.go +++ b/commands/cmd_stop_db.go @@ -80,7 +80,7 @@ func (c *CmdStopDB) setLocalFlags(cmd *cobra.Command) { ) cmd.Flags().StringVar( c.stopDBOptions.Sandbox, - "sandbox", + sandboxFlag, "", "Name of the sandbox to stop", ) diff --git a/commands/cmd_stop_subcluster.go b/commands/cmd_stop_subcluster.go new file mode 100644 index 0000000..f09120c --- /dev/null +++ b/commands/cmd_stop_subcluster.go @@ -0,0 +1,163 @@ +/* + (c) Copyright [2023-2024] Open Text. + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package commands + +import ( + "strconv" + + "github.com/spf13/cobra" + "github.com/spf13/viper" + "github.com/vertica/vcluster/vclusterops" + "github.com/vertica/vcluster/vclusterops/util" + "github.com/vertica/vcluster/vclusterops/vlog" +) + +/* CmdStopSubcluster + * + * Parses arguments to StopSubcluster and calls + * the high-level function for StopSubcluster. + * + * Implements ClusterCommand interface + */ + +type CmdStopSubcluster struct { + CmdBase + stopSCOptions *vclusterops.VStopSubclusterOptions +} + +func makeCmdStopSubcluster() *cobra.Command { + newCmd := &CmdStopSubcluster{} + opt := vclusterops.VStopSubclusterOptionsFactory() + newCmd.stopSCOptions = &opt + + cmd := makeBasicCobraCmd( + newCmd, + stopSCSubCmd, + "Stop a subcluster", + `This subcommand stops a subcluster from an existing Eon Mode database. + +You must provide the subcluster name with the --subcluster option. + +All hosts in the subcluster will be stopped. You cannot stop a sandboxed +subcluster. + +Examples: + # Gracefully stop a subcluster with config file + vcluster stop_subcluster --subcluster sc1 --drain-seconds 10 \ + --config /opt/vertica/config/vertica_cluster.yaml + + # Forcibly stop a subcluster with config file + vcluster stop_subcluster --subcluster sc1 --force \ + --config /opt/vertica/config/vertica_cluster.yaml + + # Gracefully stop a subcluster with user input + vcluster stop_subcluster --db-name test_db --subcluster sc1 \ + --hosts 10.20.30.40,10.20.30.41,10.20.30.42 --drain-seconds 10 + + # Forcibly stop a subcluster with user input + vcluster stop_subcluster --db-name test_db --subcluster sc1 \ + --hosts 10.20.30.40,10.20.30.41,10.20.30.42 --force +`, + []string{dbNameFlag, hostsFlag, ipv6Flag, eonModeFlag, configFlag, passwordFlag}, + ) + + // local flags + newCmd.setLocalFlags(cmd) + + // require name of subcluster to stop + markFlagsRequired(cmd, []string{subclusterFlag}) + + // hide eon mode flag since we expect it to come from config file, not from user
input + hideLocalFlags(cmd, []string{eonModeFlag}) + + return cmd +} + +// setLocalFlags will set the local flags the command has +func (c *CmdStopSubcluster) setLocalFlags(cmd *cobra.Command) { + cmd.Flags().IntVar( + &c.stopSCOptions.DrainSeconds, + "drain-seconds", + util.DefaultDrainSeconds, + util.GetEonFlagMsg("seconds to wait for user connections to close."+ + " Default value is "+strconv.Itoa(util.DefaultDrainSeconds)+" seconds."+ + " When the time expires, connections will be forcibly closed and the subcluster will shut down."+ + " If the value is 0, VCluster closes all user connections immediately."+ + " If the value is negative, VCluster waits indefinitely until all user sessions disconnect"), + ) + cmd.Flags().StringVar( + &c.stopSCOptions.SCName, + subclusterFlag, + "", + "The name of the target subcluster", + ) + cmd.Flags().BoolVar( + &c.stopSCOptions.Force, + "force", + false, + "Force the subcluster to shutdown immediately even if users are connected", + ) + cmd.MarkFlagsMutuallyExclusive("drain-seconds", "force") +} + +func (c *CmdStopSubcluster) Parse(inputArgv []string, logger vlog.Printer) error { + c.argv = inputArgv + logger.LogArgParse(&c.argv) + + // reset some options that are not included in user input + c.ResetUserInputOptions(&c.stopSCOptions.DatabaseOptions) + + // stop_subcluster only works for an Eon db so we assume the user always runs this subcommand + // on an Eon db. When Eon mode cannot be found in config file, we set its value to true. 
+ if !viper.IsSet(eonModeKey) { + c.stopSCOptions.IsEon = true + } + + return c.validateParse(logger) +} + +func (c *CmdStopSubcluster) validateParse(logger vlog.Printer) error { + logger.Info("Called validateParse()") + err := c.getCertFilesFromCertPaths(&c.stopSCOptions.DatabaseOptions) + if err != nil { + return err + } + + err = c.ValidateParseBaseOptions(&c.stopSCOptions.DatabaseOptions) + if err != nil { + return err + } + return c.setDBPassword(&c.stopSCOptions.DatabaseOptions) +} + +func (c *CmdStopSubcluster) Run(vcc vclusterops.ClusterCommands) error { + vcc.LogInfo("Called method Run()") + + options := c.stopSCOptions + + err := vcc.VStopSubcluster(options) + if err != nil { + vcc.LogError(err, "failed to stop the subcluster", "Subcluster", options.SCName) + return err + } + vcc.PrintInfo("Successfully stopped subcluster %s", options.SCName) + return nil +} + +// SetDatabaseOptions will assign a vclusterops.DatabaseOptions instance to the one in CmdStopSubcluster +func (c *CmdStopSubcluster) SetDatabaseOptions(opt *vclusterops.DatabaseOptions) { + c.stopSCOptions.DatabaseOptions = *opt +} diff --git a/commands/cmd_unsandbox.go b/commands/cmd_unsandbox.go index ebc2fe7..819cbbc 100644 --- a/commands/cmd_unsandbox.go +++ b/commands/cmd_unsandbox.go @@ -83,7 +83,7 @@ Examples: newCmd.setLocalFlags(cmd) // require name of subcluster to unsandbox - markFlagsRequired(cmd, []string{"subcluster"}) + markFlagsRequired(cmd, []string{subclusterFlag}) return cmd } @@ -92,7 +92,7 @@ Examples: func (c *CmdUnsandboxSubcluster) setLocalFlags(cmd *cobra.Command) { cmd.Flags().StringVar( c.usOptions.SCName, - "subcluster", + subclusterFlag, "", "The name of the subcluster to be unsandboxed", ) diff --git a/vclusterops/add_node.go b/vclusterops/add_node.go index e71c186..f9fae00 100644 --- a/vclusterops/add_node.go +++ b/vclusterops/add_node.go @@ -426,7 +426,7 @@ func (vcc VClusterCommands) prepareAdditionalEonInstructions(vdb *VCoordinationD } if vdb.IsEon { - 
httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(initiatorHost, true, username, options.Password) + httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(initiatorHost, true, username, options.Password, AddNodeSyncCat) if err != nil { return instructions, err } diff --git a/vclusterops/add_subcluster.go b/vclusterops/add_subcluster.go index 932cddf..7040a21 100644 --- a/vclusterops/add_subcluster.go +++ b/vclusterops/add_subcluster.go @@ -252,7 +252,7 @@ func (vcc *VClusterCommands) produceAddSubclusterInstructions(options *VAddSubcl username := *options.UserName httpsGetUpNodesOp, err := makeHTTPSGetUpNodesOp(*options.DBName, options.Hosts, - options.usePassword, username, options.Password, DBAddSubclusterCmd) + options.usePassword, username, options.Password, AddSubclusterCmd) if err != nil { return instructions, err } diff --git a/vclusterops/cluster_op.go b/vclusterops/cluster_op.go index 533633e..614c019 100644 --- a/vclusterops/cluster_op.go +++ b/vclusterops/cluster_op.go @@ -502,6 +502,7 @@ type ClusterCommands interface { VStopDatabase(options *VStopDatabaseOptions) error VFetchCoordinationDatabase(options *VFetchCoordinationDatabaseOptions) (VCoordinationDatabase, error) VUnsandbox(options *VUnsandboxOptions) error + VStopSubcluster(options *VStopSubclusterOptions) error } type VClusterCommandsLogger struct { diff --git a/vclusterops/create_db.go b/vclusterops/create_db.go index d93e2f8..fdfd32d 100644 --- a/vclusterops/create_db.go +++ b/vclusterops/create_db.go @@ -658,7 +658,7 @@ func (vcc VClusterCommands) produceAdditionalCreateDBInstructions(vdb *VCoordina } if vdb.IsEon { - httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(bootstrapHost, true, username, options.Password) + httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(bootstrapHost, true, username, options.Password, CreateDBSyncCat) if err != nil { return instructions, err } diff --git a/vclusterops/https_check_db_running_op.go b/vclusterops/https_check_db_running_op.go index 31803a4..65a89aa 
100644 --- a/vclusterops/https_check_db_running_op.go +++ b/vclusterops/https_check_db_running_op.go @@ -33,6 +33,7 @@ const ( StopDB StartDB ReviveDB + StopSC checkDBRunningOpName = "HTTPSCheckDBRunningOp" checkDBRunningOpDesc = "Verify database is running" @@ -48,6 +49,8 @@ func (op opType) String() string { return "Start DB" case ReviveDB: return "Revive DB" + case StopSC: + return "Stop Subcluster" } return "unknown operation" } @@ -93,6 +96,11 @@ func makeHTTPSCheckRunningDBOp(hosts []string, return op, nil } +func makeHTTPSCheckRunningDBOpWithoutHosts(useHTTPPassword bool, userName string, + httpsPassword *string, operationType opType) (httpsCheckRunningDBOp, error) { + return makeHTTPSCheckRunningDBOp(nil, useHTTPPassword, userName, httpsPassword, operationType) +} + func makeHTTPSCheckRunningDBWithSandboxOp(hosts []string, useHTTPPassword bool, userName string, sandbox string, mainCluster bool, httpsPassword *string, operationType opType, @@ -135,6 +143,18 @@ func (op *httpsCheckRunningDBOp) logPrepare() { } func (op *httpsCheckRunningDBOp) prepare(execContext *opEngineExecContext) error { + // If no hosts passed in, we will find the hosts from execute-context + if len(op.hosts) == 0 && op.opType == StopSC { + // execContext.nodesInfo stores the information of UP nodes in target subcluster + if len(execContext.nodesInfo) == 0 { + return fmt.Errorf(`[%s] Cannot find any node information of target subcluster in OpEngineExecContext`, op.name) + } + hostsInSC := make([]string, 0, len(execContext.nodesInfo)) + for _, node := range execContext.nodesInfo { + hostsInSC = append(hostsInSC, node.Address) + } + op.hosts = hostsInSC + } execContext.dispatcher.setup(op.hosts) return op.setupClusterHTTPRequest(op.hosts) @@ -196,7 +216,7 @@ func (op *httpsCheckRunningDBOp) isDBRunningOnHost(host string, case CreateDB: msg = fmt.Sprintf("[%s] Detected HTTPS service running on host %s, please stop the HTTPS service before creating a new database", op.name, host) - case 
StopDB, StartDB, ReviveDB: + case StopDB, StartDB, ReviveDB, StopSC: msg = fmt.Sprintf("[%s] Detected HTTPS service running on host %s", op.name, host) } // check whether the node is starting and hasn't pulled the latest catalog yet @@ -317,9 +337,13 @@ func (op *httpsCheckRunningDBOp) handleDBRunning(allErrs error, msg string, upHo op.logger.PrintInfo(createDBMsg) op.updateSpinnerMessage(createDBMsg) case StopDB: - const stopDBMsg = "the database has not been down yet" + const stopDBMsg = "the database is not down yet" op.logger.PrintInfo(stopDBMsg) op.updateSpinnerMessage(stopDBMsg) + case StopSC: + const stopSCMsg = "the subcluster is not down yet" + op.logger.PrintInfo(stopSCMsg) + op.updateSpinnerMessage(stopSCMsg) case StartDB: const startDBMsg = "aborting database start" op.logger.PrintInfo(startDBMsg) @@ -379,7 +403,7 @@ func (op *httpsCheckRunningDBOp) execute(execContext *opEngineExecContext) error switch op.opType { case CreateDB, StartDB, ReviveDB: return op.checkDBConnection(execContext) - case StopDB: + case StopDB, StopSC: return op.pollForDBDown(execContext) } @@ -425,7 +449,11 @@ func (op *httpsCheckRunningDBOp) pollForDBDown(execContext *opEngineExecContext) count++ } // timeout - msg := fmt.Sprintf("the DB is still up after %s seconds", timeoutSecondStr) + target := "DB" + if op.opType == StopSC { + target = "subcluster" + } + msg := fmt.Sprintf("the %s is still up after %s seconds", target, timeoutSecondStr) op.logger.PrintWarning(msg) return errors.New(msg) } diff --git a/vclusterops/https_get_up_nodes_op.go b/vclusterops/https_get_up_nodes_op.go index 1cbbaed..bfcaefb 100644 --- a/vclusterops/https_get_up_nodes_op.go +++ b/vclusterops/https_get_up_nodes_op.go @@ -29,7 +29,8 @@ const ( StartNodeCommand StopDBCmd ScrutinizeCmd - DBAddSubclusterCmd + AddSubclusterCmd + StopSubclusterCmd InstallPackageCmd UnsandboxCmd ) @@ -180,7 +181,7 @@ func (op *httpsGetUpNodesOp) processResult(execContext *opEngineExecContext) err continue } - if op.cmdType 
== StopDBCmd { + if op.cmdType == StopDBCmd || op.cmdType == StopSubclusterCmd { err = op.validateHosts(nodesStates) if err != nil { allErrs = errors.Join(allErrs, err) @@ -192,7 +193,7 @@ func (op *httpsGetUpNodesOp) processResult(execContext *opEngineExecContext) err err = op.collectUpHosts(nodesStates, host, upHosts, upScInfo, sandboxInfo, upScNodes) if err != nil { allErrs = errors.Join(allErrs, err) - break + return allErrs } if op.cmdType == UnsandboxCmd { @@ -215,7 +216,7 @@ func (op *httpsGetUpNodesOp) processResult(execContext *opEngineExecContext) err // Return true if all the results need to be scanned to figure out UP hosts func isCompleteScanRequired(cmdType CommandType) bool { - return cmdType == SandboxCmd || cmdType == StopDBCmd || cmdType == UnsandboxCmd + return cmdType == SandboxCmd || cmdType == StopDBCmd || cmdType == UnsandboxCmd || cmdType == StopSubclusterCmd } func (op *httpsGetUpNodesOp) finalize(_ *opEngineExecContext) error { @@ -238,6 +239,11 @@ func (op *httpsGetUpNodesOp) processHostLists(upHosts mapset.Set[string], upScIn execContext *opEngineExecContext) (ignoreErrors bool) { execContext.upScInfo = upScInfo + // when up nodes are found in the database but none are found in the target subcluster, we log an error + if op.cmdType == StopSubclusterCmd && upHosts.Cardinality() > 0 && len(execContext.nodesInfo) == 0 { + op.logger.PrintError(`[%s] There are no UP nodes in subcluster %s.
The subcluster is already down`, op.name, op.scName) + return false + } if op.sandbox != "" && op.cmdType != UnsandboxCmd { upSandbox := op.checkSandboxUp(sandboxInfo, op.sandbox) if !upSandbox { @@ -299,16 +305,20 @@ func (op *httpsGetUpNodesOp) validateHosts(nodesStates nodesStateInfo) error { func (op *httpsGetUpNodesOp) collectUpHosts(nodesStates nodesStateInfo, host string, upHosts mapset.Set[string], upScInfo, sandboxInfo map[string]string, upScNodes mapset.Set[NodeInfo]) (err error) { upMainNodeFound := false + foundSC := false for _, node := range nodesStates.NodeList { if node.Database != op.DBName { err = fmt.Errorf(`[%s] database %s is running on host %s, rather than database %s`, op.name, node.Database, host, op.DBName) return err } + if op.scName != "" && node.Subcluster == op.scName { + foundSC = true + } if node.State == util.NodeUpState { upHosts.Add(node.Address) upScInfo[node.Address] = node.Subcluster if op.cmdType == StopDBCmd { - if node.Sandbox != "" || !upMainNodeFound { + if node.Sandbox != util.MainClusterSandbox || !upMainNodeFound { sandboxInfo[node.Address] = node.Sandbox // We still need one main cluster UP node, when there are sandboxes upMainNodeFound = true @@ -325,6 +335,9 @@ func (op *httpsGetUpNodesOp) collectUpHosts(nodesStates nodesStateInfo, host str } } } + if !foundSC && op.cmdType == StopSubclusterCmd { + return fmt.Errorf(`[%s] cannot find subcluster %s in database %s`, op.name, op.scName, op.DBName) + } return err } diff --git a/vclusterops/https_stop_subcluster_op.go b/vclusterops/https_stop_subcluster_op.go new file mode 100644 index 0000000..31959b4 --- /dev/null +++ b/vclusterops/https_stop_subcluster_op.go @@ -0,0 +1,155 @@ +/* + (c) Copyright [2023-2024] Open Text. + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package vclusterops + +import ( + "errors" + "fmt" + "strconv" + "strings" + + "github.com/vertica/vcluster/vclusterops/util" +) + +type httpsStopSCOp struct { + opBase + opHTTPSBase + scName string + force bool + requestParams map[string]string +} + +func makeHTTPSStopSCOp(useHTTPPassword bool, userName string, + httpsPassword *string, scName string, timeout int, force bool) (httpsStopSCOp, error) { + op := httpsStopSCOp{} + op.name = "HTTPSStopSCOp" + op.description = "Stop subcluster" + op.scName = scName + op.force = force + op.useHTTPPassword = useHTTPPassword + + // set the query params + // If this is a force shutdown, we do not set "timeout" to make a shutdown without draining. + // Otherwise, we set "timeout" to make a shutdown with draining. 
+ if !op.force { + op.requestParams = make(map[string]string) + op.requestParams["timeout"] = strconv.Itoa(timeout) + } + + if useHTTPPassword { + err := util.ValidateUsernameAndPassword(op.name, useHTTPPassword, userName) + if err != nil { + return op, err + } + op.userName = userName + op.httpsPassword = httpsPassword + } + return op, nil +} + +func (op *httpsStopSCOp) setupClusterHTTPRequest(hosts []string) error { + for _, host := range hosts { + httpRequest := hostHTTPRequest{} + httpRequest.Method = PostMethod + httpRequest.buildHTTPSEndpoint("subclusters/" + op.scName + "/shutdown") + if op.useHTTPPassword { + httpRequest.Password = op.httpsPassword + httpRequest.Username = op.userName + } + httpRequest.QueryParams = op.requestParams + op.clusterHTTPRequest.RequestCollection[host] = httpRequest + } + + return nil +} + +func (op *httpsStopSCOp) prepare(execContext *opEngineExecContext) error { + // execContext.nodesInfo stores the information of UP nodes in target subcluster + if len(execContext.nodesInfo) == 0 { + return fmt.Errorf(`[%s] Cannot find any node information of target subcluster in OpEngineExecContext`, op.name) + } + // send stop subcluster request to one UP host of the subcluster + op.hosts = []string{execContext.nodesInfo[0].Address} + execContext.dispatcher.setup(op.hosts) + + return op.setupClusterHTTPRequest(op.hosts) +} + +func (op *httpsStopSCOp) execute(execContext *opEngineExecContext) error { + if err := op.runExecute(execContext); err != nil { + return err + } + + return op.processResult(execContext) +} + +func (op *httpsStopSCOp) processResult(_ *opEngineExecContext) error { + var allErrs error + + for host, result := range op.clusterHTTPRequest.ResultCollection { + op.logResponse(host, result) + + if !result.isPassing() { + allErrs = errors.Join(allErrs, result.err) + continue + } + + // decode the json-format response + // The successful response object will be a dictionary: + // 1. 
shutdown without drain + // { + // "detail": "" + // } + // 2. shutdown with drain + // case 1: no alive connection exists when shutdown subcluster + // { + // "detail": "Shutdown message sent to subcluster (sc1)\n\n" + // } + // case 2: alive connection exists when shutdown subcluster + // { + // "detail": "Set subcluster (sc1) to draining state\nWaited for 1 nodes to drain\nShutdown message sent to subcluster (sc1)\n\n" + // } + // 3. shutdown a subcluster that is already down + // { + // "detail": "No action taken: all nodes in subcluster sc1 are not connected to the database group.\n" + // } + response, err := op.parseAndCheckMapResponse(host, result.content) + if err != nil { + err = fmt.Errorf(`[%s] fail to parse result on host %s, details: %w`, op.name, host, err) + allErrs = errors.Join(allErrs, err) + continue + } + + // verify if the endpoint returns correct successful message + if !op.force { + expectedDetails := "Shutdown message sent to subcluster (" + op.scName + ")" + if !strings.Contains(response["detail"], expectedDetails) { + err = fmt.Errorf(`[%s] response detail should like '... Shutdown message sent to subcluster ...' 
but got '%s'`, + op.name, response["detail"]) + allErrs = errors.Join(allErrs, err) + } + } else if response["detail"] != "" { + err = fmt.Errorf(`[%s] response detail should be empty but got '%s'`, op.name, response["detail"]) + allErrs = errors.Join(allErrs, err) + } + } + + return allErrs +} + +func (op *httpsStopSCOp) finalize(_ *opEngineExecContext) error { + return nil +} diff --git a/vclusterops/https_sync_catalog_op.go b/vclusterops/https_sync_catalog_op.go index ca4be6c..e39580b 100644 --- a/vclusterops/https_sync_catalog_op.go +++ b/vclusterops/https_sync_catalog_op.go @@ -23,17 +23,31 @@ import ( "github.com/vertica/vcluster/vclusterops/util" ) +type SyncCatCmdType int + +const ( + CreateDBSyncCat SyncCatCmdType = iota + StartDBSyncCat + StopDBSyncCat + StopSCSyncCat + AddNodeSyncCat + StartNodeSyncCat + RemoveNodeSyncCat +) + type httpsSyncCatalogOp struct { opBase opHTTPSBase + cmdType SyncCatCmdType } func makeHTTPSSyncCatalogOp(hosts []string, useHTTPPassword bool, - userName string, httpsPassword *string) (httpsSyncCatalogOp, error) { + userName string, httpsPassword *string, cmdType SyncCatCmdType) (httpsSyncCatalogOp, error) { op := httpsSyncCatalogOp{} op.name = "HTTPSSyncCatalogOp" op.description = "Synchronize catalog with communal storage" op.hosts = hosts + op.cmdType = cmdType op.useHTTPPassword = useHTTPPassword err := util.ValidateUsernameAndPassword(op.name, useHTTPPassword, userName) @@ -47,8 +61,8 @@ func makeHTTPSSyncCatalogOp(hosts []string, useHTTPPassword bool, } func makeHTTPSSyncCatalogOpWithoutHosts(useHTTPPassword bool, - userName string, httpsPassword *string) (httpsSyncCatalogOp, error) { - return makeHTTPSSyncCatalogOp(nil, useHTTPPassword, userName, httpsPassword) + userName string, httpsPassword *string, cmdType SyncCatCmdType) (httpsSyncCatalogOp, error) { + return makeHTTPSSyncCatalogOp(nil, useHTTPPassword, userName, httpsPassword, cmdType) } func (op *httpsSyncCatalogOp) setupClusterHTTPRequest(hosts []string) error { 
@@ -71,11 +85,20 @@ func (op *httpsSyncCatalogOp) setupClusterHTTPRequest(hosts []string) error { func (op *httpsSyncCatalogOp) prepare(execContext *opEngineExecContext) error { // If no hosts passed in, we will find the hosts from execute-context if len(op.hosts) == 0 { - if len(execContext.upHosts) == 0 { - return fmt.Errorf(`[%s] Cannot find any up hosts in OpEngineExecContext`, op.name) + if op.cmdType == StopSCSyncCat { + // execContext.nodesInfo stores the information of UP nodes in target subcluster + if len(execContext.nodesInfo) == 0 { + return fmt.Errorf(`[%s] Cannot find any node information of target subcluster in OpEngineExecContext`, op.name) + } + // use first up host in subcluster to execute https post request + op.hosts = []string{execContext.nodesInfo[0].Address} + } else { + if len(execContext.upHosts) == 0 { + return fmt.Errorf(`[%s] Cannot find any up hosts in OpEngineExecContext`, op.name) + } + // use first up host to execute https post request + op.hosts = []string{execContext.upHosts[0]} } - // use first up host to execute https post request - op.hosts = []string{execContext.upHosts[0]} } execContext.dispatcher.setup(op.hosts) diff --git a/vclusterops/remove_node.go b/vclusterops/remove_node.go index 5a3c711..265d52d 100644 --- a/vclusterops/remove_node.go +++ b/vclusterops/remove_node.go @@ -404,7 +404,7 @@ func (vcc VClusterCommands) produceRemoveNodeInstructions(vdb *VCoordinationData instructions = append(instructions, &nmaDeleteDirectoriesOp) if vdb.IsEon { - httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(initiatorHost, true, username, password) + httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(initiatorHost, true, username, password, RemoveNodeSyncCat) if err != nil { return instructions, err } diff --git a/vclusterops/start_db.go b/vclusterops/start_db.go index 5714c0d..c678f23 100644 --- a/vclusterops/start_db.go +++ b/vclusterops/start_db.go @@ -347,7 +347,7 @@ func (vcc VClusterCommands) produceStartDBInstructions(options 
*VStartDatabaseOp ) if options.OldIsEon.ToBool() { - httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(options.Hosts, true, *options.UserName, options.Password) + httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(options.Hosts, true, *options.UserName, options.Password, StartDBSyncCat) if err != nil { return instructions, err } diff --git a/vclusterops/start_node.go b/vclusterops/start_node.go index fc7e642..d65af6c 100644 --- a/vclusterops/start_node.go +++ b/vclusterops/start_node.go @@ -342,7 +342,7 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN ) if vdb.IsEon { - httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(options.Hosts, true, *options.UserName, options.Password) + httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(options.Hosts, true, *options.UserName, options.Password, StartNodeSyncCat) if err != nil { return instructions, err } diff --git a/vclusterops/stop_db.go b/vclusterops/stop_db.go index aa9c4d4..925f559 100644 --- a/vclusterops/stop_db.go +++ b/vclusterops/stop_db.go @@ -197,7 +197,7 @@ func (vcc *VClusterCommands) produceStopDBInstructions(options *VStopDatabaseOpt instructions = append(instructions, &httpsGetUpNodesOp) if options.IsEon { - httpsSyncCatalogOp, e := makeHTTPSSyncCatalogOpWithoutHosts(usePassword, *options.UserName, options.Password) + httpsSyncCatalogOp, e := makeHTTPSSyncCatalogOpWithoutHosts(usePassword, *options.UserName, options.Password, StopDBSyncCat) if e != nil { return instructions, e } diff --git a/vclusterops/stop_subcluster.go b/vclusterops/stop_subcluster.go new file mode 100644 index 0000000..89557d1 --- /dev/null +++ b/vclusterops/stop_subcluster.go @@ -0,0 +1,196 @@ +/* + (c) Copyright [2023-2024] Open Text. + Licensed under the Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package vclusterops
+
+import (
+	"fmt"
+
+	"github.com/vertica/vcluster/vclusterops/util"
+	"github.com/vertica/vcluster/vclusterops/vlog"
+)
+
+// VStopSubclusterOptions holds the options accepted by VStopSubcluster.
+type VStopSubclusterOptions struct {
+	/* part 1: basic db info */
+	DatabaseOptions
+
+	/* part 2: eon db info */
+	DrainSeconds int    // seconds to wait for the subcluster's users to disconnect; defaults to util.DefaultDrainSeconds
+	SCName       string // name of the subcluster to stop
+	Force        bool   // force the subcluster to shutdown immediately even if users are connected
+}
+
+// VStopSubclusterOptionsFactory returns a VStopSubclusterOptions whose fields
+// have been initialized by setDefaultValues.
+func VStopSubclusterOptionsFactory() VStopSubclusterOptions {
+	opt := VStopSubclusterOptions{}
+	// set default values to the params
+	opt.setDefaultValues()
+
+	return opt
+}
+
+// setDefaultValues applies the DatabaseOptions defaults and sets DrainSeconds
+// to util.DefaultDrainSeconds.
+func (options *VStopSubclusterOptions) setDefaultValues() {
+	options.DatabaseOptions.setDefaultValues()
+	options.DrainSeconds = util.DefaultDrainSeconds
+}
+
+// validateRequiredOptions validates the base database options and requires a
+// non-empty subcluster name.
+func (options *VStopSubclusterOptions) validateRequiredOptions(log vlog.Printer) error {
+	err := options.validateBaseOptions(commandStopCluster, log)
+	if err != nil {
+		return err
+	}
+
+	// without a name there is no target subcluster; fail here instead of
+	// building instructions around an empty name
+	if options.SCName == "" {
+		return fmt.Errorf("must specify a subcluster name")
+	}
+
+	return nil
+}
+
+// validateEonOptions rejects databases that are not in Eon mode. When Force is
+// set it also logs that the drain seconds will be ignored.
+func (options *VStopSubclusterOptions) validateEonOptions(log vlog.Printer) error {
+	if !options.IsEon {
+		return fmt.Errorf("stop subcluster is only supported in Eon mode")
+	}
+	if options.Force {
+		// this log is for vclusterops user since they probably set both DrainSeconds and Force
+		log.Info("The subcluster will be forcibly shutdown so provided drain seconds will be ignored")
+	}
+
+	return nil
+}
+
+// validateExtraOptions is the hook for any remaining option checks; it
+// currently has none and always returns nil.
+func (options *VStopSubclusterOptions) validateExtraOptions() error {
+	return nil
+}
+
+// validateParseOptions runs the three validation batches in order and returns
+// the first error encountered.
+func (options *VStopSubclusterOptions) validateParseOptions(log vlog.Printer) error {
+	// batch 1: validate required parameters
+	err := options.validateRequiredOptions(log)
+	if err != nil {
+		return err
+	}
+	// batch 2: validate eon params
+	err = options.validateEonOptions(log)
+	if err != nil {
+		return err
+	}
+	// batch 3: validate all other params
+	return options.validateExtraOptions()
+}
+
+// analyzeOptions resolves RawHosts, when any are given, into Hosts through
+// util.ResolveRawHostsToAddresses. Hosts is left untouched when RawHosts is
+// empty.
+func (options *VStopSubclusterOptions) analyzeOptions() (err error) {
+	// resolve RawHosts to be IP addresses
+	if len(options.RawHosts) > 0 {
+		options.Hosts, err = util.ResolveRawHostsToAddresses(options.RawHosts, options.IPv6)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// validateAnalyzeOptions validates the options and, only if they are valid,
+// analyzes them.
+func (options *VStopSubclusterOptions) validateAnalyzeOptions(log vlog.Printer) error {
+	if err := options.validateParseOptions(log); err != nil {
+		return err
+	}
+	return options.analyzeOptions()
+}
+
+// VStopSubcluster stops the subcluster named by options.SCName.
+//
+// It validates and analyzes the options, produces the instruction list,
+// creates a VClusterOpEngine with the certificates taken from the options and
+// lets the engine run the instructions. The first failure of any of these
+// steps is returned; errors from building or running the instructions are
+// wrapped with context.
+func (vcc VClusterCommands) VStopSubcluster(options *VStopSubclusterOptions) error {
+	/*
+	 *   - Validate Options
+	 *   - Produce Instructions
+	 *   - Create a VClusterOpEngine
+	 *   - Give the instructions to the VClusterOpEngine to run
+	 */
+
+	// validate and analyze all options
+	err := options.validateAnalyzeOptions(vcc.Log)
+	if err != nil {
+		return err
+	}
+
+	instructions, err := vcc.produceStopSCInstructions(options)
+	if err != nil {
+		return fmt.Errorf("fail to produce instructions: %w", err)
+	}
+
+	// Create a VClusterOpEngine, and add certs to the engine
+	certs := httpsCerts{key: options.Key, cert: options.Cert, caCert: options.CaCert}
+	clusterOpEngine := makeClusterOpEngine(instructions, &certs)
+
+	// Give the instructions to the VClusterOpEngine to run
+	runError := clusterOpEngine.run(vcc.Log)
+	if runError != nil {
+		return fmt.Errorf("failed to stop subcluster %s: %w", options.SCName, runError)
+	}
+
+	return nil
+}
+
+// produceStopSCInstructions will build a list of instructions to execute for
+// the stop subcluster operation.
+//
+// The generated instructions will later perform the following operations necessary
+// for a successful stop_subcluster:
+//   - Get up nodes in the target subcluster through https call
+//   - Sync catalog through the first up node in the target subcluster
+//   - Stop subcluster through the first up node in the target subcluster
+//   - Check if there are any running nodes in the target subcluster
+//
+// A nil instruction list is returned together with the error when the user
+// name is invalid or any of the operations cannot be created.
+func (vcc *VClusterCommands) produceStopSCInstructions(options *VStopSubclusterOptions) ([]clusterOp, error) {
+	var instructions []clusterOp
+
+	// when password is specified, we will use username/password to call https endpoints
+	usePassword := options.Password != nil
+	if usePassword {
+		err := options.validateUserName(vcc.Log)
+		if err != nil {
+			return instructions, err
+		}
+	}
+
+	httpsGetUpNodesOp, err := makeHTTPSGetUpScNodesOp(*options.DBName, options.Hosts,
+		usePassword, *options.UserName, options.Password, StopSubclusterCmd, options.SCName)
+	if err != nil {
+		return instructions, err
+	}
+
+	httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOpWithoutHosts(usePassword, *options.UserName, options.Password, StopSCSyncCat)
+	if err != nil {
+		return instructions, err
+	}
+
+	httpsStopSCOp, err := makeHTTPSStopSCOp(usePassword, *options.UserName, options.Password,
+		options.SCName, options.DrainSeconds, options.Force)
+	if err != nil {
+		return instructions, err
+	}
+
+	httpsCheckDBRunningOp, err := makeHTTPSCheckRunningDBOpWithoutHosts(usePassword, *options.UserName, options.Password, StopSC)
+	if err != nil {
+		return instructions, err
+	}
+
+	// the order matters: each later op relies on what the earlier ones did
+	instructions = append(instructions,
+		&httpsGetUpNodesOp,
+		&httpsSyncCatalogOp,
+		&httpsStopSCOp,
+		&httpsCheckDBRunningOp,
+	)
+
+	return instructions, nil
+}
diff --git a/vclusterops/vcluster_database_options.go b/vclusterops/vcluster_database_options.go
index 084b900..affe7fa 100644
--- a/vclusterops/vcluster_database_options.go
+++ b/vclusterops/vcluster_database_options.go
@@ -102,6 +102,7 @@
const ( commandRemoveNode = "db_remove_node" commandAddCluster = "db_add_subcluster" commandRemoveCluster = "db_remove_subcluster" + commandStopCluster = "stop_subcluster" commandSandboxSC = "sandbox_subcluster" commandUnsandboxSC = "unsandbox_subcluster" commandShowRestorePoints = "show_restore_points"