diff --git a/commands/vcluster_config.go b/commands/vcluster_config.go index 3ddddbb..24d9e90 100644 --- a/commands/vcluster_config.go +++ b/commands/vcluster_config.go @@ -233,30 +233,11 @@ func readVDBToDBConfig(vdb *vclusterops.VCoordinationDatabase) (DatabaseConfig, if !ok { return dbConfig, fmt.Errorf("cannot find host %s from HostNodeMap", host) } - nodeConfig := NodeConfig{} - nodeConfig.Name = vnode.Name - nodeConfig.Address = vnode.Address - nodeConfig.Subcluster = vnode.Subcluster - nodeConfig.Sandbox = vnode.Sandbox - - if vdb.CatalogPrefix == "" { - nodeConfig.CatalogPath = vnode.CatalogPath - } else { - nodeConfig.CatalogPath = vdb.GenCatalogPath(vnode.Name) - } - if vdb.DataPrefix == "" && len(vnode.StorageLocations) > 0 { - nodeConfig.DataPath = vnode.StorageLocations[0] - } else { - nodeConfig.DataPath = vdb.GenDataPath(vnode.Name) - } - if vdb.IsEon && vdb.DepotPrefix == "" { - nodeConfig.DepotPath = vnode.DepotPath - } else if vdb.DepotPrefix != "" { - nodeConfig.DepotPath = vdb.GenDepotPath(vnode.Name) - } + nodeConfig := buildNodeConfig(vnode, vdb) dbConfig.Nodes = append(dbConfig.Nodes, &nodeConfig) } + dbConfig.IsEon = vdb.IsEon dbConfig.CommunalStorageLocation = vdb.CommunalStorageLocation dbConfig.Ipv6 = vdb.Ipv6 @@ -266,6 +247,33 @@ func readVDBToDBConfig(vdb *vclusterops.VCoordinationDatabase) (DatabaseConfig, return dbConfig, nil } +func buildNodeConfig(vnode *vclusterops.VCoordinationNode, + vdb *vclusterops.VCoordinationDatabase) NodeConfig { + nodeConfig := NodeConfig{} + nodeConfig.Name = vnode.Name + nodeConfig.Address = vnode.Address + nodeConfig.Subcluster = vnode.Subcluster + nodeConfig.Sandbox = vnode.Sandbox + + if vdb.CatalogPrefix == "" { + nodeConfig.CatalogPath = vnode.CatalogPath + } else { + nodeConfig.CatalogPath = vdb.GenCatalogPath(vnode.Name) + } + if vdb.DataPrefix == "" && len(vnode.StorageLocations) > 0 { + nodeConfig.DataPath = vnode.StorageLocations[0] + } else { + nodeConfig.DataPath = vdb.GenDataPath(vnode.Name) + } + if vdb.IsEon && vdb.DepotPrefix == "" { + nodeConfig.DepotPath = vnode.DepotPath + } else if vdb.DepotPrefix != "" { + nodeConfig.DepotPath = vdb.GenDepotPath(vnode.Name) + } + + return nodeConfig +} + // read reads information from configFilePath to a DatabaseConfig object. // It returns any read error encountered. func readConfig() (dbConfig *DatabaseConfig, err error) { diff --git a/vclusterops/coordinator_database.go b/vclusterops/coordinator_database.go index 839458c..020b9aa 100644 --- a/vclusterops/coordinator_database.go +++ b/vclusterops/coordinator_database.go @@ -36,6 +36,7 @@ type VCoordinationDatabase struct { CatalogPrefix string DataPrefix string HostNodeMap vHostNodeMap + UnboundNodes []*VCoordinationNode // for convenience HostList []string // expected to be resolved IP addresses @@ -138,6 +139,13 @@ func (vdb *VCoordinationDatabase) setFromCreateDBOptions(options *VCreateDatabas // addNode adds a given host to the VDB's HostList and HostNodeMap. // Duplicate host will not be added. func (vdb *VCoordinationDatabase) addNode(vnode *VCoordinationNode) error { + // unbound nodes have the same 0.0.0.0 address, + // so we add them into the UnboundedNodes list + if vnode.Address == util.UnboundedIPv4 || vnode.Address == util.UnboundedIPv6 { + vdb.UnboundNodes = append(vdb.UnboundNodes, vnode) + return nil + } + if _, exist := vdb.HostNodeMap[vnode.Address]; exist { return fmt.Errorf("host %s has already been in the VDB's HostList", vnode.Address) } diff --git a/vclusterops/helpers.go b/vclusterops/helpers.go index e225b3e..b95f0dd 100644 --- a/vclusterops/helpers.go +++ b/vclusterops/helpers.go @@ -484,3 +484,19 @@ func (vcc *VClusterCommands) getUnreachableHosts(options *DatabaseOptions, hosts type nmaGenericJSONResponse struct { RespStr string } + +// extractCatalogPrefix extracts the catalog prefix from a node's catalog path. +// This function takes the full catalog path, database name, and node name as +// input parameters, and returns the catalog prefix along with a boolean indicating +// whether the extraction was successful. +func extractCatalogPrefix(catalogPath, dbName, nodeName string) (string, bool) { + catalogSuffix := "/" + dbName + "/" + nodeName + "_catalog/Catalog" + // if catalog suffix matches catalog path, it means we created the catalog in the root path + if catalogPath == catalogSuffix { + return "/", true + } + if !strings.HasSuffix(catalogPath, catalogSuffix) { + return "", false + } + return strings.TrimSuffix(catalogPath, catalogSuffix), true +} diff --git a/vclusterops/helpers_test.go b/vclusterops/helpers_test.go index 91d3b66..3dc20e5 100644 --- a/vclusterops/helpers_test.go +++ b/vclusterops/helpers_test.go @@ -188,3 +188,38 @@ func TestValidateHostMap(t *testing.T) { err = validateHostMaps(threeHosts, oneMap, twoMap) assert.Error(t, err) } + +func TestExtractCatalogPrefix(t *testing.T) { + // positive cases + catalogPath := "/verticadb/vertica01/vertica/v_vertica_node0001_catalog/Catalog" + dbName := "vertica" + nodeName := "v_vertica_node0001" + + expected := "/verticadb/vertica01" + catalogPrefix, found := extractCatalogPrefix(catalogPath, dbName, nodeName) + assert.True(t, found) + assert.Equal(t, catalogPrefix, expected) + + catalogPath = "/catalog/test/v_test_node0001_catalog/Catalog" + dbName = "test" + nodeName = "v_test_node0001" + + expected = "/catalog" + catalogPrefix, found = extractCatalogPrefix(catalogPath, dbName, nodeName) + assert.True(t, found) + assert.Equal(t, catalogPrefix, expected) + + catalogPath = "/test/v_test_node0001_catalog/Catalog" + expected = "/" + catalogPrefix, found = extractCatalogPrefix(catalogPath, dbName, nodeName) + assert.True(t, found) + assert.Equal(t, catalogPrefix, expected) + + // negative case + catalogPath = "/catalog/test/v_test_node0001_catalog/Catalog/test" + + expected = "" + catalogPrefix, found = extractCatalogPrefix(catalogPath, dbName, nodeName) + assert.False(t, found) + assert.Equal(t, catalogPrefix, expected) +} diff --git a/vclusterops/https_get_nodes_info_op.go b/vclusterops/https_get_nodes_info_op.go index 3eb9065..8b65bb6 100644 --- a/vclusterops/https_get_nodes_info_op.go +++ b/vclusterops/https_get_nodes_info_op.go @@ -18,7 +18,6 @@ package vclusterops import ( "errors" "fmt" - "strings" "github.com/vertica/vcluster/rfc7807" "github.com/vertica/vcluster/vclusterops/util" @@ -140,41 +139,30 @@ func (op *httpsGetNodesInfoOp) processResult(_ *opEngineExecContext) error { // save nodes info to vdb op.vdb.HostNodeMap = makeVHostNodeMap() op.vdb.HostList = []string{} + op.vdb.PrimaryUpNodes = []string{} + op.vdb.UnboundNodes = []*VCoordinationNode{} for _, node := range nodesStates.NodeList { if node.Database != op.dbName { err = fmt.Errorf(`[%s] database %s is running on host %s, rather than database %s`, op.name, node.Database, host, op.dbName) allErrs = errors.Join(allErrs, err) return appendHTTPSFailureError(allErrs) } - vNode := makeVCoordinationNode() - vNode.Name = node.Name - vNode.Address = node.Address - vNode.CatalogPath = node.CatalogPath - vNode.DepotPath = node.DepotPath - vNode.StorageLocations = node.StorageLocations - vNode.IsPrimary = node.IsPrimary - vNode.State = node.State - vNode.Subcluster = node.Subcluster - vNode.Sandbox = node.Sandbox - vNode.IsControlNode = node.IsControlNode - vNode.ControlNode = node.ControlNode + vnode := buildVnodeFromNodeStateInfo(node) if node.IsPrimary && node.State == util.NodeUpState { op.vdb.PrimaryUpNodes = append(op.vdb.PrimaryUpNodes, node.Address) } - err := op.vdb.addNode(&vNode) + err := op.vdb.addNode(&vnode) if err != nil { allErrs = errors.Join(allErrs, err) return appendHTTPSFailureError(allErrs) } // extract catalog prefix from node's catalog path - // catalog prefix is preceding db name - dbPath := "/" + node.Database - index := strings.Index(node.CatalogPath, dbPath) - if index == -1 { - op.logger.PrintWarning("[%s] failed to get catalog prefix because catalog path %s does not contain database name %s", - op.name, node.CatalogPath, node.Database) + catalogPrefix, found := extractCatalogPrefix(node.CatalogPath, node.Database, node.Name) + if !found { + op.logger.PrintError("[%s] failed to retrieve catalog prefix because catalog path %q is malformed", + op.name, node.CatalogPath) } - op.vdb.CatalogPrefix = node.CatalogPath[:index] + op.vdb.CatalogPrefix = catalogPrefix } return nil @@ -187,3 +175,20 @@ func (op *httpsGetNodesInfoOp) processResult(_ *opEngineExecContext) error { func (op *httpsGetNodesInfoOp) finalize(_ *opEngineExecContext) error { return nil } + +func buildVnodeFromNodeStateInfo(node *nodeStateInfo) VCoordinationNode { + vnode := makeVCoordinationNode() + vnode.Name = node.Name + vnode.Address = node.Address + vnode.CatalogPath = node.CatalogPath + vnode.DepotPath = node.DepotPath + vnode.StorageLocations = node.StorageLocations + vnode.IsPrimary = node.IsPrimary + vnode.State = node.State + vnode.Subcluster = node.Subcluster + vnode.Sandbox = node.Sandbox + vnode.IsControlNode = node.IsControlNode + vnode.ControlNode = node.ControlNode + + return vnode +} diff --git a/vclusterops/nma_download_file_op.go b/vclusterops/nma_download_file_op.go index d6d9170..23c2aa1 100644 --- a/vclusterops/nma_download_file_op.go +++ b/vclusterops/nma_download_file_op.go @@ -86,7 +86,7 @@ type ReviveDBNodeCountMismatchError struct { func (e *ReviveDBNodeCountMismatchError) Error() string { return fmt.Sprintf(`[%s] nodes mismatch found on host %s: the number of the new nodes in --hosts is %d,`+ - ` but the number of the old nodes in description file is %d`, + ` but the number of primary nodes in the description file is %d`, e.ReviveDBStep, e.FailureHost, e.NumOfNewNodes, e.NumOfOldNodes) } @@ -259,6 +259,12 @@ func (op *nmaDownloadFileOp) processResult(execContext *opEngineExecContext) err return nil } + // if users provide a subset of nodes for reviving, + // we assume users intend to revive to primary subclusters + if len(descFileContent.NodeList) > len(op.newNodes) { + filterPrimaryNodes(&descFileContent) + } + if len(descFileContent.NodeList) != len(op.newNodes) { err := &ReviveDBNodeCountMismatchError{ ReviveDBStep: op.name, @@ -282,6 +288,16 @@ func (op *nmaDownloadFileOp) processResult(execContext *opEngineExecContext) err return appendHTTPSFailureError(allErrs) } +func filterPrimaryNodes(descFileContent *fileContent) { + var updatedFileContent fileContent + for _, node := range descFileContent.NodeList { + if node.IsPrimary { + updatedFileContent.NodeList = append(updatedFileContent.NodeList, node) + } + } + descFileContent.NodeList = updatedFileContent.NodeList +} + // buildVDBFromClusterConfig can build a vdb using cluster_config.json func (op *nmaDownloadFileOp) buildVDBFromClusterConfig(descFileContent fileContent) error { op.vdb.HostNodeMap = makeVHostNodeMap() diff --git a/vclusterops/re_ip.go b/vclusterops/re_ip.go index 832088c..07635b0 100644 --- a/vclusterops/re_ip.go +++ b/vclusterops/re_ip.go @@ -114,6 +114,12 @@ func (options *VReIPOptions) validateAnalyzeOptions(logger vlog.Printer) error { nodeAddresses := make(map[string]struct{}) for _, info := range options.ReIPList { // the addresses must be valid IPs + if info.NodeAddress != "" { + if info.NodeAddress == util.UnboundedIPv4 || info.NodeAddress == util.UnboundedIPv6 { + return errors.New("the re-ip list should not contain unbound addresses") + } + } + if err := util.AddressCheck(info.TargetAddress, ipv6); err != nil { return err } diff --git a/vclusterops/start_node.go b/vclusterops/start_node.go index 8d9a178..2ea8dad 100644 --- a/vclusterops/start_node.go +++ b/vclusterops/start_node.go @@ -524,7 +524,6 @@ func (options *VStartNodesOptions) separateHostsBasedOnReIPNeed( if oldIP != newIP { startNodeInfo.ReIPList = append(startNodeInfo.ReIPList, newIP) startNodeInfo.NodeNamesToStart = append(startNodeInfo.NodeNamesToStart, nodename) - logger.Info("the nodes need to be re-IP", "nodeNames", startNodeInfo.NodeNamesToStart, "IPs", startNodeInfo.ReIPList) } else { vnode, ok := vdb.HostNodeMap[newIP] if ok && vnode.State == util.NodeDownState { @@ -545,5 +544,22 @@ func (options *VStartNodesOptions) separateHostsBasedOnReIPNeed( } } + // handle unbound nodes + // some of the unbound nodes may need to re-ip + for _, vnode := range vdb.UnboundNodes { + if newIP, exists := options.Nodes[vnode.Name]; exists { + startNodeInfo.ReIPList = append(startNodeInfo.ReIPList, newIP) + startNodeInfo.NodeNamesToStart = append(startNodeInfo.NodeNamesToStart, vnode.Name) + logger.DisplayInfo("the unbound node (%s) needs to change its IP to %s", vnode.Name, newIP) + + sortedHosts = append(sortedHosts, newIP) + } + } + + // log nodes that need to re-ip + if len(startNodeInfo.NodeNamesToStart) > 0 { + logger.Info("the nodes need to be re-IP", "nodeNames", startNodeInfo.NodeNamesToStart, "IPs", startNodeInfo.ReIPList) + } + return sortedHosts, nil } diff --git a/vclusterops/util/util.go b/vclusterops/util/util.go index bedbb7c..f1ac86b 100644 --- a/vclusterops/util/util.go +++ b/vclusterops/util/util.go @@ -17,6 +17,7 @@ package util import ( "encoding/json" + "errors" "flag" "fmt" "io/fs" @@ -87,6 +88,17 @@ const ( objectNameUnsupportedCharacters = `=<>'^\".@?#&/:;{}()[] \~!%+|,` + "`$" ) +const ( + // Unbound nodes are the nodes in catalog but without IP assigned. + // These nodes can come from the following scenario: + // - a database has primary and secondary nodes + // - users run revive_db to the primary nodes only + // - the secondary nodes become "unbound nodes" after this revive + // - users need to run start_node with re-ip to bring the unbound nodes up + UnboundedIPv4 = "0.0.0.0" + UnboundedIPv6 = "0:0:0:0:0:0:0:0" +) + // NmaSecretLookup retrieves kubernetes secrets. func NmaSecretLookup(f FetchAllEnvVars) { k8port, _ := os.LookupEnv(kubernetesPort) @@ -281,6 +293,10 @@ func AddressCheck(address string, ipv6 bool) error { return fmt.Errorf("%s in the re-ip file is not a valid %s address", address, ipVersion) } + if address == UnboundedIPv4 || address == UnboundedIPv6 { + return errors.New("the re-ip list should not contain unbound addresses") + } + return nil } @@ -350,6 +366,9 @@ func ResolveRawHostsToAddresses(rawHosts []string, ipv6 bool) ([]string, error) if host == "" { return hostAddresses, fmt.Errorf("invalid empty host found in the provided host list") } + if host == UnboundedIPv4 || host == UnboundedIPv6 { + return hostAddresses, fmt.Errorf("ambiguous host address (%s) is used", host) + } addr, err := ResolveToOneIP(host, ipv6) if err != nil { return hostAddresses, err