diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index c4747de67c..9e452f8dba 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -29,7 +29,7 @@ A clear and concise description of what you expected to happen. What happened instead. -### Version (`ghpc --version`) +### Version (`gcluster --version`) ### Blueprint @@ -41,9 +41,9 @@ If applicable, attach or paste the blueprint YAML used to produce the bug. ### Expanded Blueprint -If applicable, please attach or paste the expanded blueprint. The expanded blueprint can be obtained by running `ghpc expand your-blueprint.yaml`. +If applicable, please attach or paste the expanded blueprint. The expanded blueprint can be obtained by running `gcluster expand your-blueprint.yaml`. -Disregard if the bug occurs when running `ghpc expand ...` as well. +Disregard if the bug occurs when running `gcluster expand ...` as well. ```yaml diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 8380bba1cb..f3c224983f 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -8,4 +8,4 @@ Please take the following actions before submitting this pull request. 
* Add or modify unit tests to cover code changes * Ensure that unit test coverage remains above 80% * Update all applicable documentation -* Follow Cloud HPC Toolkit Contribution guidelines [#](https://goo.gle/hpc-toolkit-contributing) +* Follow Cluster Toolkit Contribution guidelines [#](https://goo.gle/hpc-toolkit-contributing) diff --git a/.gitignore b/.gitignore index d8853e6f8c..2d932767b9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ # Built Binary ghpc +gcluster # Expand artifact expanded.yaml # macOS Desktop Services Store diff --git a/Makefile b/Makefile index 71d6ae6511..1222be60d1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # PREAMBLE MIN_PACKER_VERSION=1.7.9 # for building images MIN_TERRAFORM_VERSION=1.2 # for deploying modules -MIN_GOLANG_VERSION=1.18 # for building ghpc +MIN_GOLANG_VERSION=1.18 # for building gcluster .PHONY: install install-user tests format install-dev-deps \ warn-go-missing warn-terraform-missing warn-packer-missing \ @@ -29,19 +29,24 @@ endif # RULES MEANT TO BE USED DIRECTLY -ghpc: warn-go-version warn-terraform-version warn-packer-version $(shell find ./cmd ./pkg ghpc.go -type f) - $(info **************** building ghpc ************************) - @go build -ldflags="-X 'main.gitTagVersion=$(GIT_TAG_VERSION)' -X 'main.gitBranch=$(GIT_BRANCH)' -X 'main.gitCommitInfo=$(GIT_COMMIT_INFO)' -X 'main.gitCommitHash=$(GIT_COMMIT_HASH)' -X 'main.gitInitialHash=$(GIT_INITIAL_HASH)'" ghpc.go +gcluster: warn-go-version warn-terraform-version warn-packer-version $(shell find ./cmd ./pkg gcluster.go -type f) + $(info **************** building gcluster ************************) + @go build -ldflags="-X 'main.gitTagVersion=$(GIT_TAG_VERSION)' -X 'main.gitBranch=$(GIT_BRANCH)' -X 'main.gitCommitInfo=$(GIT_COMMIT_INFO)' -X 'main.gitCommitHash=$(GIT_COMMIT_HASH)' -X 'main.gitInitialHash=$(GIT_INITIAL_HASH)'" gcluster.go + @ln -sf gcluster ghpc + +ghpc: gcluster install-user: - $(info ******** installing ghpc in ~/bin 
*********************) + $(info ******** installing gcluster in ~/bin *********************) mkdir -p ~/bin - install ./ghpc ~/bin + install ./gcluster ~/bin + ln -sf ~/bin/gcluster ~/bin/ghpc ifeq ($(shell id -u), 0) install: - $(info ***** installing ghpc in /usr/local/bin ***************) - install ./ghpc /usr/local/bin + $(info ***** installing gcluster in /usr/local/bin ***************) + install ./gcluster /usr/local/bin + ln -sf /usr/local/bin/gcluster /usr/local/bin/ghpc else install: install-user @@ -70,7 +75,7 @@ install-dev-deps: warn-terraform-version warn-packer-version check-pre-commit ch test-engine: warn-go-missing $(info **************** vetting go code **********************) go vet $(ENG) - $(info **************** running ghpc unit tests **************) + $(info **************** running gcluster unit tests **************) go test -cover $(ENG) 2>&1 | perl tools/enforce_coverage.pl ifeq (, $(shell which pre-commit)) @@ -148,11 +153,11 @@ else warn-terraform-version: endif -validate_configs: ghpc +validate_configs: gcluster $(info *********** running basic integration tests ***********) tools/validate_configs/validate_configs.sh -validate_golden_copy: ghpc +validate_golden_copy: gcluster $(info *********** running "Golden copy" tests ***********) tools/validate_configs/golden_copies/validate.sh diff --git a/README.md b/README.md index 78ac92fbc0..82b9a50fb1 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,21 @@ -# Google HPC-Toolkit +# Google Cluster Toolkit (formally HPC Toolkit) ## Description -HPC Toolkit is an open-source software offered by Google Cloud which makes it -easy for customers to deploy HPC environments on Google Cloud. +Cluster Toolkit is an open-source software offered by Google Cloud which makes it +easy for customers to deploy AI/ML and HPC environments on Google Cloud. 
-HPC Toolkit allows customers to deploy turnkey HPC environments (compute, +Cluster Toolkit allows customers to deploy turnkey AI/ML and HPC environments (compute, networking, storage, etc.) following Google Cloud best-practices, in a repeatable -manner. The HPC Toolkit is designed to be highly customizable and extensible, -and intends to address the HPC deployment needs of a broad range of customers. +manner. The Cluster Toolkit is designed to be highly customizable and extensible, +and intends to address the AI/ML and HPC deployment needs of a broad range of customers. ## Detailed documentation and examples The Toolkit comes with a suite of [tutorials], [examples], and full -documentation for a suite of [modules] that have been designed for HPC use cases. +documentation for a suite of [modules] that have been designed for AI/ML and HPC use cases. More information can be found on the -[Google Cloud Docs](https://cloud.google.com/hpc-toolkit/docs/overview). +[Google Cloud Docs](https://cloud.google.com/cluster-toolkit/docs/overview). [tutorials]: docs/tutorials/README.md [examples]: examples/README.md @@ -24,29 +24,29 @@ More information can be found on the ## Quickstart Running through the -[quickstart tutorial](https://cloud.google.com/hpc-toolkit/docs/quickstarts/slurm-cluster) -is the recommended path to get started with the HPC Toolkit. +[quickstart tutorial](https://cloud.google.com/cluster-toolkit/docs/quickstarts/slurm-cluster) +is the recommended path to get started with the Cluster Toolkit. --- If a self directed path is preferred, you can use the following commands to -build the `ghpc` binary: +build the `gcluster` binary: ```shell git clone https://github.com/GoogleCloudPlatform/hpc-toolkit cd hpc-toolkit make -./ghpc --version -./ghpc --help +./gcluster --version +./gcluster --help ``` > **_NOTE:_** You may need to [install dependencies](#dependencies) first. 
-## HPC Toolkit Components +## Cluster Toolkit Components -Learn about the components that make up the HPC Toolkit and more on how it works +Learn about the components that make up the Cluster Toolkit and more on how it works on the -[Google Cloud Docs Product Overview](https://cloud.google.com/hpc-toolkit/docs/overview#components). +[Google Cloud Docs Product Overview](https://cloud.google.com/cluster-toolkit/docs/overview#components). ## GCP Credentials @@ -105,7 +105,7 @@ minutes. Please consider it only for blueprints that are quickly deployed. ### Standard Images -The HPC Toolkit officially supports the following VM images: +The Cluster Toolkit officially supports the following VM images: * HPC CentOS 7 * HPC Rocky Linux 8 @@ -119,37 +119,37 @@ For more information on these and other images, see > **_Warning:_** Slurm Terraform modules cannot be directly used on the standard OS images. They must be used in combination with images built for the versioned release of the Terraform module. -The HPC Toolkit provides modules and examples for implementing pre-built and custom Slurm VM images, see [Slurm on GCP](docs/vm-images.md#slurm-on-gcp) +The Cluster Toolkit provides modules and examples for implementing pre-built and custom Slurm VM images, see [Slurm on GCP](docs/vm-images.md#slurm-on-gcp) ## Blueprint Validation The Toolkit contains "validator" functions that perform basic tests of the -blueprint to ensure that deployment variables are valid and that the HPC +blueprint to ensure that deployment variables are valid and that the AI/ML and HPC environment can be provisioned in your Google Cloud project. Further information can be found in [dedicated documentation](docs/blueprint-validation.md). ## Enable GCP APIs In a new GCP project there are several APIs that must be enabled to deploy your -HPC cluster. These will be caught when you perform `terraform apply` but you can +cluster. 
These will be caught when you perform `terraform apply` but you can save time by enabling them upfront. See -[Google Cloud Docs](https://cloud.google.com/hpc-toolkit/docs/setup/configure-environment#enable-apis) +[Google Cloud Docs](https://cloud.google.com/cluster-toolkit/docs/setup/configure-environment#enable-apis) for instructions. ## GCP Quotas -You may need to request additional quota to be able to deploy and use your HPC +You may need to request additional quota to be able to deploy and use your cluster. See -[Google Cloud Docs](https://cloud.google.com/hpc-toolkit/docs/setup/hpc-blueprint#request-quota) +[Google Cloud Docs](https://cloud.google.com/cluster-toolkit/docs/setup/hpc-blueprint#request-quota) for more information. ## Billing Reports -You can view your billing reports for your HPC cluster on the +You can view your billing reports for your cluster on the [Cloud Billing Reports](https://cloud.google.com/billing/docs/how-to/reports) page. ​​To view the Cloud Billing reports for your Cloud Billing account, including viewing the cost information for all of the Cloud projects that are @@ -279,7 +279,7 @@ hpc-slurm/ ## Dependencies See -[Cloud Docs on Installing Dependencies](https://cloud.google.com/hpc-toolkit/docs/setup/install-dependencies). +[Cloud Docs on Installing Dependencies](https://cloud.google.com/cluster-toolkit/docs/setup/install-dependencies). ### Notes on Packer @@ -303,12 +303,12 @@ applied at boot-time. ## Development The following setup is in addition to the [dependencies](#dependencies) needed -to build and run HPC-Toolkit. +to build and run Cluster-Toolkit. Please use the `pre-commit` hooks [configured](./.pre-commit-config.yaml) in this repository to ensure that all changes are validated, tested and properly documented before pushing code changes. 
The pre-commits configured -in the HPC Toolkit have a set of dependencies that need to be installed before +in the Cluster Toolkit have a set of dependencies that need to be installed before successfully passing. Follow these steps to install and setup pre-commit in your cloned repository: diff --git a/cmd/README.md b/cmd/README.md index 2127a14735..f6c3a48b03 100644 --- a/cmd/README.md +++ b/cmd/README.md @@ -1,54 +1,54 @@ -# HPC Toolkit Commands +# Cluster Toolkit (formerly HPC Toolkit) Commands -## ghpc +## gcluster -`ghpc` is the tool used by Cloud HPC Toolkit to create deployments of HPC +`gcluster` is the tool used by Cluster Toolkit to create deployments of AI/ML and HPC clusters, also referred to as the gHPC Engine. -### Usage - ghpc +### Usage - gcluster ```bash -ghpc [FLAGS] -ghpc [SUBCOMMAND] +gcluster [FLAGS] +gcluster [SUBCOMMAND] ``` -### Subcommands - ghpc +### Subcommands - gcluster -* [`deploy`](#ghpc-deploy): Deploy an HPC cluster on Google Cloud -* [`create`](#ghpc-create): Create a new deployment -* [`expand`](#ghpc-expand): Expand the blueprint without creating a new deployment -* [`completion`](#ghpc-completion): Generate completion script -* [`help`](#ghpc-help): Display help information for any command +* [`deploy`](#gcluster-deploy): Deploy an AI/ML or HPC cluster on Google Cloud +* [`create`](#gcluster-create): Create a new deployment +* [`expand`](#gcluster-expand): Expand the blueprint without creating a new deployment +* [`completion`](#gcluster-completion): Generate completion script +* [`help`](#gcluster-help): Display help information for any command -### Flags - ghpc +### Flags - gcluster -* `-h, --help`: displays detailed help for the ghpc command. -* `-v, --version`: displays the version of ghpc being used. +* `-h, --help`: displays detailed help for the gcluster command. +* `-v, --version`: displays the version of gcluster being used. 
-### Example - ghpc +### Example - gcluster ```bash -ghpc --version +gcluster --version ``` -## ghpc deploy +## gcluster deploy -`ghpc deploy` deploys an HPC cluster on Google Cloud using the deployment directory created by `ghpc create` or creates one from supplied blueprint file. +`gcluster deploy` deploys a cluster on Google Cloud using the deployment directory created by `gcluster create` or creates one from supplied blueprint file. ### Usage - deploy ```bash -ghpc deploy ( | ) [flags] +gcluster deploy ( | ) [flags] ``` -## ghpc create +## gcluster create -`ghpc create` creates a deployment directory. This deployment directory is used to deploy an HPC cluster on Google Cloud. +`gcluster create` creates a deployment directory. This deployment directory is used to deploy a cluster on Google Cloud. ### Usage - create ```sh -ghpc create BLUEPRINT_FILE [FLAGS] +gcluster create BLUEPRINT_FILE [FLAGS] ``` ### Positional arguments - create @@ -59,7 +59,7 @@ ghpc create BLUEPRINT_FILE [FLAGS] * `--backend-config strings`: Comma-separated list of name=value variables to set Terraform backend configuration. Can be used multiple times. * `-h, --help`: display detailed help for the create command. -* `-o, --out string`: sets the output directory where the HPC deployment directory will be created. +* `-o, --out string`: sets the output directory where the AI/ML or HPC deployment directory will be created. * `-w, --overwrite-deployment`: If specified, an existing deployment directory is overwritten by the new deployment. * Terraform state IS preserved. 
@@ -85,29 +85,29 @@ For example to create a deployment folder using a blueprint named `my-blueprint` run the following command: ```bash -ghpc create my-blueprint +gcluster create my-blueprint ``` -## ghpc expand +## gcluster expand -`ghpc expand` takes as input a blueprint file and expands all the fields +`gcluster expand` takes as input a blueprint file and expands all the fields necessary to create a deployment without actually creating the deployment directory. It outputs an expanded blueprint, which can be used for debugging -purposes and can be used as input to `ghpc create`. +purposes and can be used as input to `gcluster create`. -For detailed usage information, run `ghpc help create`. +For detailed usage information, run `gcluster help create`. -## ghpc completion -Generates a script that enables command completion for `ghpc` for a given shell. +## gcluster completion +Generates a script that enables command completion for `gcluster` for a given shell. -For detailed usage information, run `ghpc help completion` +For detailed usage information, run `gcluster help completion` -## ghpc help -`ghpc help` prints the usage information for `ghpc` and subcommands of `ghpc`. +## gcluster help +`gcluster help` prints the usage information for `gcluster` and subcommands of `gcluster`. -To generate usage details for `ghpc`, run `ghpc help`. To generate usage +To generate usage details for `gcluster`, run `gcluster help`. To generate usage details for a specific command, for example `expand`, run the following command: ```bash -ghpc help expand +gcluster help expand ``` diff --git a/cmd/create.go b/cmd/create.go index 82ff5f6a12..c20fc5f121 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -// Package cmd defines command line utilities for ghpc +// Package cmd defines command line utilities for gcluster package cmd import ( diff --git a/cmd/deploy.go b/cmd/deploy.go index 74e6210f2a..07e630eb5e 100644 --- a/cmd/deploy.go +++ b/cmd/deploy.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package cmd defines command line utilities for ghpc +// Package cmd defines command line utilities for gcluster package cmd import ( diff --git a/cmd/destroy.go b/cmd/destroy.go index bad0a14a16..b8794c3034 100644 --- a/cmd/destroy.go +++ b/cmd/destroy.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package cmd defines command line utilities for ghpc +// Package cmd defines command line utilities for gcluster package cmd import ( diff --git a/cmd/expand.go b/cmd/expand.go index 974ae86aac..17de57a1b2 100644 --- a/cmd/expand.go +++ b/cmd/expand.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package cmd defines command line utilities for ghpc +// Package cmd defines command line utilities for gcluster package cmd import ( diff --git a/cmd/export.go b/cmd/export.go index 2ebdd2cf13..f4bb1adb4a 100644 --- a/cmd/export.go +++ b/cmd/export.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package cmd defines command line utilities for ghpc +// Package cmd defines command line utilities for gcluster package cmd import ( diff --git a/cmd/import.go b/cmd/import.go index 9ef333422d..3e12fa47ec 100644 --- a/cmd/import.go +++ b/cmd/import.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// Package cmd defines command line utilities for ghpc +// Package cmd defines command line utilities for gcluster package cmd import ( diff --git a/cmd/root.go b/cmd/root.go index 34318e1adb..8d90f1cb6d 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package cmd defines command line utilities for ghpc +// Package cmd defines command line utilities for gcluster package cmd import ( @@ -43,7 +43,7 @@ var ( var ( annotation = make(map[string]string) rootCmd = &cobra.Command{ - Use: "ghpc", + Use: "gcluster", Short: "A blueprint and deployment engine for HPC clusters in GCP.", Long: `gHPC provides a flexible and simple to use interface to accelerate HPC deployments on the Google Cloud Platform.`, @@ -52,7 +52,7 @@ HPC deployments on the Google Cloud Platform.`, logging.Fatal("cmd.Help function failed: %s", err) } }, - Version: "v1.36.1", + Version: "v1.37.0", Annotations: annotation, } ) @@ -68,7 +68,7 @@ func init() { func Execute() error { mismatch, branch, hash, dir := checkGitHashMismatch() if mismatch { - logging.Error("WARNING: ghpc binary was built from a different commit (%s/%s) than the current git branch in %s (%s/%s). You can rebuild the binary by running 'make'", + logging.Error("WARNING: gcluster binary was built from a different commit (%s/%s) than the current git branch in %s (%s/%s). You can rebuild the binary by running 'make'", GitBranch, GitCommitHash[0:7], dir, branch, hash[0:7]) } @@ -82,7 +82,7 @@ func Execute() error { annotation["version"] = GitTagVersion annotation["branch"] = GitBranch annotation["commitInfo"] = GitCommitInfo - rootCmd.SetVersionTemplate(`ghpc version {{index .Annotations "version"}} + rootCmd.SetVersionTemplate(`gcluster version {{index .Annotations "version"}} Built from '{{index .Annotations "branch"}}' branch. 
Commit info: {{index .Annotations "commitInfo"}} `) @@ -92,7 +92,7 @@ Commit info: {{index .Annotations "commitInfo"}} } // checkGitHashMismatch will compare the hash of the git repository vs the git -// hash the ghpc binary was compiled against, if the git repository if found and +// hash the gcluster binary was compiled against, if the git repository if found and // a mismatch is identified, then the function returns a positive bool along with // the branch details, and false for all other cases. func checkGitHashMismatch() (mismatch bool, branch, hash, dir string) { @@ -125,7 +125,7 @@ func checkGitHashMismatch() (mismatch bool, branch, hash, dir string) { // hpcToolkitRepo will find the path of the directory containing the hpc-toolkit // starting with the working directory and evaluating the parent directories until -// the toolkit repository is found. If the HPC Toolkit repository is not found by +// the toolkit repository is found. If the Cluster Toolkit repository is not found by // traversing the path, then the executable directory is checked. 
func hpcToolkitRepo() (repo *git.Repository, dir string, err error) { // first look in the working directory and it's parents until a git repo is @@ -170,11 +170,11 @@ func hpcToolkitRepo() (repo *git.Repository, dir string, err error) { if isHpcToolkitRepo(*repo) { return repo, dir, nil } - return nil, "", errors.New("ghpc executable found in a git repo other than the hpc-toolkit git repo") + return nil, "", errors.New("gcluster executable found in a git repo other than the hpc-toolkit git repo") } // isHpcToolkitRepo will verify that the found git repository has a commit with -// the known hash of the initial commit of the HPC Toolkit repository +// the known hash of the initial commit of the Cluster Toolkit repository func isHpcToolkitRepo(r git.Repository) bool { h := plumbing.NewHash(GitInitialHash) _, err := r.CommitObject(h) @@ -183,16 +183,16 @@ func isHpcToolkitRepo(r git.Repository) bool { // Best effort to find the path of the executable // Possible return values: -// * "ghpc" if the executable is in the PATH +// * "gcluster" if the executable is in the PATH // AND resolved path matches Args[0]; // * Args[0]. 
-// If error occurs returns "ghpc" +// If error occurs returns "gcluster" func execPath() string { - const nice string = "ghpc" + const nice string = "gcluster" args0 := os.Args[0] if args0 == nice { // trivial case // but it's important to terminate here to prevent - // "simplification" of `ghpc` to `./ghpc` + // "simplification" of `gcluster` to `./gcluster` return nice } // Code below assumes that `args0` contains path to file, not a @@ -226,7 +226,7 @@ func execPath() string { } } - found, err := exec.LookPath("ghpc") + found, err := exec.LookPath("gcluster") if err != nil { // not found in PATH return args0 } diff --git a/community/examples/AMD/README.md b/community/examples/AMD/README.md index e9658455ce..77e54044b0 100644 --- a/community/examples/AMD/README.md +++ b/community/examples/AMD/README.md @@ -1,4 +1,4 @@ -# AMD solutions for the HPC Toolkit +# AMD solutions for the Cluster Toolkit (formerly HPC Toolkit) > [!NOTE] > This document uses Slurm-GCP v6. If you want to use Slurm-GCP v5 version you diff --git a/community/examples/flux-framework/README.md b/community/examples/flux-framework/README.md index 5039b67fe5..c1e2d8271d 100644 --- a/community/examples/flux-framework/README.md +++ b/community/examples/flux-framework/README.md @@ -9,7 +9,7 @@ The cluster includes - A login node - Four compute nodes each of which is an instance of the c2-standard-16 machine type -> **_NOTE:_** prior to running this HPC Toolkit example the [Flux Framework GCP Images](https://github.com/GoogleCloudPlatform/scientific-computing-examples/tree/main/fluxfw-gcp/img#flux-framework-gcp-images) +> **_NOTE:_** prior to running this Cluster Toolkit example the [Flux Framework GCP Images](https://github.com/GoogleCloudPlatform/scientific-computing-examples/tree/main/fluxfw-gcp/img#flux-framework-gcp-images) > must be created in your project. 
### Initial Setup for flux-framework Cluster diff --git a/community/examples/intel/README.md b/community/examples/intel/README.md index 2fcf1137cd..8d2791652e 100644 --- a/community/examples/intel/README.md +++ b/community/examples/intel/README.md @@ -1,4 +1,4 @@ -# Intel Solutions for the HPC Toolkit +# Intel Solutions for the Cluster Toolkit (formerly HPC Toolkit) > **_NOTE:_** The [hpc-slurm-daos.yaml](hpc-slurm-daos.yaml) will not be compatible > for newer version of slurm-gcp v6. @@ -6,7 +6,7 @@ -- [Intel Solutions for the HPC Toolkit](#intel-solutions-for-the-hpc-toolkit) +- [Intel Solutions for the Cluster Toolkit](#intel-solutions-for-the-cluster-toolkit-formerly-hpc-toolkit) - [DAOS Cluster](#daos-cluster) - [Initial Setup for DAOS Cluster](#initial-setup-for-daos-cluster) - [Deploy the DAOS Cluster](#deploy-the-daos-cluster) diff --git a/community/examples/omnia-cluster.yaml b/community/examples/omnia-cluster.yaml index 171871f60b..a54a7d376e 100644 --- a/community/examples/omnia-cluster.yaml +++ b/community/examples/omnia-cluster.yaml @@ -14,7 +14,7 @@ --- -# WARNING: this example has been deprecated as of v1.28.0 of the HPC Toolkit +# WARNING: this example has been deprecated as of v1.28.0 of the Cluster Toolkit blueprint_name: omnia-cluster diff --git a/community/front-end/ofe/README.md b/community/front-end/ofe/README.md index dd2b0e6ef2..6d9a028122 100644 --- a/community/front-end/ofe/README.md +++ b/community/front-end/ofe/README.md @@ -1,7 +1,7 @@ -# Google HPC Toolkit Open Front End +# Google Cluster Toolkit Open Front End This is a web front-end for HPC applications on GCP. It delegates to the Cloud -HPC Toolkit to create cloud resources for HPC clusters. Through the convenience +Cluster Toolkit to create cloud resources for HPC clusters. Through the convenience of a web interface, system administrators can manage the life cycles of HPC clusters and install applications; users can prepare & submit HPC jobs and run benchmarks. 
This web application is built upon the Django framework. diff --git a/community/front-end/ofe/cli/ghpcfe.py b/community/front-end/ofe/cli/ghpcfe.py index 21d89600ab..597927cb37 100644 --- a/community/front-end/ofe/cli/ghpcfe.py +++ b/community/front-end/ofe/cli/ghpcfe.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""The Command Line Interface to access the HPC Toolkit FrontEnd""" +"""The Command Line Interface to access the Cluster Toolkit FrontEnd""" import click import requests @@ -65,7 +65,7 @@ def config(): """ print("Configuration file will be written at $HOME/.ghpcfe/config") print() - server = input("Enter the URL of the HPC Toolkit FrontEnd website: ") + server = input("Enter the URL of the Cluster Toolkit FrontEnd website: ") try: requests.get(server, timeout=10) # pylint: disable=unused-variable diff --git a/community/front-end/ofe/deploy.sh b/community/front-end/ofe/deploy.sh index c1414753d0..7b9fd09c82 100755 --- a/community/front-end/ofe/deploy.sh +++ b/community/front-end/ofe/deploy.sh @@ -15,7 +15,7 @@ ################################################################################ # # -# HPC Toolkit FrontEnd deployment script # +# Cluster Toolkit FrontEnd deployment script # # # ################################################################################ # @@ -332,7 +332,7 @@ check_account() { echo "" echo "Warning: account is not Owner or Editor of project" echo " Please ensure account has correct permissions before proceeding." - echo " See HPC Toolkit FrontEnd Administrator's Guide for details." + echo " See Cluster Toolkit FrontEnd Administrator's Guide for details." echo "" case $(ask " Proceed [y/N] ") in [Yy]*) ;; @@ -344,7 +344,7 @@ check_account() { fi # TODO: perform more extensive check the account has all required roles. 
- # - these could change over, depending back-end GCP / HPC Toolkit + # - these could change over, depending back-end GCP / Cluster Toolkit # requirements, so would require maintaining. } @@ -979,7 +979,7 @@ cat <
-This document is for administrators of the HPC Toolkit FrontEnd (TKFE). An +This document is for administrators of the Cluster Toolkit FrontEnd (TKFE). An administrator can deploy the TKFE portal, manage the lifecycle of HPC clusters, set up networking and storage resources that support clusters, install applications. and manage user access. Normal HPC users should refer to the @@ -23,7 +23,7 @@ administrators, additional Django superusers can be created from the Admin site within TKFE, once it is deployed and running. The TFKE web application server uses the -[Cloud HPC Toolkit](https://github.com/GoogleCloudPlatform/hpc-toolkit) to +[Cluster Toolkit](https://github.com/GoogleCloudPlatform/hpc-toolkit) to provision resources for networks, filesystems and clusters, using a service account that has its credentials registered to TKFE. The service account is used for access management and billing. @@ -308,7 +308,7 @@ external filesystem located elsewhere on GCP. ## Cluster Management HPC clusters can be created after setting up the hosting VPC and any -additional filesystems. The HPC Toolkit FrontEnd can manage the whole life +additional filesystems. The Cluster Toolkit FrontEnd can manage the whole life cycles of clusters. Click the *Clusters* item in the main menu to list all existing clusters. @@ -496,7 +496,7 @@ Cloud resource deployment log files (from Terraform) are typically shown via the FrontEnd web site. If those logs are not being shown, they can be found on the service machine under `/opt/gcluster/hpc-toolkit/frontend/(clusters|fs|vpc)/...`. -HPC Toolkit log files will also be found in those directories. The Terraform +Cluster Toolkit log files will also be found in those directories. The Terraform log files and status files will be down a few directories, based off of the Cluster Number, Deployment ID, and Terraform directory. 
diff --git a/community/front-end/ofe/docs/developer_guide.md b/community/front-end/ofe/docs/developer_guide.md index 5690740236..63abc18a9a 100644 --- a/community/front-end/ofe/docs/developer_guide.md +++ b/community/front-end/ofe/docs/developer_guide.md @@ -1,15 +1,15 @@ -## HPC Toolkit FrontEnd - Developer Guide +## Cluster Toolkit FrontEnd - Developer Guide ### Architecture design -The HPC Toolkit FrontEnd is a web application integrating several front-end and +The Cluster Toolkit FrontEnd is a web application integrating several front-end and back-end technologies. *Django*, a high-level Python-based web framework, forms the foundation of the web application. The back-end business logics can mostly be delegated to *Terraform* to create GCP cloud infrastructure required by the -HPC clusters. With HPC Toolkit, there is no need to define infrastructure +HPC clusters. With Cluster Toolkit, there is no need to define infrastructure configurations from scratch. Rather, a high-level description of the clusters are provided for it to generate Terraform configurations. @@ -102,7 +102,7 @@ Here are some notes from a developer's perspective: machine and clusters. - Terraform provisions a compute engine virtual machine to be the service machine. A startup script is then executed on the service machine to set up - the software environment for HPC Toolkit and Django, and start the web and + the software environment for Cluster Toolkit and Django, and start the web and application servers. ### Access the service machine @@ -125,8 +125,8 @@ the FrontEnd files. The home directory of the *gcluster* account is at `/opt/gcluster`. For a new deployment, the following four sub-directories are created: -- `go` - the development environment of the Go programming language, required to build Google HPC Toolkit -- `hpc-toolkit` - a clone of the Google HPC Toolkit project. 
The `ghpc` binary +- `go` - the development environment of the Go programming language, required to build Google Cluster Toolkit +- `cluster-toolkit` - a clone of the Google Cluster Toolkit project. The `ghpc` binary should have already been built during the deployment. The `frontend` sub-directory contains the Django-based web application for the FrontEnd and other supporting files. @@ -241,7 +241,7 @@ including network components, storage components, compute instance #### Code Layout -The top few layers of the directory hierarchy of the HPC Toolkit FrontEnd +The top few layers of the directory hierarchy of the Cluster Toolkit FrontEnd define the major components: | dir | description | @@ -265,12 +265,12 @@ define the major components: These directories hold all the support infrastructure files which are used to create, provision, and initialize the cloud resources which may be created via -the HPC Toolkit FrontEnd. The VPC Terraform and Workbench Terraform files may -eventually migrate into HPC Toolkit YAML files. +the Cluster Toolkit FrontEnd. The VPC Terraform and Workbench Terraform files may +eventually migrate into Cluster Toolkit YAML files. The files under `gcs_bucket` contain the more in-depth startup scripts and configuration information for the FrontEnd webserver as well as for new -clusters. During the initial deployment of the HPC Toolkit FrontEnd, this +clusters. During the initial deployment of the Cluster Toolkit FrontEnd, this directory is copied to a new Google Cloud Storage bucket which is then used for storing these startup codes as well as additional cluster information, such as log files. 
When clusters are created in Google Cloud, the initial bootstrap @@ -300,11 +300,11 @@ here as well, under | `.../website/` | Django core website configuration (including `settings.py`) | | `.../manage.py` | Core Django application management script | -As with many Django-based web applications, the HPC Toolkit FrontEnd Django +As with many Django-based web applications, the Cluster Toolkit FrontEnd Django application is broken across multiple directories, each responsible for some critical subcomponent of the overall application, implementing the MVT (model, view, template) architecture. The `ghpcfe/` directory hosts the pieces -specific to the HPC Toolkit FrontEnd, whereas the other directories are more +specific to the Cluster Toolkit FrontEnd, whereas the other directories are more Django-focused. Under `ghpcfe/`, there are a variety of directories as show in the above @@ -338,7 +338,7 @@ contents to them. The workbench process is fairly straight-forward. Gather configuration values from the FrontEnd and pass them to Terraform to control the creation of the -workbench instance. This is done directly via Terraform as the HPC Toolkit does +workbench instance. This is done directly via Terraform as the Cluster Toolkit does not currently support Vertex AI Workbenches. ### Infrastructure files diff --git a/community/front-end/ofe/docs/user_guide.md b/community/front-end/ofe/docs/user_guide.md index d22ed98858..fc535ab821 100644 --- a/community/front-end/ofe/docs/user_guide.md +++ b/community/front-end/ofe/docs/user_guide.md @@ -1,9 +1,9 @@ -# HPC Toolkit FrontEnd - User Guide +# Cluster Toolkit FrontEnd - User Guide -This document is for standard users of the HPC Toolkit FrontEnd. Standard users +This document is for standard users of the Cluster Toolkit FrontEnd. Standard users can access HPC clusters and installed applications as set up by the administrators. They can prepare, submit and run jobs on the cluster through the convenience of the web interface. 
@@ -15,7 +15,7 @@ guidance on how to provision and manage cloud resources for HPC clusters. An administrator should have arranged access to the system for a standard user: -- A URL should be provided on which an instance of the HPC Toolkit FrontEnd is +- A URL should be provided on which an instance of the Cluster Toolkit FrontEnd is deployed. - The Google identity of the user should be whitelisted to access the instance. - The user should be set as authorised users on existing HPC clusters. diff --git a/community/front-end/ofe/infrastructure_files/gcs_bucket/clusters/ansible_setup/roles/c2_daemon/files/ghpcfe_c2daemon.py b/community/front-end/ofe/infrastructure_files/gcs_bucket/clusters/ansible_setup/roles/c2_daemon/files/ghpcfe_c2daemon.py index f01dc8a0ca..2a4a144e28 100644 --- a/community/front-end/ofe/infrastructure_files/gcs_bucket/clusters/ansible_setup/roles/c2_daemon/files/ghpcfe_c2daemon.py +++ b/community/front-end/ofe/infrastructure_files/gcs_bucket/clusters/ansible_setup/roles/c2_daemon/files/ghpcfe_c2daemon.py @@ -14,7 +14,7 @@ # limitations under the License. 
-"""Cluster management daemon for the Google HPC Toolkit Frontend""" +"""Cluster management daemon for the Google Cluster Toolkit Frontend""" import grp import json diff --git a/community/front-end/ofe/teardown.sh b/community/front-end/ofe/teardown.sh index 89c9390aa0..2f9a5f833e 100755 --- a/community/front-end/ofe/teardown.sh +++ b/community/front-end/ofe/teardown.sh @@ -15,7 +15,7 @@ ################################################################################ # # -# Google HPC Toolkit FrontEnd teardown script # +# Google Cluster Toolkit FrontEnd teardown script # # # ################################################################################ @@ -86,7 +86,7 @@ cat <<'HEADER' -------------------------------------------------------------------------------- - Google HPC Toolkit Open FrontEnd + Google Cluster Toolkit Open FrontEnd -------------------------------------------------------------------------------- diff --git a/community/front-end/ofe/website/ghpcfe/cluster_manager/image.py b/community/front-end/ofe/website/ghpcfe/cluster_manager/image.py index 83d55748c8..363029db9b 100644 --- a/community/front-end/ofe/website/ghpcfe/cluster_manager/image.py +++ b/community/front-end/ofe/website/ghpcfe/cluster_manager/image.py @@ -50,19 +50,19 @@ def prepare(self): Prepare the image creation process by following these steps: 1. Create the necessary directory structure for the image. - 2. Generate a HPC Toolkit blueprint to build the image. - 3. Run the HPC Toolkit (`ghpc`) to create the image based on the blueprint. + 2. Generate a Cluster Toolkit blueprint to build the image. + 3. Run the Cluster Toolkit (`ghpc`) to create the image based on the blueprint. 4. Set up the builder environment on Google Cloud Platform (GCP) using Terraform. 5. Create the image on GCP using Packer. 6. Destroy the builder environment after the image creation is complete. 
This method handles the entire image creation process, from setting up the necessary - directories and configuration files to executing HPC Toolkit and Packer to build + directories and configuration files to executing Cluster Toolkit and Packer to build and finalize the image. If any step encounters an error, it logs the issue and marks the image's status as "error" (status code 'e'). Note: - - This method assumes that the necessary tools (HPC Toolkit, Terraform, and Packer) + - This method assumes that the necessary tools (Cluster Toolkit, Terraform, and Packer) are properly installed and configured on the system. - The credentials file required for GCP authentication is created during the image directory setup. @@ -103,7 +103,7 @@ def _get_credentials_file(self): def _create_blueprint(self): """ - Create HPC Toolkit blueprint that will build the image. + Create Cluster Toolkit blueprint that will build the image. """ try: blueprint_file = self.image_dir / "image.yaml" diff --git a/community/front-end/ofe/website/ghpcfe/templates/base_generic.html b/community/front-end/ofe/website/ghpcfe/templates/base_generic.html index 088c981acb..6f34e55fc8 100644 --- a/community/front-end/ofe/website/ghpcfe/templates/base_generic.html +++ b/community/front-end/ofe/website/ghpcfe/templates/base_generic.html @@ -17,7 +17,7 @@ - {% block title %}HPC Toolkit FrontEnd{% endblock %} + {% block title %}Cluster Toolkit FrontEnd{% endblock %} {% block meta %}{% endblock %} @@ -48,7 +48,7 @@