From b20fb3d575646b214e3d26a20f439a9671aafa21 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 28 Jun 2023 13:09:10 -0600 Subject: [PATCH 01/33] add some example code and env --- .gitignore | 4 +++ .pre-commit-config.yaml | 60 +++++++++++++++++++++++++++++++++++++++++ code/example.py | 23 ++++++++++++++++ environment.yml | 7 +++++ 4 files changed, 94 insertions(+) create mode 100644 .pre-commit-config.yaml create mode 100644 code/example.py create mode 100644 environment.yml diff --git a/.gitignore b/.gitignore index 68bc17f..5515200 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,7 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# data ignores + +*.csv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..67d5f69 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,60 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +default_language_version: + python: python3.10 +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-toml + - repo: https://github.com/codespell-project/codespell + rev: v2.2.5 + hooks: + - id: codespell + exclude: > + (?x)^( + .*\.lock|.*\.csv|.*\.ipynb + )$ + additional_dependencies: + - tomli + - repo: https://github.com/executablebooks/mdformat + rev: 0.7.16 + hooks: + - id: mdformat + - repo: https://github.com/nbQA-dev/nbQA + rev: 1.7.0 + hooks: + - id: nbqa-black + - id: nbqa-isort + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + - repo: https://github.com/asottile/blacken-docs + rev: 1.14.0 + hooks: + - id: blacken-docs + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.4.0 + hooks: + - id: mypy + - repo: https://github.com/PyCQA/pylint + rev: v3.0.0a6 + hooks: + - id: pylint + name: pylint + entry: pylint + language: python + types: [python] + args: ["--disable=X"] + additional_dependencies: + - "numpy" + - "pandas" diff --git a/code/example.py b/code/example.py new file mode 100644 index 0000000..524eb65 --- /dev/null +++ b/code/example.py @@ -0,0 +1,23 @@ +""" +An example Python file which creates random data and exports it to a location specified +by way of a +""" +import sys + +import numpy as np +import pandas as pd + +# take an input from sys argsv +output_file = sys.argv[1] + +# setup some rows +nrows = 10000 +ncols = 500 + +# form a dataframe using randomized data +df = pd.DataFrame( + np.random.rand(nrows, ncols), columns=[f"col_{num}" for num in range(0, ncols)] +) + +# export the data to parquet +df.to_csv(output_file) diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..cb70dd8 --- /dev/null +++ b/environment.yml @@ -0,0 +1,7 @@ +name: example_env +channels: + - defaults +dependencies: + - python=3.10 + - numpy + - pandas From 7b54774ed2df2f19f34a5a257014a802000be58b Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 28 Jun 2023 16:04:19 -0600 Subject: [PATCH 02/33] add alpine diags and descriptions, begin slurm sec --- README.md | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 
6c01187..9fab6f1 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,104 @@ -# example-hpc-alpine-python -Demonstrating the use of Python on Alpine, a High Performance Compute (HPC) cluster hosted by the University of Colorado Boulder's Research Computing. +# ā›°ļøšŸ Example HPC Alpine Python Project + +```mermaid +flowchart LR + subgraph repo["fa:fa-github This repo"] + direction LR + subgraph conda_env[" fa:fa-globe Anaconda environment  "] + python_code["fa:fa-file Python code"] + end + run_script["fa:fa-file Run script"] + end + subgraph alpine["fa:fa-server Alpine"] + alpine_terminal["fa:fa-terminal terminal"] + end + + alpine_terminal --> run_script + run_script --> python_code + +style conda_env fill:#CFFAFE,stroke:#155E75; +style repo fill:#ffffff,stroke:#444444; +style alpine fill:#ffffff,stroke:#444444; +style alpine_terminal fill:#D1FAE5,stroke:#444444; +``` + +This repo demonstrates the use of Python on [Alpine](https://curc.readthedocs.io/en/latest/clusters/alpine/index.html), a [High Performance Compute (HPC) cluster](https://en.wikipedia.org/wiki/High-performance_computing) hosted by the [University of Colorado Boulder's Research Computing](https://www.colorado.edu/rc/). +We use Python by way of [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) environment management to run code on Alpine. + +## Background + +### Why would I use Alpine? + +```mermaid +flowchart LR + subgraph alpine["fa:fa-server Alpine"] + direction TB + subgraph resources["   šŸ’Ŗ Compute Resources  "] + check_1["fa:fa-check"] + end + subgraph time["   šŸ•‘ Long-running Jobs  "] + check_2["fa:fa-check"] + end + subgraph collaborations["   šŸ‘„ Collaborations  "] + check_3["fa:fa-check"] + end + end + +style alpine fill:#ffffff,stroke:#444444; +style check_1 fill:#D1FAE5,stroke:#D1FAE5; +style check_2 fill:#D1FAE5,stroke:#D1FAE5; +style check_3 fill:#D1FAE5,stroke:#D1FAE5; +style alpine fill:#ffffff,stroke:#444444; +``` + +Alpine is a [High Performance Compute (HPC) cluster](https://en.wikipedia.org/wiki/High-performance_computing). +HPC environments provide shared computer hardware resources like [memory](https://en.wikipedia.org/wiki/Computer_memory), [CPU](https://en.wikipedia.org/wiki/Central_processing_unit), [GPU](https://en.wikipedia.org/wiki/Graphics_processing_unit) or others to run performance-intensive work. +Reasons for using Alpine might include: + +- __Compute resources:__ Leveraging otherwise cost-prohibitive amounts of memory, CPU, GPU, etc. for processing data. +- __Long-running jobs:__ Completing long-running processes which may take hours or days to complete. +- __Collaborations:__ Sharing a single implementation environment for reproducibility within a group (avoiding "works on my machine"). + +### How does Alpine work? 
+ +```mermaid +flowchart LR + users["fa:fa-users Users"] + subgraph alpine["fa:fa-server Alpine"] + acompile["fa:fa-file acompile"] + slurm["fa:fa-calendar Slurm"] + subgraph compute_nodes["fa:fa-cogs Compute node(s)"] + hardware["fa:fa-cog Compute\nResources"] + modules["fa:fa-cube Software via\nmodules pkg"] + end + subgraph login_nodes["fa:fa-sign-in Login node(s)"] + acompile["fa:fa-file acompile"] + slurm_cmd["fa:fa-terminal Slurm cmd's"] + end + slurm["fa:fa-calendar Slurm"] + end + + users --> | preconfigured\nSlurm access| acompile --> slurm + users --> | direct access | slurm_cmd --> slurm + slurm --> |"schedules\n(shared) use of"| hardware + slurm --> | provides\naccess to| modules --> | which may\n leverage| hardware + +style alpine fill:#ffffff,stroke:#444444; +``` + +Alpine's compute resources are managed through compute nodes in a system called [Slurm](https://github.com/SchedMD/slurm). Slurm helps coordinate shared and configurable access to the compute resources. + +> ā„¹ļø __Wait, what are "nodes"?__ +> A simplified way to understand the architecture of Slurm on Alpine is through login and compute "nodes" (computers). +Login nodes act as a way to prepare and submit processes which will be completed on compute nodes. +Login nodes have limited resource access and are not recommended for running procedures. + +One can interact with Slurm on Alpine by use of [Slurm interfaces and directives](https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html). +A quick way of accessing Alpine resources is through the use of the `acompile` command, which references a script with common Slurm configurations. +One can also access Slurm directly through [various commands](https://slurm.schedmd.com/quickstart.html#commands) on Alpine. + +Many common software packages are available through the [Modules package](https://github.com/cea-hpc/modules) on Alpine ([UCB RC documentation: The Modules System](https://curc.readthedocs.io/en/latest/compute/modules.html)). + +### How does Slurm work? + +Using Alpine effectively involves knowing how to use Slurm. From 6f26541534a276fa9a09b04318d2bd9b84669dfa Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 29 Jun 2023 15:50:11 -0600 Subject: [PATCH 03/33] implementation diagram and init content; run scrpt --- README.md | 95 +++++++++++++++++++++++++++++++++++++++++++++++++-- run_script.sh | 20 +++++++++++ 2 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 run_script.sh diff --git a/README.md b/README.md index 9fab6f1..78a3114 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ flowchart LR alpine_terminal --> run_script run_script --> python_code -style conda_env fill:#CFFAFE,stroke:#155E75; +style conda_env fill:#FEF3C7,stroke:#D97706; style repo fill:#ffffff,stroke:#444444; style alpine fill:#ffffff,stroke:#444444; style alpine_terminal fill:#D1FAE5,stroke:#444444; @@ -25,6 +25,11 @@ style alpine_terminal fill:#D1FAE5,stroke:#444444; This repo demonstrates the use of Python on [Alpine](https://curc.readthedocs.io/en/latest/clusters/alpine/index.html), a [High Performance Compute (HPC) cluster](https://en.wikipedia.org/wiki/High-performance_computing) hosted by the [University of Colorado Boulder's Research Computing](https://www.colorado.edu/rc/). We use Python by way of [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) environment management to run code on Alpine. +## Table of Contents + +1. 
[__Backround:__](#background) here we cover the background of Alpine and related technologies. +1. [__Implementation:__](#implementation) in this section we use the contents of this repository on Alpine. + ## Background ### Why would I use Alpine? @@ -75,7 +80,6 @@ flowchart LR acompile["fa:fa-file acompile"] slurm_cmd["fa:fa-terminal Slurm cmd's"] end - slurm["fa:fa-calendar Slurm"] end users --> | preconfigured\nSlurm access| acompile --> slurm @@ -84,6 +88,7 @@ flowchart LR slurm --> | provides\naccess to| modules --> | which may\n leverage| hardware style alpine fill:#ffffff,stroke:#444444; +style slurm fill:#F0F9FF,stroke:#075985; ``` Alpine's compute resources are managed through compute nodes in a system called [Slurm](https://github.com/SchedMD/slurm). Slurm helps coordinate shared and configurable access to the compute resources. @@ -101,4 +106,88 @@ Many common software packages are available through the [Modules package](https: ### How does Slurm work? -Using Alpine effectively involves knowing how to use Slurm. +```mermaid +flowchart LR + subgraph alpine["fa:fa-server Alpine"] + direction LR + script["fa:fa-file Job script"] + subgraph slurm["fa:fa-calendar Slurm"] + queue["fa:fa-calendar-plus-o Queue"] + processing["fa:fa-gear Processing"] + completion["fa:fa-check Completion\n(or cancellation)"] + end + end + + script --> | Submit job\nto Slurm | queue + queue --> | Scheduled job\nis processed | processing + processing --> | Processing\ncompletes | completion + queue --> | Cancellation without\nprocessing| completion + +style alpine fill:#ffffff,stroke:#444444; +style slurm fill:#F0F9FF,stroke:#075985; +``` + +Using Alpine effectively involves knowing how to leverage Slurm. +A simplified way to understand how Slurm works is through the following sequence. +Please note that some steps and additional complexity are obscured for the purposes of providing a basis of understanding. + +1. __Create a job script:__ build a script which will configure and run procedures related to the work you seek to accomplish on the HPC cluster. +1. __Submit job to Slurm:__ ask Slurm to run a set of commands or procedures. +1. __Job queue:__ Slurm will queue the submitted job alongside others (recall that the HPC cluster is a shared resource), providing information about progress as time goes on. +1. __Job processing:__ Slurm will run the procedures in the job script as scheduled. +1. __Job completion or cancellation:__ submitted jobs eventually may reach completion or cancellation states with saved information inside Slurm regarding what happened. + +## Implementation + +```mermaid +flowchart LR + + subgraph alpine["fa:fa-server Alpine"] + direction LR + alpine_terminal1["(1. preparation)\nfa:fa-terminal terminal"] + alpine_terminal2["(2. 
implementation)\nfa:fa-terminal terminal"] + git["fa:fa-git clone or pull"] + subgraph development_and_sync ["fa:fa-truck Code delivery to Alpine"] + subgraph repo["fa:fa-github This repo"] + direction TB + run_script["fa:fa-file Run script"] + subgraph conda_env[" fa:fa-globe Anaconda environment  "] + python_code["fa:fa-file Python code"] + end + end + end + subgraph slurm_job["fa:fa-calendar Slurm processing"] + direction LR + queue["fa:fa-calendar-plus-o Queue"] + processing["fa:fa-gear Processing"] + completion["fa:fa-check Completion\n(or cancellation)"] + end + end + + + alpine_terminal1 --> git --> | bring repo\n contents to Alpine | repo + alpine_terminal2 --> |submit\nSlurm job| queue + queue --> processing + processing --> completion + python_code -.-> | run python code\nwithin conda env |processing + run_script --> |run\nscript file| alpine_terminal2 + + + +style conda_env fill:#FEF3C7,stroke:#D97706; +style repo fill:#ffffff,stroke:#444444; +style alpine fill:#ffffff,stroke:#444444; +style alpine_terminal1 fill:#D1FAE5,stroke:#444444; +style alpine_terminal2 fill:#D1FAE5,stroke:#444444; +``` + +This section will cover how Alpine may be used with this repository to run example Python code. +Generally, we'll cover this in two primary steps: [1. preparation](#1-preparation) and [2. implementation](#2-implementation). + +### 1. Preparation + +First we need to prepare our code within Alpine. +We do this to balance the fact that we may develop and source control code outside of Alpine and needing to periodically synchronize it with updates. +In the case of this example work, we assume git as an interface for Github as the source control host. + +### 2. Implementation diff --git a/run_script.sh b/run_script.sh new file mode 100644 index 0000000..7db7503 --- /dev/null +++ b/run_script.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# referenced with modifications from: +# https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html#full-example-job-script + +#SBATCH --partition=amilan +#SBATCH --job-name=example-job +#SBATCH --output=example-job.%j.out +#SBATCH --time=01:00:00 +#SBATCH --qos=normal +#SBATCH --nodes=1 +#SBATCH --ntasks=4 +#SBATCH --mail-type=ALL +#SBATCH --mail-user=youridentikey@colorado.edu + +module purge +module load anaconda +conda activate example_env + +python code/example.py From 2d659f3e378f61e3abba24d8adf9d3f3e996dc43 Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 29 Jun 2023 16:39:55 -0600 Subject: [PATCH 04/33] step 0 access content --- README.md | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 78a3114..9860d3a 100644 --- a/README.md +++ b/README.md @@ -144,8 +144,8 @@ flowchart LR subgraph alpine["fa:fa-server Alpine"] direction LR - alpine_terminal1["(1. preparation)\nfa:fa-terminal terminal"] - alpine_terminal2["(2. implementation)\nfa:fa-terminal terminal"] + alpine_terminal1["(1. Prepare code)\nfa:fa-terminal terminal"] + alpine_terminal2["(2. Implement code)\nfa:fa-terminal terminal"] git["fa:fa-git clone or pull"] subgraph development_and_sync ["fa:fa-truck Code delivery to Alpine"] subgraph repo["fa:fa-github This repo"] @@ -182,12 +182,22 @@ style alpine_terminal2 fill:#D1FAE5,stroke:#444444; ``` This section will cover how Alpine may be used with this repository to run example Python code. -Generally, we'll cover this in two primary steps: [1. preparation](#1-preparation) and [2. implementation](#2-implementation). +Generally, we'll cover this in two primary steps:[0. 
Gain Alpine access](#0-gain-alpine-access), [1. preparation](#1-preparation) and [2. implementation](#2-implementation). -### 1. Preparation +### 0. Gain Alpine access -First we need to prepare our code within Alpine. +First you will need to gain access to Alpine. +This access is provided to members of the University of Colorado Anschutz through [RMACC](https://rmacc.org/) and is separate from other credentials which may be provided by default in your role. +Please see the following guide from the University of Colorado Boulder's Research Computing covering requesting access and generally how this works for members of the University of Colorado Anschutz. + +- __RMACC Access to Alpine:__ [https://curc.readthedocs.io/en/latest/access/rmacc.html](https://curc.readthedocs.io/en/latest/access/rmacc.html) + +### 1. Prepare code + +Next we need to prepare our code within Alpine. We do this to balance the fact that we may develop and source control code outside of Alpine and needing to periodically synchronize it with updates. In the case of this example work, we assume git as an interface for Github as the source control host. -### 2. Implementation +Below you'll find the general steps associated with this process. + +### 2. Implement code From d3b50fc735c455f24070458b6e8d391a4bda97b3 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 12:42:32 -0600 Subject: [PATCH 05/33] add data transfer + prepare data on alpine content --- README.md | 180 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 143 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 9860d3a..91bf285 100644 --- a/README.md +++ b/README.md @@ -4,26 +4,35 @@ flowchart LR subgraph repo["fa:fa-github This repo"] direction LR + run_script["fa:fa-file Run script"] subgraph conda_env[" fa:fa-globe Anaconda environment  "] python_code["fa:fa-file Python code"] end - run_script["fa:fa-file Run script"] + end subgraph alpine["fa:fa-server Alpine"] - alpine_terminal["fa:fa-terminal terminal"] + subgraph spacer1[" "] + subgraph spacer2["fa:fa-gears"] + + end + end end - alpine_terminal --> run_script - run_script --> python_code + repo --> | process on | alpine + style conda_env fill:#FEF3C7,stroke:#D97706; style repo fill:#ffffff,stroke:#444444; style alpine fill:#ffffff,stroke:#444444; -style alpine_terminal fill:#D1FAE5,stroke:#444444; +style spacer1 fill:#ffffff,stroke:#ffffff; +style spacer2 fill:#ffffff,stroke:#ffffff; ``` -This repo demonstrates the use of Python on [Alpine](https://curc.readthedocs.io/en/latest/clusters/alpine/index.html), a [High Performance Compute (HPC) cluster](https://en.wikipedia.org/wiki/High-performance_computing) hosted by the [University of Colorado Boulder's Research Computing](https://www.colorado.edu/rc/). -We use Python by way of [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) environment management to run code on Alpine. +_Diagram showing this repository's work as being processed on Alpine._ + +This repository is intended to help demonstrate the use of Python on [Alpine](https://curc.readthedocs.io/en/latest/clusters/alpine/index.html), a [High Performance Compute (HPC) cluster](https://en.wikipedia.org/wiki/High-performance_computing) hosted by the [University of Colorado Boulder's Research Computing](https://www.colorado.edu/rc/). +We use Python here by way of [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) environment management to run code on Alpine. 
+This readme will cover a background on the technologies and how to use the contents of this repository as though it were a project you were working on and wanting to run on Alpine. ## Table of Contents @@ -56,6 +65,8 @@ style check_3 fill:#D1FAE5,stroke:#D1FAE5; style alpine fill:#ffffff,stroke:#444444; ``` +_Diagram showing common benefits of Alpine and HPC clusters._ + Alpine is a [High Performance Compute (HPC) cluster](https://en.wikipedia.org/wiki/High-performance_computing). HPC environments provide shared computer hardware resources like [memory](https://en.wikipedia.org/wiki/Computer_memory), [CPU](https://en.wikipedia.org/wiki/Central_processing_unit), [GPU](https://en.wikipedia.org/wiki/Graphics_processing_unit) or others to run performance-intensive work. Reasons for using Alpine might include: @@ -72,29 +83,40 @@ flowchart LR subgraph alpine["fa:fa-server Alpine"] acompile["fa:fa-file acompile"] slurm["fa:fa-calendar Slurm"] + subgraph login_nodes["fa:fa-sign-in Login node(s)"] + acompile["fa:fa-file acompile"] + slurm_cmd["fa:fa-terminal Slurm cmd's"] + end subgraph compute_nodes["fa:fa-cogs Compute node(s)"] hardware["fa:fa-cog Compute\nResources"] modules["fa:fa-cube Software via\nmodules pkg"] end - subgraph login_nodes["fa:fa-sign-in Login node(s)"] - acompile["fa:fa-file acompile"] - slurm_cmd["fa:fa-terminal Slurm cmd's"] + + subgraph storage["fa:fa-folder-open Storage"] + local_storage["fa:fa-folder Local Storage\n(sometimes temporary)"] end end + remote_storage["fa:fa-folder External Storage\n(user specified / configured)"] users --> | preconfigured\nSlurm access| acompile --> slurm users --> | direct access | slurm_cmd --> slurm slurm --> |"schedules\n(shared) use of"| hardware slurm --> | provides\naccess to| modules --> | which may\n leverage| hardware + hardware --> | may deliver\nresults to| local_storage + hardware --> | or deliver\nresults to| remote_storage style alpine fill:#ffffff,stroke:#444444; style slurm fill:#F0F9FF,stroke:#075985; ``` -Alpine's compute resources are managed through compute nodes in a system called [Slurm](https://github.com/SchedMD/slurm). Slurm helps coordinate shared and configurable access to the compute resources. +_Diagram showing high-level user workflow and Alpine components._ + +Alpine's compute resources are used through compute nodes in a system called [Slurm](https://github.com/SchedMD/slurm). +Slurm helps coordinate shared and configurable access to the compute resources. +Data for or from Slurm work may be stored temporarily on local storage or on user-specific external (remote) storage. > ā„¹ļø __Wait, what are "nodes"?__ -> A simplified way to understand the architecture of Slurm on Alpine is through login and compute "nodes" (computers). +A simplified way to understand the architecture of Slurm on Alpine is through login and compute "nodes" (computers). Login nodes act as a way to prepare and submit processes which will be completed on compute nodes. Login nodes have limited resource access and are not recommended for running procedures. @@ -127,6 +149,8 @@ style alpine fill:#ffffff,stroke:#444444; style slurm fill:#F0F9FF,stroke:#075985; ``` +_Diagram showing how Slurm is used at an abstract level._ + Using Alpine effectively involves knowing how to leverage Slurm. A simplified way to understand how Slurm works is through the following sequence. Please note that some steps and additional complexity are obscured for the purposes of providing a basis of understanding. 
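+As a hedged illustration of that sequence (the numbered steps follow below), most day-to-day interaction happens through a handful of Slurm commands. In this sketch, `run_script.sh` and the job ID `12345678` are placeholders rather than required names:
+
+```shell
+# 1.-2. create a job script, then submit it to Slurm
+sbatch run_script.sh
+
+# 3. check where the submitted job sits in the shared queue
+squeue --user="$USER"
+
+# 4.-5. review processing, completion, or cancellation details
+sacct --jobs=12345678
+scancel 12345678 # optionally cancel the job before it completes
+```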
@@ -137,54 +161,120 @@ Please note that some steps and additional complexity are obscured for the purpo 1. __Job processing:__ Slurm will run the procedures in the job script as scheduled. 1. __Job completion or cancellation:__ submitted jobs eventually may reach completion or cancellation states with saved information inside Slurm regarding what happened. -## Implementation +### How do I store data on Alpine? ```mermaid flowchart LR + users["fa:fa-users Users"] + subgraph alpine["fa:fa-server Alpine"] + slurm["fa:fa-calendar Slurm"] + process_jobs["Processed jobs"] + subgraph storage["fa:fa-folder-open Storage"] + local_storage["fa:fa-folder Local Storage\n(sometimes temporary)"] + end + end + remote_storage["fa:fa-folder External Storage\n(user specified / configured)"] + + users --> | run their\nwork with | slurm + slurm --> | runs code| process_jobs + process_jobs --> | may deliver\nresults to| local_storage + process_jobs --> | or deliver\nresults to| remote_storage + +style alpine fill:#ffffff,stroke:#444444; +style slurm fill:#F0F9FF,stroke:#075985; +``` + +Data used or produced by your processed jobs on Alpine may use a number of different data storage locations. +Be sure to follow [the Acceptable data storage and use policies of Alpine](https://curc.readthedocs.io/en/latest/additional-resources/policies.html#acceptable-data-storage-and-use), avoiding the use of certain sensitive information and other items. +These may be distinguished in two ways: + +1. __Alpine local storage (sometimes temporary):__ Alpine provides a number of temporary data storage locations for accomplishing your work. +āš ļø _Note: some of these locations may be periodically purged and are not a suitable location for long-term data hosting ([see here for more information](https://curc.readthedocs.io/en/latest/additional-resources/policies.html#scratch-file-purge))!_
+Storage locations available ([see this link for full descriptions](https://curc.readthedocs.io/en/latest/compute/filesystems.html)): + + - __Home filesystem:__ 2 GB of backed up space under `/home/$USER` (where `$USER` is your RMACC or Alpine username). + - __Projects filesystem:__ 250 GB of backed up space under `/projects/$USER` (where `$USER` is your RMACC or Alpine username). + - __Scratch filesystem:__ 10 TB (10,240 GB) of space __*which is not backed up*__ under `/scratch/alpine/$USER` (where `$USER` is your RMACC or Alpine username). + +2. __External / remote storage:__ Users are encouraged to explore external data storage options for long-term hosting.
+Examples may include the following: + + - __[Petalibrary](https://www.colorado.edu/rc/resources/petalibrary)__: subsidized external storage host from University of Colorado Boulder's Research Computing (requires specific arrangements outside of Alpine). + - __Cloud hosting:__ [object storage](https://en.wikipedia.org/wiki/Object_storage) and related data hosting options from cloud providers like [Microsoft Azure](https://azure.microsoft.com/en-us), [Google Cloud](https://cloud.google.com/) ([internal CU Anschutz GC information](https://www.cuanschutz.edu/offices/office-of-information-technology/tools-services/google-cloud-platform)), or [Amazon Web Services](https://aws.amazon.com/). + - __Others:__ additional options include third-party "storage as a service" offerings like Google Drive or Dropbox and/or external servers maintained by other groups. + +### How do I send or receive data on Alpine? + +```mermaid +flowchart LR + external_storage["fa:fa-folder External Storage\n(user specified / configured)"] + subgraph alpine["fa:fa-server Alpine"] + subgraph storage["fa:fa-folder-open Storage"] + local_storage["fa:fa-folder Local Storage\n(sometimes temporary)"] + end + end + external_storage --> | send data\nto Alpine | local_storage + local_storage --> | receive data\nfrom Alpine | external_storage + +style alpine fill:#ffffff,stroke:#444444; +``` + +_Diagram showing external data storage being used to send or receive data on Alpine local storage._ + +Data may be sent to or gathered from Alpine using a number of different methods. +These may vary contingent on the external data storage being referenced, the code involved, or your group's available resources. +Please reference the following documentation from the University of Colorado Boulder's Research Computing regarding data transfers. + +- __The Compute Environment - Data Transfer:__ [https://curc.readthedocs.io/en/latest/compute/data-transfer.html](https://curc.readthedocs.io/en/latest/compute/data-transfer.html) + +## Implementation + +```mermaid +flowchart LR + users["(0. Gain access)\nfa:fa-users CU Anschutz\nUsers"] subgraph alpine["fa:fa-server Alpine"] direction LR - alpine_terminal1["(1. Prepare code)\nfa:fa-terminal terminal"] - alpine_terminal2["(2. Implement code)\nfa:fa-terminal terminal"] - git["fa:fa-git clone or pull"] - subgraph development_and_sync ["fa:fa-truck Code delivery to Alpine"] - subgraph repo["fa:fa-github This repo"] - direction TB - run_script["fa:fa-file Run script"] - subgraph conda_env[" fa:fa-globe Anaconda environment  "] - python_code["fa:fa-file Python code"] - end + alpine_terminal1["(1. Prepare code)\nfa:fa-git git clone"] + alpine_terminal2["(2. Implement code)\nfa:fa-terminal Process run script"] + subgraph repo["fa:fa-github This repo"] + direction TB + run_script["fa:fa-file Run script"] + subgraph conda_env[" fa:fa-globe Anaconda environment  "] + python_code["fa:fa-file Python code"] end end subgraph slurm_job["fa:fa-calendar Slurm processing"] direction LR - queue["fa:fa-calendar-plus-o Queue"] processing["fa:fa-gear Processing"] - completion["fa:fa-check Completion\n(or cancellation)"] + end + subgraph storage["fa:fa-folder-open Storage"] + local_storage["(3. 
Gather data)\nfa:fa-folder Local Storage\n(sometimes temporary)"] end end - - alpine_terminal1 --> git --> | bring repo\n contents to Alpine | repo - alpine_terminal2 --> |submit\nSlurm job| queue - queue --> processing - processing --> completion - python_code -.-> | run python code\nwithin conda env |processing + users --> | gain access\nvia RMACC acct.| alpine_terminal1 + alpine_terminal1 --> | bring repo\n contents to Alpine | repo run_script --> |run\nscript file| alpine_terminal2 - - + alpine_terminal2 --> |submit\nSlurm job| processing + processing --> | completed job\n sends data to| local_storage + python_code -.-> | run python code\nwithin conda env |processing style conda_env fill:#FEF3C7,stroke:#D97706; style repo fill:#ffffff,stroke:#444444; style alpine fill:#ffffff,stroke:#444444; +style users fill:#D1FAE5,stroke:#444444; style alpine_terminal1 fill:#D1FAE5,stroke:#444444; style alpine_terminal2 fill:#D1FAE5,stroke:#444444; +style local_storage fill:#D1FAE5,stroke:#444444; ``` +_Diagram showing how this repository may be used within Alpine through primary steps and processing workflow._ + This section will cover how Alpine may be used with this repository to run example Python code. -Generally, we'll cover this in two primary steps:[0. Gain Alpine access](#0-gain-alpine-access), [1. preparation](#1-preparation) and [2. implementation](#2-implementation). +Generally, we'll cover this in two primary steps: [0. Gain Alpine access](#0-gain-alpine-access), [1. preparation](#1-preparation) and [2. implementation](#2-implementation). -### 0. Gain Alpine access +### 0. šŸ”‘ Gain Alpine access First you will need to gain access to Alpine. This access is provided to members of the University of Colorado Anschutz through [RMACC](https://rmacc.org/) and is separate from other credentials which may be provided by default in your role. @@ -192,7 +282,7 @@ Please see the following guide from the University of Colorado Boulder's Researc - __RMACC Access to Alpine:__ [https://curc.readthedocs.io/en/latest/access/rmacc.html](https://curc.readthedocs.io/en/latest/access/rmacc.html) -### 1. Prepare code +### 1. šŸ› ļø Prepare code on Alpine Next we need to prepare our code within Alpine. We do this to balance the fact that we may develop and source control code outside of Alpine and needing to periodically synchronize it with updates. @@ -200,4 +290,20 @@ In the case of this example work, we assume git as an interface for Github as th Below you'll find the general steps associated with this process. -### 2. Implement code +1. Login to the Alpine command line ([reference this guide](https://curc.readthedocs.io/en/latest/access/rmacc.html#logging-in-to-open-ondemand)). +1. Change directory into the __Projects filesystem__ (generally we'll assume processed data produced by this code are large enough to warrant the need for additional space):
`cd /projects/$USER` +1. Use `git` (built into Alpine by default) commands to clone this repo:
`git clone https://github.com/CU-DBMI/example-hpc-alpine-python` +1. Verify the contents were received as desired (this should show the contents of this repository):
`ls -l example-hpc-alpine-python` + + +> ā„¹ļø __What if I need to authenticate with Github?__ +There are times where you may need to authenticate with Github in order to accomplish your work. +From a Github perspective, you will want to use either Github Personal Access Tokens (PAT) (recommended by Github) or SSH keys associated with the `git` client on Alpine. +Note: if you are prompted for a username and password from `git` when accessing a Github resource, the password is now associated with other keys like PAT's instead of your user's password ([reference](https://github.blog/changelog/2021-08-12-git-password-authentication-is-shutting-down)). +See the following guide from Github for more information on how authentication through `git` to Github works: +> +> - __Github - Authenticating with GitHub from Git:__ [https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git](https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git) + +### 2. āš™ļø Implement code on Alpine + +### 2. šŸ“‚ Gather data results From 1792b17f089f48516c979e529c070ee9c3b87e21 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 13:44:55 -0600 Subject: [PATCH 06/33] add comments to run script --- README.md | 6 +++- run_script.sh | 77 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 91bf285..c10e1f3 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ style alpine fill:#ffffff,stroke:#444444; style slurm fill:#F0F9FF,stroke:#075985; ``` -_Diagram showing how Slurm is used at an abstract level._ +_Diagram showing how Slurm generally works._ Using Alpine effectively involves knowing how to leverage Slurm. A simplified way to understand how Slurm works is through the following sequence. @@ -295,7 +295,9 @@ Below you'll find the general steps associated with this process. 1. Use `git` (built into Alpine by default) commands to clone this repo:
`git clone https://github.com/CU-DBMI/example-hpc-alpine-python` 1. Verify the contents were received as desired (this should show the contents of this repository):
`ls -l example-hpc-alpine-python` + + > ā„¹ļø __What if I need to authenticate with Github?__ There are times where you may need to authenticate with Github in order to accomplish your work. From a Github perspective, you will want to use either Github Personal Access Tokens (PAT) (recommended by Github) or SSH keys associated with the `git` client on Alpine. @@ -306,4 +308,6 @@ See the following guide from Github for more information on how authentication t ### 2. āš™ļø Implement code on Alpine +After our code is available on Alpine we're ready to run it using Slurm and related resources. + ### 2. šŸ“‚ Gather data results diff --git a/run_script.sh b/run_script.sh index 7db7503..d51d42c 100644 --- a/run_script.sh +++ b/run_script.sh @@ -1,20 +1,89 @@ #!/bin/bash -# referenced with modifications from: +######################################################## +# File description: +# An example run script for use with: +# https://github.com/CU-DBMI/example-hpc-alpine-python +# +# Referenced with modifications from: # https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html#full-example-job-script +######################################################## +######################################################## +# Slurm directives: +# ------------------- +# Below are configurations for Slurm, letting it know +# what and how you'd like to use resources on Alpine. +# Generally documentation on these may be found here: +# https://slurm.schedmd.com/sbatch.html +######################################################## + +# Indicates which Alpine-specific hardware partition you'd +# like to make use of to accomplish the work in this script. +# See: https://curc.readthedocs.io/en/latest/running-jobs/job-resources.html#partitions #SBATCH --partition=amilan -#SBATCH --job-name=example-job -#SBATCH --output=example-job.%j.out + +# Provide a specific name used for identifying the job +# as it proceeds through Slurm. +#SBATCH --job-name=example-hpc-alpine-python + +# Tells Slurm to gather standard output from running this +# file and send to a specific file. +# Special variable symbols may be used here: +# %j - job ID +# %a - job array index +# %A - job array job ID +#SBATCH --output=example-hpc-alpine-python.%j.out + +# Sets a limit on the total time this work may take. +# The format below is in the form of hours:minutes:seconds. #SBATCH --time=01:00:00 + +# Sets certain Alpine-specific characteristics the Slurm work +# performed. Can be one of: normal, long, mem. +# See: https://curc.readthedocs.io/en/latest/running-jobs/job-resources.html#quality-of-service #SBATCH --qos=normal + +# Advises Slurm about the minimum nodes necessary for completing +# the work included in this script. #SBATCH --nodes=1 + +# Advises Slurm about the maximum number of tasks involved +# with batch processing. #SBATCH --ntasks=4 + +# Sets an email address to receive notifications from Alpine +#SBATCH --mail-user=your-email-address-here@cuanschutz.edu + +# Indicate which notifications you'd like to receive from Alpine +# this can also be set to START, END, or FAIL. #SBATCH --mail-type=ALL -#SBATCH --mail-user=youridentikey@colorado.edu +######################################################## +# Module package commands: +# ------------------------ +# Next, we use the module package to help load +# software which is pre-loaded on Alpine. 
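+#
+# As a hypothetical exploration step (not required by this
+# script), you can list what the module package offers before
+# loading anything, for example:
+#   module avail            # list available software modules
+#   module spider anaconda  # search for a specific module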
+######################################################## + +# unloads all existing modules which may have been previously loaded module purge +# use module package to load anaconda software so it may +# be used by your processes module load anaconda + +######################################################## +# Anaconda environment manangement: +# --------------------------------- +# Here we load the Anaconda environment to be used +# for running the Python code below. +######################################################## conda activate example_env +######################################################## +# Run a Python file (within Anaconda environment): +# ------------------------------------------------ +# After loading the environment we run the Python +# code to perform the work we'd like to accomplish. +######################################################## python code/example.py From 3a3b450c07916cb9c8fd1bceeb669cdcc967e40a Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 13:47:04 -0600 Subject: [PATCH 07/33] quotes display tweaks --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c10e1f3..b20843c 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,8 @@ Slurm helps coordinate shared and configurable access to the compute resources. Data for or from Slurm work may be stored temporarily on local storage or on user-specific external (remote) storage. > ā„¹ļø __Wait, what are "nodes"?__ -A simplified way to understand the architecture of Slurm on Alpine is through login and compute "nodes" (computers). +> +> A simplified way to understand the architecture of Slurm on Alpine is through login and compute "nodes" (computers). Login nodes act as a way to prepare and submit processes which will be completed on compute nodes. Login nodes have limited resource access and are not recommended for running procedures. @@ -299,7 +300,8 @@ Below you'll find the general steps associated with this process. > ā„¹ļø __What if I need to authenticate with Github?__ -There are times where you may need to authenticate with Github in order to accomplish your work. +> +> There are times where you may need to authenticate with Github in order to accomplish your work. From a Github perspective, you will want to use either Github Personal Access Tokens (PAT) (recommended by Github) or SSH keys associated with the `git` client on Alpine. Note: if you are prompted for a username and password from `git` when accessing a Github resource, the password is now associated with other keys like PAT's instead of your user's password ([reference](https://github.blog/changelog/2021-08-12-git-password-authentication-is-shutting-down)). See the following guide from Github for more information on how authentication through `git` to Github works: From 4ab7765d535b0f59bd34bde9c1907e9225ee5423 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 14:02:39 -0600 Subject: [PATCH 08/33] replace fontawesome with emoji symbols github mermaid rendering does not include fontawesome compatibility. 
--- README.md | 102 ++++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index b20843c..2ab570b 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,17 @@ ```mermaid flowchart LR - subgraph repo["fa:fa-github This repo"] + subgraph repo["šŸ“¦ This repo"] direction LR - run_script["fa:fa-file Run script"] - subgraph conda_env[" fa:fa-globe Anaconda environment  "] - python_code["fa:fa-file Python code"] + run_script["šŸ“„ Run script"] + subgraph conda_env[" šŸŒ Anaconda environment  "] + python_code["šŸ“„ Python code"] end end - subgraph alpine["fa:fa-server Alpine"] + subgraph alpine["šŸ–„ļø Alpine"] subgraph spacer1[" "] - subgraph spacer2["fa:fa-gears"] - + subgraph spacer2["āš™ļøāš™ļøāš™ļø"] end end end @@ -45,16 +44,16 @@ This readme will cover a background on the technologies and how to use the conte ```mermaid flowchart LR - subgraph alpine["fa:fa-server Alpine"] + subgraph alpine["šŸ–„ļø Alpine"] direction TB subgraph resources["   šŸ’Ŗ Compute Resources  "] - check_1["fa:fa-check"] + check_1["āœ…"] end subgraph time["   šŸ•‘ Long-running Jobs  "] - check_2["fa:fa-check"] + check_2["āœ…"] end subgraph collaborations["   šŸ‘„ Collaborations  "] - check_3["fa:fa-check"] + check_3["āœ…"] end end @@ -79,24 +78,23 @@ Reasons for using Alpine might include: ```mermaid flowchart LR - users["fa:fa-users Users"] - subgraph alpine["fa:fa-server Alpine"] - acompile["fa:fa-file acompile"] - slurm["fa:fa-calendar Slurm"] - subgraph login_nodes["fa:fa-sign-in Login node(s)"] - acompile["fa:fa-file acompile"] - slurm_cmd["fa:fa-terminal Slurm cmd's"] + users["šŸ‘„ Users"] + subgraph alpine["šŸ–„ļø Alpine"] + slurm["šŸ—“ļø Slurm"] + subgraph login_nodes["šŸ”‘ Login node(s)"] + acompile["šŸ“„ acompile"] + slurm_cmd["āŒØļø Slurm cmd's"] end - subgraph compute_nodes["fa:fa-cogs Compute node(s)"] - hardware["fa:fa-cog Compute\nResources"] - modules["fa:fa-cube Software via\nmodules pkg"] + subgraph compute_nodes["āš™ļø Compute node(s)"] + hardware["āš™ļø Compute\nResources"] + modules["šŸ’æ Software via\nmodules pkg"] end - subgraph storage["fa:fa-folder-open Storage"] - local_storage["fa:fa-folder Local Storage\n(sometimes temporary)"] + subgraph storage["šŸ“‚ Storage"] + local_storage["šŸ“ Local Storage\n(sometimes temporary)"] end end - remote_storage["fa:fa-folder External Storage\n(user specified / configured)"] + remote_storage["šŸ“ External Storage\n(user specified / configured)"] users --> | preconfigured\nSlurm access| acompile --> slurm users --> | direct access | slurm_cmd --> slurm @@ -131,13 +129,13 @@ Many common software packages are available through the [Modules package](https: ```mermaid flowchart LR - subgraph alpine["fa:fa-server Alpine"] + subgraph alpine["šŸ–„ļø Alpine"] direction LR - script["fa:fa-file Job script"] - subgraph slurm["fa:fa-calendar Slurm"] - queue["fa:fa-calendar-plus-o Queue"] - processing["fa:fa-gear Processing"] - completion["fa:fa-check Completion\n(or cancellation)"] + script["šŸ“„ Job script"] + subgraph slurm["šŸ—“ļø Slurm"] + queue["šŸ•‘ Queue"] + processing["āš™ļø Processing"] + completion["āœ… Completion\n(or cancellation)"] end end @@ -166,15 +164,15 @@ Please note that some steps and additional complexity are obscured for the purpo ```mermaid flowchart LR - users["fa:fa-users Users"] - subgraph alpine["fa:fa-server Alpine"] - slurm["fa:fa-calendar Slurm"] + users["šŸ‘„ Users"] + subgraph alpine["šŸ–„ļø Alpine"] + slurm["šŸ—“ļø Slurm"] 
process_jobs["Processed jobs"] - subgraph storage["fa:fa-folder-open Storage"] - local_storage["fa:fa-folder Local Storage\n(sometimes temporary)"] + subgraph storage["šŸ“‚ Storage"] + local_storage["šŸ“ Local Storage\n(sometimes temporary)"] end end - remote_storage["fa:fa-folder External Storage\n(user specified / configured)"] + remote_storage["šŸ“ External Storage\n(user specified / configured)"] users --> | run their\nwork with | slurm slurm --> | runs code| process_jobs @@ -208,10 +206,10 @@ Examples may include the following: ```mermaid flowchart LR - external_storage["fa:fa-folder External Storage\n(user specified / configured)"] - subgraph alpine["fa:fa-server Alpine"] - subgraph storage["fa:fa-folder-open Storage"] - local_storage["fa:fa-folder Local Storage\n(sometimes temporary)"] + external_storage["šŸ“ External Storage\n(user specified / configured)"] + subgraph alpine["šŸ–„ļø Alpine"] + subgraph storage["šŸ“‚ Storage"] + local_storage["šŸ“ Local Storage\n(sometimes temporary)"] end end @@ -233,24 +231,24 @@ Please reference the following documentation from the University of Colorado Bou ```mermaid flowchart LR - users["(0. Gain access)\nfa:fa-users CU Anschutz\nUsers"] - subgraph alpine["fa:fa-server Alpine"] + users["(0. Gain access)\nšŸ‘„ CU Anschutz\nUsers"] + subgraph alpine["šŸ–„ļø Alpine"] direction LR - alpine_terminal1["(1. Prepare code)\nfa:fa-git git clone"] - alpine_terminal2["(2. Implement code)\nfa:fa-terminal Process run script"] - subgraph repo["fa:fa-github This repo"] + alpine_terminal1["(1. Prepare code)\nāŒØļø git clone"] + alpine_terminal2["(2. Implement code)\nāŒØļø Process run script"] + subgraph repo["šŸ“¦ This repo"] direction TB - run_script["fa:fa-file Run script"] - subgraph conda_env[" fa:fa-globe Anaconda environment  "] - python_code["fa:fa-file Python code"] + run_script["šŸ“„ Run script"] + subgraph conda_env[" šŸŒ Anaconda environment  "] + python_code["šŸ“„ Python code"] end end - subgraph slurm_job["fa:fa-calendar Slurm processing"] + subgraph slurm_job["šŸ—“ļø Slurm processing"] direction LR - processing["fa:fa-gear Processing"] + processing["āš™ļø Processing"] end - subgraph storage["fa:fa-folder-open Storage"] - local_storage["(3. Gather data)\nfa:fa-folder Local Storage\n(sometimes temporary)"] + subgraph storage["šŸ“‚ Storage"] + local_storage["(3. Gather data)\nšŸ“ Local Storage\n(sometimes temporary)"] end end From 784cc82a71f10764f8fa60ad2a8817cf4dbb5dba Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 14:26:51 -0600 Subject: [PATCH 09/33] add args --- README.md | 11 +++++++++++ run_script.sh | 23 ++++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2ab570b..5c83921 100644 --- a/README.md +++ b/README.md @@ -283,6 +283,17 @@ Please see the following guide from the University of Colorado Boulder's Researc ### 1. šŸ› ļø Prepare code on Alpine +```shell +[username@xsede.org@login-ciX ~]$ cd /projects/$USER +[username@xsede.org@login-ciX username@xsede.org]$ git clone https://github.com/CU-DBMI/example-hpc-alpine-python +Cloning into 'example-hpc-alpine-python'... +... git output ... +[username@xsede.org@login-ciX username@xsede.org]$ ls -l example-hpc-alpine-python +... ls output ... +``` + +_An example of what this section might look like in your Alpine terminal session._ + Next we need to prepare our code within Alpine. 
We do this to balance the fact that we may develop and source control code outside of Alpine and needing to periodically synchronize it with updates. In the case of this example work, we assume git as an interface for Github as the source control host. diff --git a/run_script.sh b/run_script.sh index d51d42c..9fc655c 100644 --- a/run_script.sh +++ b/run_script.sh @@ -4,6 +4,11 @@ # File description: # An example run script for use with: # https://github.com/CU-DBMI/example-hpc-alpine-python +# +# Arguments: +# $1: a filepath destination for python to create a +# CSV file. +# # # Referenced with modifications from: # https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html#full-example-job-script @@ -68,9 +73,12 @@ # unloads all existing modules which may have been previously loaded module purge + # use module package to load anaconda software so it may -# be used by your processes -module load anaconda +# be used by your processes. +# note: the numbers found after anaconda/####.## are subject +# to change depending on the versions installed by administrators. +module load anaconda/2022.10 ######################################################## # Anaconda environment manangement: @@ -78,6 +86,11 @@ module load anaconda # Here we load the Anaconda environment to be used # for running the Python code below. ######################################################## + +# first create the environment from the yaml file +conda env create -f environment.yaml + +# then activate the environment conda activate example_env ######################################################## @@ -86,4 +99,8 @@ conda activate example_env # After loading the environment we run the Python # code to perform the work we'd like to accomplish. ######################################################## -python code/example.py + +# run the python file example.py which takes an argument +# as a filepath for exporting data which we pass in here +# shell script file argument in the form of `$1` +python code/example.py $1 From 4086483199fbe274d312ae82a9aa33f17690ac5f Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 14:28:33 -0600 Subject: [PATCH 10/33] Update run_script.sh --- run_script.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/run_script.sh b/run_script.sh index 9fc655c..c8c6964 100644 --- a/run_script.sh +++ b/run_script.sh @@ -11,7 +11,7 @@ # # # Referenced with modifications from: -# https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html#full-example-job-script +# https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html ######################################################## ######################################################## @@ -25,7 +25,7 @@ # Indicates which Alpine-specific hardware partition you'd # like to make use of to accomplish the work in this script. -# See: https://curc.readthedocs.io/en/latest/running-jobs/job-resources.html#partitions +# See: https://curc.readthedocs.io/en/latest/running-jobs/job-resources.html #SBATCH --partition=amilan # Provide a specific name used for identifying the job @@ -46,7 +46,7 @@ # Sets certain Alpine-specific characteristics the Slurm work # performed. Can be one of: normal, long, mem. 
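+# As a hedged example: a job expected to exceed the `normal`
+# wall time limit might request `--qos=long` instead.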
-# See: https://curc.readthedocs.io/en/latest/running-jobs/job-resources.html#quality-of-service +# See: https://curc.readthedocs.io/en/latest/running-jobs/job-resources.html #SBATCH --qos=normal # Advises Slurm about the minimum nodes necessary for completing From 27cbb86f8e201a5fbdaef02fcc7f8b96416450e1 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 14:31:36 -0600 Subject: [PATCH 11/33] Update run_script.sh --- run_script.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/run_script.sh b/run_script.sh index c8c6964..1eeb37c 100644 --- a/run_script.sh +++ b/run_script.sh @@ -87,6 +87,9 @@ module load anaconda/2022.10 # for running the Python code below. ######################################################## +# init conda +conda init bash + # first create the environment from the yaml file conda env create -f environment.yaml From 542985ab621e37dd7246702e87532c97fefb363e Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 14:40:35 -0600 Subject: [PATCH 12/33] Update run_script.sh --- run_script.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/run_script.sh b/run_script.sh index 1eeb37c..f694171 100644 --- a/run_script.sh +++ b/run_script.sh @@ -64,6 +64,17 @@ # this can also be set to START, END, or FAIL. #SBATCH --mail-type=ALL +######################################################## +# Initialization through acompile script: +# --------------------------------------- +# Below we use the acompile script to help +# gain access to the module package and +# prepare to run our work on Slurm. +######################################################## + +# runs the acompile script with default configurations +acompile + ######################################################## # Module package commands: # ------------------------ From b2daf690731e90103fab6a21f7292735189f8ac8 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 15:03:25 -0600 Subject: [PATCH 13/33] add env variable --- run_script.sh | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/run_script.sh b/run_script.sh index f694171..0350e18 100644 --- a/run_script.sh +++ b/run_script.sh @@ -5,10 +5,13 @@ # An example run script for use with: # https://github.com/CU-DBMI/example-hpc-alpine-python # -# Arguments: -# $1: a filepath destination for python to create a -# CSV file. +# Expects the following sbatch exports: +# CSV_FILEPATH: +# a string which indicates the filepath for +# a CSV to be created by the python process. # +# Example Alpine command line usage: +# $ sbatch --export=CSV_FILEPATH="/projects/$USER/somewhere" run_script.sh # # Referenced with modifications from: # https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html @@ -64,17 +67,6 @@ # this can also be set to START, END, or FAIL. #SBATCH --mail-type=ALL -######################################################## -# Initialization through acompile script: -# --------------------------------------- -# Below we use the acompile script to help -# gain access to the module package and -# prepare to run our work on Slurm. -######################################################## - -# runs the acompile script with default configurations -acompile - ######################################################## # Module package commands: # ------------------------ @@ -98,9 +90,6 @@ module load anaconda/2022.10 # for running the Python code below. 
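+# (Assumption worth checking: the `example_env` name used
+# below must match the `name:` field declared in this
+# repository's environment file.)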
######################################################## -# init conda -conda init bash - # first create the environment from the yaml file conda env create -f environment.yaml @@ -117,4 +106,4 @@ conda activate example_env # run the python file example.py which takes an argument # as a filepath for exporting data which we pass in here # shell script file argument in the form of `$1` -python code/example.py $1 +python code/example.py $CSV_FILEPATH From fd5ae485eb34fa7f7e1cb9d4fa3f0317e1230adb Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 15:04:26 -0600 Subject: [PATCH 14/33] yml file ext --- run_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_script.sh b/run_script.sh index 0350e18..6dd18e1 100644 --- a/run_script.sh +++ b/run_script.sh @@ -91,7 +91,7 @@ module load anaconda/2022.10 ######################################################## # first create the environment from the yaml file -conda env create -f environment.yaml +conda env create -f environment.yml # then activate the environment conda activate example_env From b0c07059535892e1f270185544a6fc92c03753b0 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 15:06:52 -0600 Subject: [PATCH 15/33] remove the environment if it exists --- run_script.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/run_script.sh b/run_script.sh index 6dd18e1..f887222 100644 --- a/run_script.sh +++ b/run_script.sh @@ -90,7 +90,11 @@ module load anaconda/2022.10 # for running the Python code below. ######################################################## -# first create the environment from the yaml file +# remove any existing environments that happen to have +# the same exact name. +conda env remove --name example_env + +# next create the environment from the yaml file conda env create -f environment.yml # then activate the environment From 0bd36a15e3376a3781e7f8f94e140ab9ad7a9732 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 15:08:25 -0600 Subject: [PATCH 16/33] yes remove env --- run_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_script.sh b/run_script.sh index f887222..067aaf3 100644 --- a/run_script.sh +++ b/run_script.sh @@ -92,7 +92,7 @@ module load anaconda/2022.10 # remove any existing environments that happen to have # the same exact name. 
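+# (the `-y` flag added below answers the removal prompt
+# automatically, so an unattended Slurm job cannot hang
+# waiting for interactive input)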
-conda env remove --name example_env +conda env remove --name example_env -y # next create the environment from the yaml file conda env create -f environment.yml From c00c3339cd7aa49977e14a6e26a145f64505e5a8 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 15:26:24 -0600 Subject: [PATCH 17/33] linting; named variable use in python --- .pre-commit-config.yaml | 2 +- README.md | 39 +++++++++++++++++++++------------------ code/example.py | 18 ++++++++++-------- run_script.sh | 17 ++++++++--------- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67d5f69..1ab3852 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -54,7 +54,7 @@ repos: entry: pylint language: python types: [python] - args: ["--disable=X"] + # args: ["--disable=X"] additional_dependencies: - "numpy" - "pandas" diff --git a/README.md b/README.md index 5c83921..06649fa 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ flowchart LR subgraph conda_env[" šŸŒ Anaconda environment  "] python_code["šŸ“„ Python code"] end - + end subgraph alpine["šŸ–„ļø Alpine"] subgraph spacer1[" "] @@ -89,7 +89,7 @@ flowchart LR hardware["āš™ļø Compute\nResources"] modules["šŸ’æ Software via\nmodules pkg"] end - + subgraph storage["šŸ“‚ Storage"] local_storage["šŸ“ Local Storage\n(sometimes temporary)"] end @@ -116,8 +116,8 @@ Data for or from Slurm work may be stored temporarily on local storage or on use > ā„¹ļø __Wait, what are "nodes"?__ > > A simplified way to understand the architecture of Slurm on Alpine is through login and compute "nodes" (computers). -Login nodes act as a way to prepare and submit processes which will be completed on compute nodes. -Login nodes have limited resource access and are not recommended for running procedures. +> Login nodes act as a way to prepare and submit processes which will be completed on compute nodes. +> Login nodes have limited resource access and are not recommended for running procedures. One can interact with Slurm on Alpine by use of [Slurm interfaces and directives](https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html). A quick way of accessing Alpine resources is through the use of the `acompile` command, which references a script with common Slurm configurations. @@ -188,19 +188,19 @@ Be sure to follow [the Acceptable data storage and use policies of Alpine](https These may be distinguished in two ways: 1. __Alpine local storage (sometimes temporary):__ Alpine provides a number of temporary data storage locations for accomplishing your work. -āš ļø _Note: some of these locations may be periodically purged and are not a suitable location for long-term data hosting ([see here for more information](https://curc.readthedocs.io/en/latest/additional-resources/policies.html#scratch-file-purge))!_
-Storage locations available ([see this link for full descriptions](https://curc.readthedocs.io/en/latest/compute/filesystems.html)):
+ āš ļø _Note: some of these locations may be periodically purged and are not a suitable location for long-term data hosting ([see here for more information](https://curc.readthedocs.io/en/latest/additional-resources/policies.html#scratch-file-purge))!_
+ Storage locations available ([see this link for full descriptions](https://curc.readthedocs.io/en/latest/compute/filesystems.html)):

- - __Home filesystem:__ 2 GB of backed up space under `/home/$USER` (where `$USER` is your RMACC or Alpine username).
- - __Projects filesystem:__ 250 GB of backed up space under `/projects/$USER` (where `$USER` is your RMACC or Alpine username).
- - __Scratch filesystem:__ 10 TB (10,240 GB) of space __*which is not backed up*__ under `/scratch/alpine/$USER` (where `$USER` is your RMACC or Alpine username).
+ - __Home filesystem:__ 2 GB of backed up space under `/home/$USER` (where `$USER` is your RMACC or Alpine username).
+ - __Projects filesystem:__ 250 GB of backed up space under `/projects/$USER` (where `$USER` is your RMACC or Alpine username).
+ - __Scratch filesystem:__ 10 TB (10,240 GB) of space __*which is not backed up*__ under `/scratch/alpine/$USER` (where `$USER` is your RMACC or Alpine username).

-2. __External / remote storage:__ Users are encouraged to explore external data storage options for long-term hosting.
-Examples may include the following:
+1. __External / remote storage:__ Users are encouraged to explore external data storage options for long-term hosting.
+ Examples may include the following: - - __[Petalibrary](https://www.colorado.edu/rc/resources/petalibrary)__: subsidized external storage host from University of Colorado Boulder's Research Computing (requires specific arrangements outside of Alpine). - - __Cloud hosting:__ [object storage](https://en.wikipedia.org/wiki/Object_storage) and related data hosting options from cloud providers like [Microsoft Azure](https://azure.microsoft.com/en-us), [Google Cloud](https://cloud.google.com/) ([internal CU Anschutz GC information](https://www.cuanschutz.edu/offices/office-of-information-technology/tools-services/google-cloud-platform)), or [Amazon Web Services](https://aws.amazon.com/). - - __Others:__ additional options include third-party "storage as a service" offerings like Google Drive or Dropbox and/or external servers maintained by other groups. + - __[Petalibrary](https://www.colorado.edu/rc/resources/petalibrary)__: subsidized external storage host from University of Colorado Boulder's Research Computing (requires specific arrangements outside of Alpine). + - __Cloud hosting:__ [object storage](https://en.wikipedia.org/wiki/Object_storage) and related data hosting options from cloud providers like [Microsoft Azure](https://azure.microsoft.com/en-us), [Google Cloud](https://cloud.google.com/) ([internal CU Anschutz GC information](https://www.cuanschutz.edu/offices/office-of-information-technology/tools-services/google-cloud-platform)), or [Amazon Web Services](https://aws.amazon.com/). + - __Others:__ additional options include third-party "storage as a service" offerings like Google Drive or Dropbox and/or external servers maintained by other groups. ### How do I send or receive data on Alpine? @@ -212,7 +212,7 @@ flowchart LR local_storage["šŸ“ Local Storage\n(sometimes temporary)"] end end - + external_storage --> | send data\nto Alpine | local_storage local_storage --> | receive data\nfrom Alpine | external_storage @@ -306,19 +306,22 @@ Below you'll find the general steps associated with this process. 1. Verify the contents were received as desired (this should show the contents of this repository):
`ls -l example-hpc-alpine-python` + > ā„¹ļø __What if I need to authenticate with Github?__ > > There are times where you may need to authenticate with Github in order to accomplish your work. -From a Github perspective, you will want to use either Github Personal Access Tokens (PAT) (recommended by Github) or SSH keys associated with the `git` client on Alpine. -Note: if you are prompted for a username and password from `git` when accessing a Github resource, the password is now associated with other keys like PAT's instead of your user's password ([reference](https://github.blog/changelog/2021-08-12-git-password-authentication-is-shutting-down)). -See the following guide from Github for more information on how authentication through `git` to Github works: +> From a Github perspective, you will want to use either Github Personal Access Tokens (PAT) (recommended by Github) or SSH keys associated with the `git` client on Alpine. +> Note: if you are prompted for a username and password from `git` when accessing a Github resource, the password is now associated with other keys like PAT's instead of your user's password ([reference](https://github.blog/changelog/2021-08-12-git-password-authentication-is-shutting-down)). +> See the following guide from Github for more information on how authentication through `git` to Github works: > > - __Github - Authenticating with GitHub from Git:__ [https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git](https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git) ### 2. āš™ļø Implement code on Alpine After our code is available on Alpine we're ready to run it using Slurm and related resources. +The main goal of the Python code related to this work is to create a CSV file with random data at a specified location. +We'll use [Slurm's `sbatch` command](https://slurm.schedmd.com/sbatch.html), which submits batch scripts to Slurm using various options. ### 2. šŸ“‚ Gather data results diff --git a/code/example.py b/code/example.py index 524eb65..668be86 100644 --- a/code/example.py +++ b/code/example.py @@ -1,23 +1,25 @@ """ An example Python file which creates random data and exports it to a location specified -by way of a +by way of a """ -import sys +import argparse import numpy as np import pandas as pd -# take an input from sys argsv -output_file = sys.argv[1] +# gather named input from argparse +parser = argparse.ArgumentParser() +parser.add_argument("--CSV_FILENAME", help="A filepath for storing a CSV data file.") +args = parser.parse_args() # setup some rows -nrows = 10000 -ncols = 500 +NROWS = 10000 +NCOLS = 500 # form a dataframe using randomized data df = pd.DataFrame( - np.random.rand(nrows, ncols), columns=[f"col_{num}" for num in range(0, ncols)] + np.random.rand(NROWS, NCOLS), columns=[f"col_{num}" for num in range(0, NCOLS)] ) # export the data to parquet -df.to_csv(output_file) +df.to_csv(args.CSV_FILENAME) diff --git a/run_script.sh b/run_script.sh index 067aaf3..44ae1eb 100644 --- a/run_script.sh +++ b/run_script.sh @@ -4,14 +4,13 @@ # File description: # An example run script for use with: # https://github.com/CU-DBMI/example-hpc-alpine-python -# +# # Expects the following sbatch exports: # CSV_FILEPATH: -# a string which indicates the filepath for -# a CSV to be created by the python process. +# A filepath for storing a CSV data file. 
# # Example Alpine command line usage: -# $ sbatch --export=CSV_FILEPATH="/projects/$USER/somewhere" run_script.sh +# $ sbatch --export=CSV_FILEPATH="/projects/$USER/data.csv" run_script.sh # # Referenced with modifications from: # https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html @@ -31,7 +30,7 @@ # See: https://curc.readthedocs.io/en/latest/running-jobs/job-resources.html #SBATCH --partition=amilan -# Provide a specific name used for identifying the job +# Provide a specific name used for identifying the job # as it proceeds through Slurm. #SBATCH --job-name=example-hpc-alpine-python @@ -56,7 +55,7 @@ # the work included in this script. #SBATCH --nodes=1 -# Advises Slurm about the maximum number of tasks involved +# Advises Slurm about the maximum number of tasks involved # with batch processing. #SBATCH --ntasks=4 @@ -70,7 +69,7 @@ ######################################################## # Module package commands: # ------------------------ -# Next, we use the module package to help load +# Next, we use the module package to help load # software which is pre-loaded on Alpine. ######################################################## @@ -84,7 +83,7 @@ module purge module load anaconda/2022.10 ######################################################## -# Anaconda environment manangement: +# Anaconda environment management: # --------------------------------- # Here we load the Anaconda environment to be used # for running the Python code below. @@ -110,4 +109,4 @@ conda activate example_env # run the python file example.py which takes an argument # as a filepath for exporting data which we pass in here # shell script file argument in the form of `$1` -python code/example.py $CSV_FILEPATH +python code/example.py --CSV_FILENAME=$CSV_FILEPATH From 61f51c6972d2e3a7ad1a2ac98e963c7a4ce6307b Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 15:46:56 -0600 Subject: [PATCH 18/33] adding logging for stdout from sbatch run --- code/example.py | 10 ++++++++++ run_script.sh | 13 ++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/code/example.py b/code/example.py index 668be86..66fc807 100644 --- a/code/example.py +++ b/code/example.py @@ -3,10 +3,14 @@ by way of a """ import argparse +import logging import numpy as np import pandas as pd +# set basic logging config +logging.basicConfig(level=logging.INFO) + # gather named input from argparse parser = argparse.ArgumentParser() parser.add_argument("--CSV_FILENAME", help="A filepath for storing a CSV data file.") @@ -16,10 +20,16 @@ NROWS = 10000 NCOLS = 500 +logging.info("Creating the dataframe now!") + # form a dataframe using randomized data df = pd.DataFrame( np.random.rand(NROWS, NCOLS), columns=[f"col_{num}" for num in range(0, NCOLS)] ) +logging.info("Exporting the dataframe to CSV at %s !", args.CSV_FILENAME) + # export the data to parquet df.to_csv(args.CSV_FILENAME) + +logging.info("Python work finished!") diff --git a/run_script.sh b/run_script.sh index 44ae1eb..e932dd5 100644 --- a/run_script.sh +++ b/run_script.sh @@ -108,5 +108,16 @@ conda activate example_env # run the python file example.py which takes an argument # as a filepath for exporting data which we pass in here -# shell script file argument in the form of `$1` +# from an sbatch exported variable name which is +# received as a named argparse variable within Python +# using the same name. 
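# (Illustration only: with the header example's export of
#  CSV_FILEPATH="/projects/$USER/data.csv", the command below expands to
#  `python code/example.py --CSV_FILENAME=/projects/$USER/data.csv`.)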
python code/example.py --CSV_FILENAME=$CSV_FILEPATH + +######################################################## +# Send an end signal for the logs: +# -------------------------------- +# Here we add a simple echo statement to indicate +# within the logs that the work is completed. +######################################################## + +echo "run_script.sh work finished!" From 8435b0f3cf64a986315092730f8549e8c8236d55 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 30 Jun 2023 16:03:21 -0600 Subject: [PATCH 19/33] implementation process and modified logging file --- README.md | 7 ++++++- run_script.sh | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 06649fa..5e212b0 100644 --- a/README.md +++ b/README.md @@ -321,7 +321,12 @@ Below you'll find the general steps associated with this process. ### 2. āš™ļø Implement code on Alpine After our code is available on Alpine we're ready to run it using Slurm and related resources. +We use Anaconda to build a Python environment with specified packages for reproducibility. The main goal of the Python code related to this work is to create a CSV file with random data at a specified location. We'll use [Slurm's `sbatch` command](https://slurm.schedmd.com/sbatch.html), which submits batch scripts to Slurm using various options. -### 2. šŸ“‚ Gather data results +1. Use the `sbatch` command with exported variable `CSV_FILEPATH`.
   `sbatch --export=CSV_FILEPATH="/projects/$USER/example_data.csv" run_script.sh`
1. After a short moment, use the [`tail`](https://man7.org/linux/man-pages/man1/tail.1.html) command to observe the log file created by Slurm for this sbatch submission. This file can help you understand where things stand and whether anything went wrong.
   `tail -f example-hpc-alpine-python.out`
1. Once you see from the log file that the work has completed, take a look at the top 2 lines of the data file using the [`head`](https://man7.org/linux/man-pages/man1/head.1.html) command to verify the data arrived as expected (column names with random values):
`head -n 2 example_data.csv` + +### 3. šŸ“‚ Transfer data results diff --git a/run_script.sh b/run_script.sh index e932dd5..593466b 100644 --- a/run_script.sh +++ b/run_script.sh @@ -40,7 +40,7 @@ # %j - job ID # %a - job array index # %A - job array job ID -#SBATCH --output=example-hpc-alpine-python.%j.out +#SBATCH --output=example-hpc-alpine-python.out # Sets a limit on the total time this work may take. # The format below is in the form of hours:minutes:seconds. From c9e6481d2cf56dfd99e945fcc270d67fac3c8e1a Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 5 Jul 2023 12:13:53 -0600 Subject: [PATCH 20/33] add globus transfer directions --- README.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5e212b0..1ee2c9d 100644 --- a/README.md +++ b/README.md @@ -223,9 +223,9 @@ _Diagram showing external data storage being used to send or receive data on Alp Data may be sent to or gathered from Alpine using a number of different methods. These may vary contingent on the external data storage being referenced, the code involved, or your group's available resources. -Please reference the following documentation from the University of Colorado Boulder's Research Computing regarding data transfers. - -- __The Compute Environment - Data Transfer:__ [https://curc.readthedocs.io/en/latest/compute/data-transfer.html](https://curc.readthedocs.io/en/latest/compute/data-transfer.html) +Please reference the following documentation from the University of Colorado Boulder's Research Computing regarding data transfers: [The Compute Environment - Data Transfer](https://curc.readthedocs.io/en/latest/compute/data-transfer.html). +__Please note:__ due to the authentication configuration of Alpine many local or SSH-key based methods are not available for CU Anschutz users. +As a result, [Globus](https://www.globus.org/) represents one of the best options available (see [3. šŸ“‚ Transfer data results](#3-šŸ“‚-transfer-data-results) below). ## Implementation @@ -270,8 +270,7 @@ style local_storage fill:#D1FAE5,stroke:#444444; _Diagram showing how this repository may be used within Alpine through primary steps and processing workflow._ -This section will cover how Alpine may be used with this repository to run example Python code. -Generally, we'll cover this in two primary steps: [0. Gain Alpine access](#0-gain-alpine-access), [1. preparation](#1-preparation) and [2. implementation](#2-implementation). +Use the following steps to understand how Alpine may be used with this repository to run example Python code. ### 0. šŸ”‘ Gain Alpine access @@ -292,7 +291,7 @@ Cloning into 'example-hpc-alpine-python'... ... ls output ... ``` -_An example of what this section might look like in your Alpine terminal session._ +_An example of what this preparation section might look like in your Alpine terminal session._ Next we need to prepare our code within Alpine. We do this to balance the fact that we may develop and source control code outside of Alpine and needing to periodically synchronize it with updates. @@ -320,13 +319,77 @@ Below you'll find the general steps associated with this process. ### 2. āš™ļø Implement code on Alpine +```shell +[username@xsede.org@login-ciX ~]$ sbatch --export=CSV_FILEPATH="/projects/$USER/example_data.csv" example-hpc-alpine-python/run_script.sh +[username@xsede.org@login-ciX username@xsede.org]$ tail -f example-hpc-alpine-python.out +... tail output (ctrl/cmd + c to cancel) ... 
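# (illustrative aside: while the job runs, `squeue -u $USER` lists its state,
#  using the job ID that sbatch printed at submission time)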
[username@xsede.org@login-ciX username@xsede.org]$ head -n 2 example_data.csv
... data output ...
```

_An example of what this implementation section might look like in your Alpine terminal session._

After our code is available on Alpine we're ready to run it using Slurm and related resources.
We use Anaconda to build a Python environment with specified packages for reproducibility.
The main goal of the Python code related to this work is to create a CSV file with random data at a specified location.
We'll use [Slurm's `sbatch` command](https://slurm.schedmd.com/sbatch.html), which submits batch scripts to Slurm using various options.

1. Use the `sbatch` command with exported variable `CSV_FILEPATH`.
   `sbatch --export=CSV_FILEPATH="/projects/$USER/example_data.csv" run_script.sh`
1. Use the `sbatch` command with exported variable `CSV_FILEPATH`.
   `sbatch --export=CSV_FILEPATH="/projects/$USER/example_data.csv" example-hpc-alpine-python/run_script.sh`
1. After a short moment, use the [`tail`](https://man7.org/linux/man-pages/man1/tail.1.html) command to observe the log file created by Slurm for this sbatch submission. This file can help you understand where things stand and whether anything went wrong.
   `tail -f example-hpc-alpine-python.out`
1. Once you see from the log file that the work has completed, take a look at the top 2 lines of the data file using the [`head`](https://man7.org/linux/man-pages/man1/head.1.html) command to verify the data arrived as expected (column names with random values):
`head -n 2 example_data.csv` ### 3. šŸ“‚ Transfer data results + +```mermaid +flowchart LR + subgraph alpine["šŸ–„ļø Alpine"] + local_storage["šŸ“„ /projects/$USER/example_data.csv"] + end + subgraph globus["ā˜ļø Globus"] + globus_web["šŸ” Globus web interface"] + end + subgraph local_machine["šŸ–„ļø Local device"] + personal_connect["šŸ” Globus Connect Personal"] + local_dir["šŸ“„ /a_local_dir/example_data.csv"] + end + + local_storage --> | moves data\nfrom Alpine | globus_web + globus_web --> | interface \n from Globus | personal_connect + personal_connect --> | downloads \n local file | local_dir + +style alpine fill:#ffffff,stroke:#444444; +style globus fill:#ffffff,stroke:#444444; +style local_machine fill:#ffffff,stroke:#444444; +``` + +_Diagram showing how example_data.csv may be transferred from Alpine to a local machine using Globus solutions._ + +Now that the example data output from the Slurm work is available we need to transfer that data to a local system for further use. +In this example we'll use [Globus](https://www.globus.org/) as a data transfer method from Alpine to our local machine. +__Please note:__ always be sure to check data privacy and policy which change the methods or storage locations you may use for your data! + +1. __Globus local machine configuration__ + 1. Install [Globus Connect Personal](https://www.globus.org/globus-connect-personal) on your local machine. + 1. During installation, you will be prompted to login to Globus. Use your ACCESS credentials to login. + 1. During installation login, note the label you provide to Globus. This will be used later, referenced as "Globus Connect Personal label". + 1. Ensure you add and (__importantly:__) provide write access to a local directory via __Globus Connect Personal - Preferences - Access__ where you'd like the data to be received from Alpine to your local machine.

+1. __Globus web interface__ + 1. Use your ACCESS credentials to login to the [Globus web interface](https://app.globus.org/login). + 1. __Configure File Manager left side (source selection)__ + 1. Within the Globus web interface on the File Manager tab, use the __Collection__ input box to search or select __"CU Boulder Research Computing ACCESS"__. + 1. Within the Globus web interface on the File Manager tab, use the __Path__ input box to enter: `/projects/your_username_here/` (replacing "your_username_here" with your username from Alpine, including the "@" symbol if it applies). + 1. __Configure File Manager right side (destination selection)__ + 1. Within the Globus web interface on the File Manager tab, use the __Collection__ input box to search or select the __Globus Connect Personal label you provided in earlier steps. + 1. Within the Globus web interface on the File Manager tab, use the __Path__ input box to enter the local path which you made accessible in earlier steps. + 1. __Begin Globus transfer__ + 1. Within the Globus web interface on the File Manager tab on the left side (source selection), check the box next to the file `example_data.csv`. + 1. Within the Globus web interface on the File Manager tab on the left side (source selection), click the "Start ā–¶ļø" button to begin the transfer from Alpine to your local directory. + 1. After clicking the "Start ā–¶ļø" button, you may see a message in the top right with the message "Transfer request submitted successfully". You can click the link to view the details associated with the transfer. + 1. After a short period, the file will be transferred and you should be able to verify the contents on your local machine. + +## Further References + +- [University of Colorado Boulder's Research Computing](https://www.colorado.edu/rc/) +- [HPC Cluster Alpine Documentation](https://curc.readthedocs.io/en/latest/clusters/alpine/index.html) +- [Slurm Documentation](https://slurm.schedmd.com/) +- [Globus Documentation](https://docs.globus.org/) From 28409bb7742b93406147fa0c15bb071207eb778f Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 5 Jul 2023 12:15:23 -0600 Subject: [PATCH 21/33] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1ee2c9d..509423e 100644 --- a/README.md +++ b/README.md @@ -391,5 +391,6 @@ __Please note:__ always be sure to check data privacy and policy which change th - [University of Colorado Boulder's Research Computing](https://www.colorado.edu/rc/) - [HPC Cluster Alpine Documentation](https://curc.readthedocs.io/en/latest/clusters/alpine/index.html) +- [Github: Getting started with Git](https://docs.github.com/en/get-started/getting-started-with-git) - [Slurm Documentation](https://slurm.schedmd.com/) - [Globus Documentation](https://docs.globus.org/) From 4569e707761ef1f439c2bbf5705fb813928e0078 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 5 Jul 2023 12:16:34 -0600 Subject: [PATCH 22/33] spacing --- README.md | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 509423e..4b5cd76 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,6 @@ flowchart LR repo --> | process on | alpine - style conda_env fill:#FEF3C7,stroke:#D97706; style repo fill:#ffffff,stroke:#444444; style alpine fill:#ffffff,stroke:#444444; @@ -225,7 +224,7 @@ Data may be sent to or gathered from Alpine using a number of different methods. 
These may vary contingent on the external data storage being referenced, the code involved, or your group's available resources. Please reference the following documentation from the University of Colorado Boulder's Research Computing regarding data transfers: [The Compute Environment - Data Transfer](https://curc.readthedocs.io/en/latest/compute/data-transfer.html). __Please note:__ due to the authentication configuration of Alpine many local or SSH-key based methods are not available for CU Anschutz users. -As a result, [Globus](https://www.globus.org/) represents one of the best options available (see [3. šŸ“‚ Transfer data results](#3-šŸ“‚-transfer-data-results) below). +As a result, [Globus](https://www.globus.org/) represents one of the best options available (see [3. šŸ“‚ Transfer data results](#3-%F0%9F%93%82-transfer-data-results) below). ## Implementation @@ -369,23 +368,23 @@ In this example we'll use [Globus](https://www.globus.org/) as a data transfer m __Please note:__ always be sure to check data privacy and policy which change the methods or storage locations you may use for your data! 1. __Globus local machine configuration__ - 1. Install [Globus Connect Personal](https://www.globus.org/globus-connect-personal) on your local machine. - 1. During installation, you will be prompted to login to Globus. Use your ACCESS credentials to login. - 1. During installation login, note the label you provide to Globus. This will be used later, referenced as "Globus Connect Personal label". - 1. Ensure you add and (__importantly:__) provide write access to a local directory via __Globus Connect Personal - Preferences - Access__ where you'd like the data to be received from Alpine to your local machine.

+ 1. Install [Globus Connect Personal](https://www.globus.org/globus-connect-personal) on your local machine. + 1. During installation, you will be prompted to login to Globus. Use your ACCESS credentials to login. + 1. During installation login, note the label you provide to Globus. This will be used later, referenced as "Globus Connect Personal label". + 1. Ensure you add and (__importantly:__) provide write access to a local directory via __Globus Connect Personal - Preferences - Access__ where you'd like the data to be received from Alpine to your local machine.

1. __Globus web interface__ - 1. Use your ACCESS credentials to login to the [Globus web interface](https://app.globus.org/login). - 1. __Configure File Manager left side (source selection)__ - 1. Within the Globus web interface on the File Manager tab, use the __Collection__ input box to search or select __"CU Boulder Research Computing ACCESS"__. - 1. Within the Globus web interface on the File Manager tab, use the __Path__ input box to enter: `/projects/your_username_here/` (replacing "your_username_here" with your username from Alpine, including the "@" symbol if it applies). - 1. __Configure File Manager right side (destination selection)__ - 1. Within the Globus web interface on the File Manager tab, use the __Collection__ input box to search or select the __Globus Connect Personal label you provided in earlier steps. - 1. Within the Globus web interface on the File Manager tab, use the __Path__ input box to enter the local path which you made accessible in earlier steps. - 1. __Begin Globus transfer__ - 1. Within the Globus web interface on the File Manager tab on the left side (source selection), check the box next to the file `example_data.csv`. - 1. Within the Globus web interface on the File Manager tab on the left side (source selection), click the "Start ā–¶ļø" button to begin the transfer from Alpine to your local directory. - 1. After clicking the "Start ā–¶ļø" button, you may see a message in the top right with the message "Transfer request submitted successfully". You can click the link to view the details associated with the transfer. - 1. After a short period, the file will be transferred and you should be able to verify the contents on your local machine. + 1. Use your ACCESS credentials to login to the [Globus web interface](https://app.globus.org/login). + 1. __Configure File Manager left side (source selection)__ + 1. Within the Globus web interface on the File Manager tab, use the __Collection__ input box to search or select __"CU Boulder Research Computing ACCESS"__. + 1. Within the Globus web interface on the File Manager tab, use the __Path__ input box to enter: `/projects/your_username_here/` (replacing "your_username_here" with your username from Alpine, including the "@" symbol if it applies). + 1. __Configure File Manager right side (destination selection)__ + 1. Within the Globus web interface on the File Manager tab, use the __Collection__ input box to search or select the \_\_Globus Connect Personal label you provided in earlier steps. + 1. Within the Globus web interface on the File Manager tab, use the __Path__ input box to enter the local path which you made accessible in earlier steps. + 1. __Begin Globus transfer__ + 1. Within the Globus web interface on the File Manager tab on the left side (source selection), check the box next to the file `example_data.csv`. + 1. Within the Globus web interface on the File Manager tab on the left side (source selection), click the "Start ā–¶ļø" button to begin the transfer from Alpine to your local directory. + 1. After clicking the "Start ā–¶ļø" button, you may see a message in the top right with the message "Transfer request submitted successfully". You can click the link to view the details associated with the transfer. + 1. After a short period, the file will be transferred and you should be able to verify the contents on your local machine. 
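
> ā„¹ļø __Prefer the command line for transfers?__
>
> The steps below are only a rough sketch using the separate [Globus CLI](https://docs.globus.org/cli/) rather than the web interface described above; the collection UUIDs and paths are placeholders you would need to look up for your own account.
>
> ```shell
> # authenticate the Globus CLI with your account
> globus login
>
> # find the UUID of a collection by searching its display name
> globus endpoint search "CU Boulder Research Computing ACCESS"
>
> # request a single-file transfer from Alpine to your local collection
> globus transfer "$ALPINE_UUID:/projects/your_username_here/example_data.csv" \
>   "$LOCAL_UUID:/a_local_dir/example_data.csv"
> ```
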
## Further References From f96786efa708432f378c25050ee8b5b93d84ed2e Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:18:39 -0600 Subject: [PATCH 23/33] more descriptive text Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4b5cd76..fa4449f 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Reasons for using Alpine might include: - __Compute resources:__ Leveraging otherwise cost-prohibitive amounts of memory, CPU, GPU, etc. for processing data. - __Long-running jobs:__ Completing long-running processes which may take hours or days to complete. -- __Collaborations:__ Sharing a single implementation environment for reproducibility within a group (avoiding "works on my machine"). +- __Collaborations:__ Sharing a single implementation environment for reproducibility within a group (avoiding "works on my machine" inconsistency issues). ### How does Alpine work? From 746012719c3ee2d77ec09e0ccb8f221f32a319c9 Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:21:59 -0600 Subject: [PATCH 24/33] appropriate product naming capitalizations Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index fa4449f..cdb2cff 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ These may be distinguished in two ways: 1. __External / remote storage:__ Users are encouraged to explore external data storage options for long-term hosting.
Examples may include the following: - - __[Petalibrary](https://www.colorado.edu/rc/resources/petalibrary)__: subsidized external storage host from University of Colorado Boulder's Research Computing (requires specific arrangements outside of Alpine). + - __[PetaLibrary](https://www.colorado.edu/rc/resources/petalibrary)__: subsidized external storage host from University of Colorado Boulder's Research Computing (requires specific arrangements outside of Alpine). - __Cloud hosting:__ [object storage](https://en.wikipedia.org/wiki/Object_storage) and related data hosting options from cloud providers like [Microsoft Azure](https://azure.microsoft.com/en-us), [Google Cloud](https://cloud.google.com/) ([internal CU Anschutz GC information](https://www.cuanschutz.edu/offices/office-of-information-technology/tools-services/google-cloud-platform)), or [Amazon Web Services](https://aws.amazon.com/). - __Others:__ additional options include third-party "storage as a service" offerings like Google Drive or Dropbox and/or external servers maintained by other groups. @@ -294,7 +294,7 @@ _An example of what this preparation section might look like in your Alpine term Next we need to prepare our code within Alpine. We do this to balance the fact that we may develop and source control code outside of Alpine and needing to periodically synchronize it with updates. -In the case of this example work, we assume git as an interface for Github as the source control host. +In the case of this example work, we assume git as an interface for GitHub as the source control host. Below you'll find the general steps associated with this process. @@ -307,14 +307,14 @@ Below you'll find the general steps associated with this process. -> ā„¹ļø __What if I need to authenticate with Github?__ +> ā„¹ļø __What if I need to authenticate with GitHub?__ > -> There are times where you may need to authenticate with Github in order to accomplish your work. -> From a Github perspective, you will want to use either Github Personal Access Tokens (PAT) (recommended by Github) or SSH keys associated with the `git` client on Alpine. -> Note: if you are prompted for a username and password from `git` when accessing a Github resource, the password is now associated with other keys like PAT's instead of your user's password ([reference](https://github.blog/changelog/2021-08-12-git-password-authentication-is-shutting-down)). -> See the following guide from Github for more information on how authentication through `git` to Github works: +> There are times where you may need to authenticate with GitHub in order to accomplish your work. +> From a GitHub perspective, you will want to use either GitHub Personal Access Tokens (PAT) (recommended by GitHub) or SSH keys associated with the `git` client on Alpine. +> Note: if you are prompted for a username and password from `git` when accessing a GitHub resource, the password is now associated with other keys like PAT's instead of your user's password ([reference](https://github.blog/changelog/2021-08-12-git-password-authentication-is-shutting-down)). 
+> See the following guide from GitHub for more information on how authentication through `git` to GitHub works: > -> - __Github - Authenticating with GitHub from Git:__ [https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git](https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git) +> - __GitHub - Authenticating with GitHub from Git:__ [https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git](https://docs.github.com/en/get-started/quickstart/set-up-git#authenticating-with-github-from-git) ### 2. āš™ļø Implement code on Alpine @@ -390,6 +390,6 @@ __Please note:__ always be sure to check data privacy and policy which change th - [University of Colorado Boulder's Research Computing](https://www.colorado.edu/rc/) - [HPC Cluster Alpine Documentation](https://curc.readthedocs.io/en/latest/clusters/alpine/index.html) -- [Github: Getting started with Git](https://docs.github.com/en/get-started/getting-started-with-git) +- [GitHub: Getting started with Git](https://docs.github.com/en/get-started/getting-started-with-git) - [Slurm Documentation](https://slurm.schedmd.com/) - [Globus Documentation](https://docs.globus.org/) From b3ada82399301e68021b7b2717dce1b903dc7592 Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:25:25 -0600 Subject: [PATCH 25/33] simplifying code preparation description Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cdb2cff..89a7882 100644 --- a/README.md +++ b/README.md @@ -292,8 +292,8 @@ Cloning into 'example-hpc-alpine-python'... _An example of what this preparation section might look like in your Alpine terminal session._ -Next we need to prepare our code within Alpine. -We do this to balance the fact that we may develop and source control code outside of Alpine and needing to periodically synchronize it with updates. +Next we will prepare our code within Alpine. +We do this to balance the fact that we may develop and source control code outside of Alpine. In the case of this example work, we assume git as an interface for GitHub as the source control host. Below you'll find the general steps associated with this process. From eaa5cfed156b5b5c7554a4de033be7d1e772a200 Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:29:22 -0600 Subject: [PATCH 26/33] more descriptive slurm directive documentation Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- run_script.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/run_script.sh b/run_script.sh index 593466b..758a592 100644 --- a/run_script.sh +++ b/run_script.sh @@ -19,8 +19,10 @@ ######################################################## # Slurm directives: # ------------------- -# Below are configurations for Slurm, letting it know -# what and how you'd like to use resources on Alpine. +# Below are configurations for Slurm that specify +# which resources you'd like to use and how you'd like +# to use them on Alpine. 
+# # Generally documentation on these may be found here: # https://slurm.schedmd.com/sbatch.html ######################################################## From 5e39224947f340c4c3d78e464ff973e96e00fef5 Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:31:02 -0600 Subject: [PATCH 27/33] write instead of send Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- run_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_script.sh b/run_script.sh index 758a592..f2b0c3b 100644 --- a/run_script.sh +++ b/run_script.sh @@ -37,7 +37,7 @@ #SBATCH --job-name=example-hpc-alpine-python # Tells Slurm to gather standard output from running this -# file and send to a specific file. +# file and write it to a specific file. # Special variable symbols may be used here: # %j - job ID # %a - job array index From 95659dbfc53677a8cdf8600ec004444260573add Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:34:18 -0600 Subject: [PATCH 28/33] simplify run script python docs Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- run_script.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/run_script.sh b/run_script.sh index f2b0c3b..62762de 100644 --- a/run_script.sh +++ b/run_script.sh @@ -108,11 +108,12 @@ conda activate example_env # code to perform the work we'd like to accomplish. ######################################################## -# run the python file example.py which takes an argument -# as a filepath for exporting data which we pass in here -# from an sbatch exported variable name which is -# received as a named argparse variable within Python -# using the same name. +# run the python file example.py which takes an +# argparse argument for use within python processing. +# +# note: $CSV_FILEPATH is received as an +# sbatch exported variable and sent to python using +# the same name. python code/example.py --CSV_FILENAME=$CSV_FILEPATH ######################################################## From 400eba845c55d7b615edc3346477e8220ca1de0c Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:39:51 -0600 Subject: [PATCH 29/33] capitalizations and sentence endings Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- run_script.sh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/run_script.sh b/run_script.sh index 62762de..31aa3b5 100644 --- a/run_script.sh +++ b/run_script.sh @@ -64,8 +64,8 @@ # Sets an email address to receive notifications from Alpine #SBATCH --mail-user=your-email-address-here@cuanschutz.edu -# Indicate which notifications you'd like to receive from Alpine -# this can also be set to START, END, or FAIL. +# Indicate which notifications you'd like to receive from Alpine. +# This can also be set to START, END, or FAIL. #SBATCH --mail-type=ALL ######################################################## @@ -75,12 +75,12 @@ # software which is pre-loaded on Alpine. ######################################################## -# unloads all existing modules which may have been previously loaded +# Unloads all existing modules which may have been previously loaded. module purge -# use module package to load anaconda software so it may +# Use module package to load Anaconda software so it may # be used by your processes. 
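# (Aside, illustrative module commands — not needed by this script:
#  `module avail anaconda` lists the Anaconda versions installed on Alpine
#  and `module list` shows what is currently loaded in your session.)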
-# note: the numbers found after anaconda/####.## are subject +# Note: the numbers found after anaconda/####.## are subject # to change depending on the versions installed by administrators. module load anaconda/2022.10 @@ -91,14 +91,14 @@ module load anaconda/2022.10 # for running the Python code below. ######################################################## -# remove any existing environments that happen to have +# Remove any existing environments that happen to have # the same exact name. conda env remove --name example_env -y -# next create the environment from the yaml file +# Next create the environment from the yaml file. conda env create -f environment.yml -# then activate the environment +# Then activate the environment. conda activate example_env ######################################################## @@ -108,11 +108,11 @@ conda activate example_env # code to perform the work we'd like to accomplish. ######################################################## -# run the python file example.py which takes an -# argparse argument for use within python processing. +# Run the Python file example.py which takes an +# argparse argument for use within Python processing. # -# note: $CSV_FILEPATH is received as an -# sbatch exported variable and sent to python using +# Note: $CSV_FILEPATH is received as an +# sbatch exported variable and sent to Python using # the same name. python code/example.py --CSV_FILENAME=$CSV_FILEPATH From 6e582d77a8483f479d1b8dec539a61b2a8e75e1b Mon Sep 17 00:00:00 2001 From: d33bs Date: Thu, 6 Jul 2023 12:41:23 -0600 Subject: [PATCH 30/33] docstring sentence Co-Authored-By: Vincent Rubinetti <8326331+vincerubinetti@users.noreply.github.com> --- code/example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/example.py b/code/example.py index 66fc807..7fd0f49 100644 --- a/code/example.py +++ b/code/example.py @@ -1,6 +1,6 @@ """ -An example Python file which creates random data and exports it to a location specified -by way of a +An example Python file which creates random data and exports +it to a location specified in a command line argument. """ import argparse import logging From 839c1e49db01f38b89a6c2dc5c6df5155c621a32 Mon Sep 17 00:00:00 2001 From: Dave Bunten Date: Fri, 7 Jul 2023 07:13:39 -0600 Subject: [PATCH 31/33] Apply suggestions from code review Co-authored-by: Faisal Alquaddoomi --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 89a7882..50c4d92 100644 --- a/README.md +++ b/README.md @@ -109,17 +109,21 @@ style slurm fill:#F0F9FF,stroke:#075985; _Diagram showing high-level user workflow and Alpine components._ Alpine's compute resources are used through compute nodes in a system called [Slurm](https://github.com/SchedMD/slurm). -Slurm helps coordinate shared and configurable access to the compute resources. +Slurm is a system that a large number of users to run jobs on a cluster of computers; the system figures out how to use all the computers in the cluster to execute all the user's jobs fairly (i.e., giving each user approximately equal time and resources on the cluster). A *job* is a request to run something, e.g. a bash script or a program, along with specifications about how much RAM and CPU it needs, how long it can run, and how it should be executed. + +Slurm's role in general is to take in a job (submitted via the `sbatch` command) and put it into a *queue* (also called a "partition" in Slurm). 
For each job in the queue, Slurm constantly tries to find a computer in the cluster with enough resources to run that job, then when an available computer is found runs the program the job specifies on that computer. As the program runs, Slurm records its output to files and finally reports the program's exit status (either completed or failed) back to the job manager. + +Importantly, jobs can either be marked as *interactive* or *batch*. When you submit an interactive job, `sbatch` will pause while waiting for the job to start and then connect you to the program, so you can see its output and enter commands in real time. On the other hand, a *batch* job will return immediately; you can see the progress of your job using `squeue`, and you can typically see the output of the job in the folder from which you ran `sbatch` unless you specify otherwise. Data for or from Slurm work may be stored temporarily on local storage or on user-specific external (remote) storage. > ā„¹ļø __Wait, what are "nodes"?__ > > A simplified way to understand the architecture of Slurm on Alpine is through login and compute "nodes" (computers). -> Login nodes act as a way to prepare and submit processes which will be completed on compute nodes. +> Login nodes act as a place to prepare and submit jobs which will be completed on compute nodes. Login nodes are never used to execute Slurm jobs, whereas compute nodes are exclusively accessed via a job. > Login nodes have limited resource access and are not recommended for running procedures. One can interact with Slurm on Alpine by use of [Slurm interfaces and directives](https://curc.readthedocs.io/en/latest/clusters/alpine/examples.html). -A quick way of accessing Alpine resources is through the use of the `acompile` command, which references a script with common Slurm configurations. +A quick way of accessing Alpine resources is through the use of the `acompile` command, which starts an interactive job on a compute node with some typical default parameters for the job. Since `acompile` requests very modest resources (1 hour and 1 CPU core at the time of writing), you'll typically quickly be connected to a compute node. For more intensive or long-lived interactive jobs, consider using `sinteractive`, which allows for more customization [Interactive Jobs](https://curc.readthedocs.io/en/latest/running-jobs/interactive-jobs.html). One can also access Slurm directly through [various commands](https://slurm.schedmd.com/quickstart.html#commands) on Alpine. Many common software packages are available through the [Modules package](https://github.com/cea-hpc/modules) on Alpine ([UCB RC documentation: The Modules System](https://curc.readthedocs.io/en/latest/compute/modules.html)). @@ -151,7 +155,7 @@ _Diagram showing how Slurm generally works._ Using Alpine effectively involves knowing how to leverage Slurm. A simplified way to understand how Slurm works is through the following sequence. -Please note that some steps and additional complexity are obscured for the purposes of providing a basis of understanding. +Please note that some steps and additional complexity are omitted for the purposes of providing a basis of understanding. 1. __Create a job script:__ build a script which will configure and run procedures related to the work you seek to accomplish on the HPC cluster. 1. __Submit job to Slurm:__ ask Slurm to run a set of commands or procedures. @@ -224,7 +228,7 @@ Data may be sent to or gathered from Alpine using a number of different methods. 
These may vary contingent on the external data storage being referenced, the code involved, or your group's available resources. Please reference the following documentation from the University of Colorado Boulder's Research Computing regarding data transfers: [The Compute Environment - Data Transfer](https://curc.readthedocs.io/en/latest/compute/data-transfer.html). __Please note:__ due to the authentication configuration of Alpine many local or SSH-key based methods are not available for CU Anschutz users. -As a result, [Globus](https://www.globus.org/) represents one of the best options available (see [3. šŸ“‚ Transfer data results](#3-%F0%9F%93%82-transfer-data-results) below). +As a result, [Globus](https://www.globus.org/) represents one of the best options available (see [3. šŸ“‚ Transfer data results](#3-%F0%9F%93%82-transfer-data-results) below). While the Globus tutorial in this document describes how you can download data from Alpine to your computer, note that you can also use Globus to transfer data to Alpine from your computer. ## Implementation From 5c1a8fec516d7ec195418eac37782a9271c60a0c Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 7 Jul 2023 07:17:07 -0600 Subject: [PATCH 32/33] linting --- .pre-commit-config.yaml | 2 +- code/example.py | 2 +- run_script.sh | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1ab3852..626a887 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.4.0 + rev: v1.4.1 hooks: - id: mypy - repo: https://github.com/PyCQA/pylint diff --git a/code/example.py b/code/example.py index 7fd0f49..cb6c1b1 100644 --- a/code/example.py +++ b/code/example.py @@ -1,5 +1,5 @@ """ -An example Python file which creates random data and exports +An example Python file which creates random data and exports it to a location specified in a command line argument. """ import argparse diff --git a/run_script.sh b/run_script.sh index 31aa3b5..ee9a607 100644 --- a/run_script.sh +++ b/run_script.sh @@ -19,7 +19,7 @@ ######################################################## # Slurm directives: # ------------------- -# Below are configurations for Slurm that specify +# Below are configurations for Slurm that specify # which resources you'd like to use and how you'd like # to use them on Alpine. # @@ -112,7 +112,7 @@ conda activate example_env # argparse argument for use within Python processing. # # Note: $CSV_FILEPATH is received as an -# sbatch exported variable and sent to Python using +# sbatch exported variable and sent to Python using # the same name. python code/example.py --CSV_FILENAME=$CSV_FILEPATH From 5c932d3c050ed9c93123d7d4a0cdf0d66130ba57 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 7 Jul 2023 07:25:40 -0600 Subject: [PATCH 33/33] add authorship team / department links --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 50c4d92..762c098 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,8 @@ This repository is intended to help demonstrate the use of Python on [Alpine](ht We use Python here by way of [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) environment management to run code on Alpine. 
This readme will cover a background on the technologies and how to use the contents of this repository as though it were a project you were working on and wanting to run on Alpine.
+Content here was developed by the [Software Engineering Team (SET)](https://cu-dbmi.github.io/set-website/) in the [Department of Biomedical Informatics (DBMI)](https://medschool.cuanschutz.edu/dbmi) with the [University of Colorado Anschutz School of Medicine](https://medschool.cuanschutz.edu/).
+
## Table of Contents

1. [__Background:__](#background) here we cover the background of Alpine and related technologies.
@@ -109,11 +111,11 @@ _Diagram showing high-level user workflow and Alpine components._

Alpine's compute resources are used through compute nodes in a system called [Slurm](https://github.com/SchedMD/slurm).
-Slurm is a system that a large number of users to run jobs on a cluster of computers; the system figures out how to use all the computers in the cluster to execute all the user's jobs fairly (i.e., giving each user approximately equal time and resources on the cluster). A *job* is a request to run something, e.g. a bash script or a program, along with specifications about how much RAM and CPU it needs, how long it can run, and how it should be executed.
+Slurm is a system that allows a large number of users to run jobs on a cluster of computers; the system figures out how to use all the computers in the cluster to execute all the user's jobs fairly (i.e., giving each user approximately equal time and resources on the cluster). A _job_ is a request to run something, e.g. a bash script or a program, along with specifications about how much RAM and CPU it needs, how long it can run, and how it should be executed.

-Slurm's role in general is to take in a job (submitted via the `sbatch` command) and put it into a *queue* (also called a "partition" in Slurm). For each job in the queue, Slurm constantly tries to find a computer in the cluster with enough resources to run that job, then when an available computer is found runs the program the job specifies on that computer. As the program runs, Slurm records its output to files and finally reports the program's exit status (either completed or failed) back to the job manager.
+Slurm's role in general is to take in a job (submitted via the `sbatch` command) and put it into a _queue_ (also called a "partition" in Slurm). For each job in the queue, Slurm constantly tries to find a computer in the cluster with enough resources to run that job, then when an available computer is found runs the program the job specifies on that computer. As the program runs, Slurm records its output to files and finally reports the program's exit status (either completed or failed) back to the job manager.

-Importantly, jobs can either be marked as *interactive* or *batch*. When you submit an interactive job, `sbatch` will pause while waiting for the job to start and then connect you to the program, so you can see its output and enter commands in real time. On the other hand, a *batch* job will return immediately; you can see the progress of your job using `squeue`, and you can typically see the output of the job in the folder from which you ran `sbatch` unless you specify otherwise.
+Importantly, jobs can either be marked as _interactive_ or _batch_. When you submit an interactive job, `sbatch` will pause while waiting for the job to start and then connect you to the program, so you can see its output and enter commands in real time.
On the other hand, a _batch_ job will return immediately; you can see the progress of your job using `squeue`, and you can typically see the output of the job in the folder from which you ran `sbatch` unless you specify otherwise.

Data for or from Slurm work may be stored temporarily on local storage or on user-specific external (remote) storage.

> ā„¹ļø __Wait, what are "nodes"?__