diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..990d48570 --- /dev/null +++ b/404.html @@ -0,0 +1,4768 @@ + + + + + + + + + + + + + + + + + + + NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ +

404 - Not found

+ +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Matlab/batch/index.html b/Documentation/Applications/Matlab/batch/index.html new file mode 100644 index 000000000..5e44d4c1a --- /dev/null +++ b/Documentation/Applications/Matlab/batch/index.html @@ -0,0 +1,4979 @@ + + + + + + + + + + + + + + + + + + + + + + + Batch Mode - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Running MATLAB in Batch Mode#

+

Learn how to run MATLAB software in batch mode

+

Below is an example MATLAB script, matlabTest.m, that creates and populates a vector using a simple for-loop and writes the result to a file, x.dat (written in ASCII format by the save command in the script). The shell script matlabTest.sb can be passed to the scheduler to run the job in batch (non-interactive) mode.

+

To try the example out, create both matlabTest.sb and matlabTest.m files in an +appropriate directory, cd to that directory, and call sbatch:

+
$ sbatch matlabTest.sb
+
+
+

Note

+

MATLAB comprises many independently licensed components, and your job may need to wait for multiple components to become available. Currently, the scheduler does not handle this automatically. Because of this, we strongly recommend using compiled MATLAB code for batch processing.

+
+

Calling squeue should show that your job is queued:

+
JOBID       PARTITION       NAME       USER       ST       TIME       NODES       NODELIST(REASON)
+<JobID>     <partition>     matlabTe   username   PD       0:00       1           (<reason>)
+
+

Once the job has finished, standard output and standard error are both saved to a file called slurm-<JobID>.out, and the file x.dat contains the result of the MATLAB script.
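A quick way to confirm the run succeeded is to inspect those files from the submit directory; a minimal sketch (the job ID is a placeholder):

# view the MATLAB output and any errors captured by Slurm
cat slurm-<JobID>.out

# inspect the values written by matlabTest.m
cat x.dat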

+

Notes on matlabTest.sb File#

+
    +
  • Setting a low walltime increases the chances that the job will be scheduled + sooner due to backfill.
  • +
  • The --account=<account_string> flag must include a valid account string or + the job will encounter a permanent hold (it will appear in the queue but will + never run). For more information, see user + accounts.
  • +
  • The environment variable $SLURM_SUBMIT_DIR is set by the scheduler to the + directory from which the sbatch command was executed, e.g., /scratch/$USER. + In this example, it is also the directory into which MATLAB will write the + output file x.dat.
  • +
+

matlabTest.sb

+
#!/bin/bash
+#SBATCH --time=05:00                   # Maximum time requested for job (5 min.)
+#SBATCH --nodes=1                      # Number of nodes
+#SBATCH --job-name=matlabTest          # Name of job
+#SBATCH --account=<your_account>       # account associated with job
+
+module load matlab
+
+# execute code
+cd $SLURM_SUBMIT_DIR                   # Change directories (output will save here)
+matlab -nodisplay -r matlabTest        # Run the MATLAB script
+
+

matlabTest.m

+
format long
+xmin = 2;
+xmax = 10;
+x = zeros(xmax-xmin+1,1);
+for i = xmin:xmax
+    display(i);
+    x(i-xmin+1) = i
+end
+savefile = 'x.dat';
+save(savefile,'x','-ASCII')
+exit
+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Matlab/index.html b/Documentation/Applications/Matlab/index.html new file mode 100644 index 000000000..9bd42b0b0 --- /dev/null +++ b/Documentation/Applications/Matlab/index.html @@ -0,0 +1,4922 @@ + + + + + + + + + + + + + + + + + + + + + + + Using MATLAB Software - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Using MATLAB Software#

+

Learn how to use MATLAB software on the NREL HPC systems.

+

Running MATLAB in Batch Mode#

+

Details on how to run MATLAB scripts in batch mode. Steps are illustrated by a +simple example.

+

Running MATLAB Interactively#

+

How to run interactively using either a terminal or FastX.

+

Using the Parallel Computing Toolbox#

+

Toolbox used to run parallel MATLAB code on a single, multi-core compute +node. Use of the toolbox is demonstrated via a parallel "hello world" example +and a Monte Carlo example that leverages MATLAB's parfor command.

+

Understanding Versions and Licenses#

+

Learn about the MATLAB software versions and licenses available for use.

+

Additional Resources#

+

If you're an NREL user, you can view MATLAB presentations and code examples on GitHub.

+

For all users, see a summary PowerPoint deck on the MATLAB Compiler, MATLAB +Coder, and MATLAB Engine for +Python.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Matlab/interactive/index.html b/Documentation/Applications/Matlab/interactive/index.html new file mode 100644 index 000000000..6ac56aee4 --- /dev/null +++ b/Documentation/Applications/Matlab/interactive/index.html @@ -0,0 +1,4978 @@ + + + + + + + + + + + + + + + + + + + + + + + Interactive Mode - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Running MATLAB Software Interactively#

+

Learn how to run MATLAB software interactively on NREL HPC systems.

+

There are two ways to run MATLAB interactively: start an interactive job and use a basic MATLAB terminal (no GUI), or use the GUI within a FastX session on a DAV node. For information on how to connect to NREL HPC systems, see System Connection.

+

Running MATLAB via an Interactive Job#

+

After connecting to the login node, the next step is to start an interactive job. For example, the following command gets a user-selected number of nodes for interactive use, taking as input tasks per node, job duration, and account.

+
$ salloc --nodes=<number of nodes> --ntasks-per-node=<tasks per node> --account=<your account here> --time=<desired time>
+
+

When your job starts, you will have a shell on a compute node.

+
+

Note

+
    +
  1. To submit an interactive job you must include the --account=<handle> flag + and include a valid project allocation handle. For more information, see + User Accounts.
  2. +
  3. For more information on interactive jobs, see Running Interactive + Jobs.
  4. +
+
+

From the shell on the compute node, the next steps are to load the MATLAB module +to set up your user environment, which includes setting the location of the +license server,

+
$ module load matlab
+
+

and starting a simple MATLAB terminal (no GUI),

+
$ matlab -nodisplay
+
+

Running MATLAB via a FastX Session on a DAV Node#

+

For instructions on starting a FastX session on a DAV node, see the FastX +page. Once you have +started a FastX session and have access to a terminal, load the MATLAB module to +set up your user environment, which includes setting the location of the license +server,

+
$ module load matlab
+
+

and start the MATLAB GUI,

+
$ matlab &
+
+

With FastX, this will enable you to use the GUI as if MATLAB was running +directly on your laptop. The ampersand "&" lets MATLAB run as a background job +so the terminal is freed up for other uses.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Matlab/parallel/index.html b/Documentation/Applications/Matlab/parallel/index.html new file mode 100644 index 000000000..ee69e0f29 --- /dev/null +++ b/Documentation/Applications/Matlab/parallel/index.html @@ -0,0 +1,5200 @@ + + + + + + + + + + + + + + + + + + + + + + + Parallel Computing Toolbox - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Using the Parallel Computing Toolbox with MATLAB#

+

Learn how to use the Parallel Computing Toolbox (PCT) with MATLAB software on the NREL HPC systems.

+
+

Note

+

Due to an issue with the scheduler and software licenses, we strongly recommend +the use of compiled MATLAB code for batch processing. Using the PCT with MATLAB +in batch mode may lead to failed jobs due to unavailability of licenses.

+
+

PCT provides the simplest way for users to run parallel MATLAB code on a single, +multi-core compute node. Here, we describe how to configure your local MATLAB +settings to utilize the PCT and provide some basic examples of running parallel +code on NREL HPC systems.

+

For more extensive examples of PCT usage and code examples, see the MathWorks +documentation.

+

Configuration in MATLAB R2023a#

+

Configuration of the PCT is done most easily through the interactive +GUI. However, the opening of parallel pools can be significantly slower in +interactive mode than in non-interactive (batch) mode. For this reason, the +interactive GUI will only be used to set up your local configuration. Runtime +examples will include batch scripts that submit jobs directly to the scheduler.

+

To configure your local parallel settings, start an interactive MATLAB session +with X11 forwarding (see Running Interactive Jobs on +Kestrel and Environment +Modules on the Kestrel System). Open MATLAB +R2023a and do the following:

+
    +
  1. Under the Home tab, go to Parallel > Parallel Preferences.
  2. +
  3. In the Parallel Pool box, set the "Preferred number of workers in a parallel + pool" to at least 104 (the max number of cores currently available on a standard Kestrel + compute node).
  4. +
  5. Click OK.
  6. +
  7. Exit MATLAB.
  8. +
+

For various reasons, you might not have 104 workers available at runtime. In this +case, MATLAB will just use the largest number available.
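To confirm how many workers the 'Processes' profile will actually provide on a given node, a minimal check from the shell is sketched below; it simply starts MATLAB, prints the profile's NumWorkers property, and exits:

# report the number of workers available to the local 'Processes' profile
matlab -nodisplay -r "p = parcluster('Processes'); fprintf('NumWorkers: %d\n', p.NumWorkers); exit"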

+
+

Note

+

Specifying the number of tasks for an interactive job (i.e., using salloc --ntasks-per-node=<n> to start your interactive job) will interfere with the Parallel Computing Toolbox. We recommend not specifying the number of tasks.

+
+

Examples#

+

Here we demonstrate how to use the PCT on a single compute node on NREL HPC systems. Learn +how to open a local parallel pool with some examples of how to use it for +parallel computations. Because the opening of parallel pools can be extremely +slow in interactive sessions, the examples here will be restricted to +non-interactive (batch) job submission.

+
+

Note

+

Each example below will check out one "MATLAB" and one +"Distrib_Computing_Toolbox" license at runtime.
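If you want to check license availability before submitting, the lmstat.matlab utility described on the Versions and Licenses page can be filtered for the two relevant features; a minimal sketch:

# show current usage of the base MATLAB and Parallel Computing Toolbox licenses
lmstat.matlab | grep -E "Users of (MATLAB|Distrib_Computing_Toolbox):"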

+
+

Hello World Example#

+

In this example, a parallel pool is opened and each worker identifies itself via +spmd ("single program +multiple data"). Create the MATLAB script helloWorld.m:

+
+MATLAB Hello World script +
% open the local cluster profile
+p = parcluster('Processes');
+
+% open the parallel pool, recording the time it takes
+tic;
+parpool(p); % open the pool
+fprintf('Opening the parallel pool took %g seconds.\n', toc)
+
+% "single program multiple data"
+spmd
+  fprintf('Worker %d says Hello World!\n', labindex)
+end
+
+delete(gcp); % close the parallel pool
+exit
+
+
+

To run the script on a compute node, create the file helloWorld.sb:

+
+Slurm batch script for Hello World +
#!/bin/bash
+#SBATCH --time=05:00
+#SBATCH --nodes=1
+#SBATCH --job-name=helloWorld
+#SBATCH --account=<account_string>
+
+# load modules
+module purge
+module load matlab/R2023a
+
+# define an environment variable for the MATLAB script and output
+BASE_MFILE_NAME=helloWorld
+MATLAB_OUTPUT=${BASE_MFILE_NAME}.out
+
+# execute code
+cd $SLURM_SUBMIT_DIR
+matlab -nodisplay -r $BASE_MFILE_NAME > $MATLAB_OUTPUT
+
+
+

where, again, the fields in < > must be properly specified. Finally, at the +terminal prompt, submit the job to the scheduler:

+
$ sbatch helloWorld.sb
+
+

The output file helloWorld.out should contain messages about the parallel pool +and a "Hello World" message from each of the available workers.
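For example, you can pull the worker messages out of the output file once the job completes (the worker numbering follows the fprintf call in helloWorld.m, and the count depends on the workers actually started):

grep "Hello World" helloWorld.out
# expected lines of the form:
#   Worker 1 says Hello World!
#   Worker 2 says Hello World!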

+

Example of Speed-Up Using Parfor#

+

MATLAB's parfor +("parallel for-loop") can be used to parallelize tasks that require no +communication between workers. In this example, the aim is to solve a stiff, +one-parameter system of ordinary differential equations (ODE) for different +(randomly sampled) values of the parameter and to compare the compute time when +using serial and parfor loops. This is a quintessential example of Monte Carlo +simulation that is suitable for parfor: the solution for each value of the +parameter is time-consuming to compute but can be computed independently of the +other values.

+

First, create a MATLAB function stiffODEfun.m that defines the right-hand side +of the ODE system:

+
+MATLAB code stiffODEfun.m +
function dy = stiffODEfun(t,y,c)
+  % This is a modified example from MATLAB's documentation at:
+  % http://www.mathworks.com/help/matlab/ref/ode15s.html
+  % The difference here is that the coefficient c is passed as an argument.
+    dy = zeros(2,1);
+    dy(1) = y(2);
+    dy(2) = c*(1 - y(1)^2)*y(2) - y(1);
+end
+
+
+

Second, create a driver file stiffODE.m that samples the input parameter and +solves the ODE using the ode15s function.

+
+MATLAB script stiffODE.m +
%{
+   This script samples a parameter of a stiff ODE and solves it both in
+   serial and parallel (via parfor), comparing both the run times and the
+   max absolute values of the computed solutions. The code -- especially the
+   serial part -- will take several minutes to run on Eagle.
+%}
+
+% open the local cluster profile
+p = parcluster('Processes');
+
+% open the parallel pool, recording the time it takes
+time_pool = tic;
+parpool(p);
+time_pool = toc(time_pool);
+fprintf('Opening the parallel pool took %g seconds.\n', time_pool)
+
+% create vector of random coefficients on the interval [975,1050]
+nsamples = 10000; % number of samples
+coef = 975 + 50*rand(nsamples,1); % randomly generated coefficients
+
+% compute solutions within serial loop
+time_ser = tic;
+y_ser = cell(nsamples,1); % cell to save the serial solutions
+for i = 1:nsamples
+  if mod(i,10)==0
+    fprintf('Serial for loop, i = %d\n', i);
+  end
+  [~,y_ser{i}] = ode15s(@(t,y) stiffODEfun(t,y,coef(i)) ,[0 10000],[2 0]);
+end
+time_ser = toc(time_ser);
+
+% compute solutions within parfor
+time_parfor = tic;
+y_par = cell(nsamples,1); % cell to save the parallel solutions
+err = zeros(nsamples,1); % vector of errors between serial and parallel solutions
+parfor i = 1:nsamples
+  if mod(i,10)==0
+    fprintf('Parfor loop, i = %d\n', i);
+  end
+  [~,y_par{i}] = ode15s(@(t,y) stiffODEfun(t,y,coef(i)) ,[0 10000],[2 0]);
+  err(i) = norm(y_par{i}-y_ser{i}); % error between serial and parallel solutions
+end
+time_parfor = toc(time_parfor);
+time_par = time_parfor + time_pool;
+
+% print results
+fprintf('RESULTS\n\n')
+fprintf('Serial time : %g\n', time_ser)
+fprintf('Parfor time : %g\n', time_par)
+fprintf('Speedup : %g\n\n', time_ser/time_par)
+fprintf('Max error between serial and parallel solutions = %e\n', max(abs(err)))
+
+% close the parallel pool
+delete(gcp)
+exit
+
+
+

Finally, create the batch script stiffODE.sb:

+
+Slurm batch script stiffODE.sb +
#!/bin/bash
+#SBATCH --time=20:00
+#SBATCH --nodes=1
+#SBATCH --job-name=stiffODE
+#SBATCH --account=<account_string>
+
+# load modules
+module purge
+module load matlab/R2023a
+
+# define environment variables for MATLAB script and output
+BASE_MFILE_NAME=stiffODE
+MATLAB_OUTPUT=${BASE_MFILE_NAME}.out
+
+# execute code
+cd $SLURM_SUBMIT_DIR
+matlab -nodisplay -r $BASE_MFILE_NAME > $MATLAB_OUTPUT
+
+
+

Next, submit the job (which will take several minutes to complete):

+
$ sbatch stiffODE.sb
+
+

If the code executed correctly, the end of the text file stiffODE.out should +contain the times needed to compute the solutions in serial and parallel as well +as the error between the serial and parallel solutions (which should be +0!). There should be a significant speed-up — how much depends on the runtime +environment — for the parallelized computation.
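As a quick check, the summary block printed by the script can be pulled out of the output file (the timing values will vary with the node and its load):

# print the serial time, parfor time, speedup, and max error reported by stiffODE.m
grep -A 6 "RESULTS" stiffODE.out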

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Matlab/versions/index.html b/Documentation/Applications/Matlab/versions/index.html new file mode 100644 index 000000000..46c0bc8fb --- /dev/null +++ b/Documentation/Applications/Matlab/versions/index.html @@ -0,0 +1,4964 @@ + + + + + + + + + + + + + + + + + + + + + + + Versions and Licenses - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

MATLAB Software Versions and Licenses#

+

Learn about the MATLAB software versions and licenses available for the NREL HPC systems.

+

Versions#

+

The latest version available on NREL HPC systems is R2023a.

+

Licenses#

+

MATLAB is proprietary software. As such, users have access to a limited number +of licenses both for the base MATLAB software as well as some specialized +toolboxes.

+

To see which toolboxes are available, regardless of how they are licensed, start +an interactive MATLAB session and run:

+
>> ver
+
+

For a comprehensive list of available MATLAB-related licenses (including those not under active maintenance, such as the Database Toolbox), as +well as their current availability, run the following terminal command:

+
$ lmstat.matlab
+
+

Among other things, you should see the following:

+
Feature usage info:
+
+Users of MATLAB: (Total of 6 licenses issued; Total of ... licenses in use)
+
+Users of Compiler: (Total of 1 license issued; Total of ... licenses in use)
+
+Users of Distrib_Computing_Toolbox: (Total of 4 licenses issued; Total of ... licenses in use)
+
+Users of MATLAB_Distrib_Comp_Engine: (Total of 16 licenses issued; Total of ... licenses in use)
+
+

This documentation only covers the base MATLAB package and the Parallel +Computing Toolbox, which check out the "MATLAB" and "Distrib_Computing_Toolbox" +licenses, respectively.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Plexos/index.html b/Documentation/Applications/Plexos/index.html new file mode 100644 index 000000000..e636b5851 --- /dev/null +++ b/Documentation/Applications/Plexos/index.html @@ -0,0 +1,4913 @@ + + + + + + + + + + + + + + + + + + + + + + + Plexos - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

PLEXOS#

+

PLEXOS is a simulation software for modeling electric, gas, and water systems for optimizing energy markets.

+

Users can run PLEXOS models on NREL's computing clusters. However, users need to build the PLEXOS models on a Windows system, as there is no GUI available on the clusters, or on Linux in general.

+

Available Modules#

+ + + + + + + + + + + + + + + + + + + + +
KestrelSwiftVermilion
plexos/9.000R09plexos/9.000R09
plexos/9.200R06
+
+

Info

+

A user can only run PLEXOS with Gurobi solvers at this time. Please set up your model accordingly.

+
+

Contents#

+
    +
  1. Setting up PLEXOS
  2. +
  3. Running PLEXOS
  4. +
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Plexos/run_plexos/index.html b/Documentation/Applications/Plexos/run_plexos/index.html new file mode 100644 index 000000000..45bacff71 --- /dev/null +++ b/Documentation/Applications/Plexos/run_plexos/index.html @@ -0,0 +1,5159 @@ + + + + + + + + + + + + + + + + + + + + + + + Running Plexos - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Running Models

+ +

Please follow the setup instructions before running the examples. Example scripts for new users are available within the master branch.

+
+

Note

+

Sometimes newer modules may be available in a test directory that is hidden by default from the general user base. This obscured release is done to iron out any bugs that may arise during the installation and use of the module while avoiding breaking users' existing jobs and workflows. You can use these test modules by running

+
module use /nopt/nrel/apps/software/plexos/modules/test
+module avail
+
+

This should display all of the test modules available in addition to the defaults. We encourage you to reach out to us at HPC-Help@nrel.gov if you would like access to these modules.

+
+

Example Run#

+

We will load the requisite modules for running PLEXOS 9.2R06 for this example. Please see the module compatibility chart to load the correct modules.

+
module load gurobi/10.0.2
+module load plexos/9.200R06
+
+

Recall that we can only use the Gurobi solver when running PLEXOS on the NREL clusters. Now that we have the modules loaded, PLEXOS can be called as follows:

+
$PLEXOS/PLEXOS64 -n 5_bus_system_v2.xml -m 2024_yr_15percPV_MT_Gurobi
+
+

The command above assumes that we are running the model 2024_yr_15percPV_MT_Gurobi from the file 5_bus_system_v2.xml. PLEXOS 9.0RX requires validating user credentials for a local PLEXOS account on each run. Therefore, if we ran the above command in an interactive session, we would need to enter the following username and password:

+
username : nrelplexos
+password : Nr3lplex0s
+
+

Fortunately, we can bypass the prompt for a local PLEXOS account username and password (useful for slurm batch jobs) by passing them as command line arguments as follows.

+
$PLEXOS/PLEXOS64 -n 5_bus_system_v2.xml -m 2024_yr_15percPV_MT_Gurobi -cu nrelplexos -cp Nr3lplex0s
+
+
+

Warning

+

Not providing the username and password in batch jobs WILL cause your jobs to fail.

+
+

Example Scripts#

+

The example scripts are available here. Please clone the repository to run those examples.

+
+

Note

+

The Slurm output files generated by PLEXOS may not load correctly because of special characters that the PLEXOS output introduces. To remove those special characters, open the Slurm output file in vim and run the following command:

+
# On PC
+:%s/<CTRL-2>//g
+# On Mac
+%s/<CTRL-SHIFT-2>//g
+
+

<CTRL-2> or <CTRL-SHIFT-2> should generate the symbol ^@ (a null character), which is what corrupts the output. Please refer to this stack exchange post for further information.
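If you prefer to clean a file non-interactively (for example from a post-processing script), the same null characters can be stripped with standard shell tools; a minimal sketch with a placeholder file name:

# remove NUL (^@) characters from a PLEXOS-generated Slurm log
tr -d '\000' < slurm-123456.out > slurm-123456.clean.out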

+
+

1: Basic Functionality Test#

+

The basic functionality test is the same as the example run in the section above. We will

+
    +
  1. Request an interactive node
  2. +
  3. Go to the correct example directory
  4. +
  5. Run the PLEXOS example interactively
  6. +
+
+Simple 5 bus problem +
# Request an interactive session on the cluster
+salloc -N 1 --account=<your_hpc_allocation_name> --time=1:00:00 --partition=debug
+
+# Go to the working directory that contains the 5_bus_system_v2.xml example
+cd /to/you/XML/file/
+
+# Load the requisite modules
+module load gurobi/10.0.2
+module load plexos/9.200R06
+
+# Finally run the PLEXOS executable
+$PLEXOS/PLEXOS64 -n 5_bus_system_v2.xml -m 2024_yr_15percPV_MT_Gurobi -cu nrelplexos -cp Nr3lplex0s
+
+
+

2: Simple batch script submission#

+

We will run the same example by submitting the job to the SLURM queue. This example uses the batch file submit_simple.sh. In order to run this example as is, run the following commands

+
+Submit job in a batch file. +
# SSH into Kestrel or your cluster of choice
+ssh $USER@kestrel.hpc.nrel.gov
+
+# Clone the HPC master branch in your scratch folder
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go to the appropriate folder and submit the job on the HPC
+cd HPC/applications/plexos/RunFiles
+sbatch -A account_name --mail-user=your.email@nrel.gov submit_simple.sh
+
+
+

3: Enhanced batch script submission#

+

This builds upon the previous example: it runs the same model as before but adds resilience so that the job does not immediately fail if a license is unavailable. The submission script submit_enhanced.sh waits 120 seconds between attempts and retries the run.

+
+Slightly enhanced batch submission script +
# Skip this if you already have the repo cloned in your scratch directory
+ssh $USER@kestrel.hpc.nrel.gov
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go into the appropriate directory
+cd /scratch/${USER}/HPC/applications/plexos/RunFiles
+sbatch -A account_name --mail-user=your.email@nrel.gov submit_enhanced.sh
+
+
+

4: Submitting multiple PLEXOS jobs#

+

This example demonstrates how to submit multiple PLEXOS jobs. The model names are present in a file called models.txt. submit_multiple.sh is simply a wrapper that calls the batch file submit_plexos.sh.
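The contents of models.txt are not shown here; it is assumed to be a plain text file listing one model name per line, for example (hypothetical contents):

# models.txt is assumed to hold one model name per line, e.g.:
#   2024_yr_15percPV_MT_Gurobi
#   <another_model_name>
cat models.txt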

+
+Submit multiple PLEXOS jobs +
# Skip this if you already have the repo cloned in your scratch directory
+ssh $USER@kestrel.hpc.nrel.gov
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go into the appropriate directory
+cd /scratch/${USER}/HPC/applications/plexos/RunFiles
+./submit_multiple.sh 5_bus_system_v2.xml models.txt
+
+
+

5: Running PLEXOS with SLURM array jobs#

+

This example demonstrates the use of SLURM job arrays to run multiple PLEXOS jobs using the script submit_job_array.sh

+
+Submit Slurm job-array for PLEXOS +
# Skip this if you already have the repo cloned in your scratch directory
+ssh $USER@kestrel.hpc.nrel.gov
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go into the appropriate directory
+cd /scratch/${USER}/HPC/applications/plexos/RunFiles
+export filename=5_bus_system_v2 # Export the XML dataset name
+export models=models.txt               # Export the file that contains the model names within the XML dataset (variable name assumed here; check submit_job_array.sh for the exact name it expects)
+sbatch -A account_name -t 5 --mail-user=your.email@nrel.gov --array=1-4 submit_job_array.sh
+
+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Plexos/setup_plexos/index.html b/Documentation/Applications/Plexos/setup_plexos/index.html new file mode 100644 index 000000000..97b31247c --- /dev/null +++ b/Documentation/Applications/Plexos/setup_plexos/index.html @@ -0,0 +1,5169 @@ + + + + + + + + + + + + + + + + + + + + + + + Setting Up Plexos - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Setting Up

+ +

Loading the Appropriate Modules#

+
+

Info

+

A user can only run PLEXOS with Gurobi solvers on the clusters at this time. Please set up your model accordingly.

+
+

PLEXOS XML model files can only run if the Gurobi solver was specified when the models were created. The most common module combinations you may encounter are:

+ + + + + + + + + + + + + + + + + + + + + +
| PLEXOS Module   | Gurobi Module |
|-----------------|---------------|
| plexos/9.000R09 | gurobi/9.5.1  |
| plexos/9.200R05 | gurobi/10.0.1 |
| plexos/9.200R06 | gurobi/10.0.2 |
+

Please contact us if you encounter any issues or require a newer version.

+

Setting up the License#

+

Before we can run PLEXOS, we need to create a license file on the cluster. For this, run the following commands with some minor modifications

+
+EE_reg.xml +
mkdir -p ~/.config/PLEXOS
+echo '<?xml version="1.0"?>
+<XmlRegistryRoot>
+  <comms>
+    <licServer_IP val="10.60.3.188" />
+    <licServer_CommsPort val="399" />
+    <licServer_IP_Secondary />
+    <connect>
+      <PrimaryServer_Port />
+      <SecondaryServer_Port />
+    </connect>
+    <licServer_CommsPort_Secondary />
+    <LastLicTypeUsed val="server" />
+  </comms>
+  <server>
+    <licServer_LogFolder val="/tmp/" />
+    <licServer_LogEvents val="true" />
+  </server>
+  <proxy_cred>
+    <proxy_ip val="" />
+    <proxy_port val="" />
+    <proxy_uname val="" />
+    <proxy_pass val="" />
+  </proxy_cred>
+  <BannedList>
+    <BanListedMachines val="true" />
+  </BannedList>
+  <ProductUpdates>
+    <LastUpdateDate val="10/10/2021 13:11:10" />
+  </ProductUpdates>
+  <UserName />
+  <Company />
+  <UserEmail />
+  <CompanyCode />
+  <LicenseServerRequestCount />
+</XmlRegistryRoot>'   > ~/.config/PLEXOS/EE_reg.xml
+
+
+

Optional: Conda environment for PLEXOS with Python and R#

+
+

Note

+

The following instructions are NOT required for only running PLEXOS. One only needs to load the relevant Gurobi and PLEXOS modules to run a PLEXOS XML database. Users may combine these runs with conda, Julia, or other software simply by loading the relevant modules and activating the appropriate conda and Julia environments.

+
+
    +
  1. We need to load a few modules and create the requisite conda environment. First, we need to create a conda environment for PLEXOS. +
    module purge
    +module load conda
    +conda create -n plex1 r-essentials
    +
  2. +
  3. Log out and log back in. Load the following modules and activate the conda environment +
    module purge
    +module load comp-intel intel-mpi mkl conda
    +conda activate plex1
    +
  4. +
  5. +

    Install additional R libraries using conda +

    conda install r-doParallel
    +conda install r-RSQLite
    +conda install r-testthat
    +conda install r-covr
    +

    +
    +

    Note

    +

    Most of the R libraries should be added as part of the initial install, but keep an eye out for the packages listed above.

    +
    +
    +

    Info

    +

    See below if you wish to use your own version of R and Python for PLEXOS.

    +
    +
  6. +
  7. +

    We need to install one, rplexos library from source. To do this, execute the following commands +

    mkdir /home/$USER/temporary    
    +cd /home/$USER/temporary
    +git clone https://github.com/NREL/rplexos.git
    +cd rplexos
    +CXX=`which icpc` R CMD INSTALL .
    +

    +
    +

    Note

    +

    rplexos needs to be built using an Intel compiler, and R always wants to build libraries with the same compiler that was used to build R itself. If setting CXX=`which icpc` as shown above does not work, we need to fool R by renaming the Intel C++ compiler using a symbolic link. This is a hack and should only be used if the installation method above fails. To use the hack, run the following after replacing username in the third line with your own username.

    ln -s `which icpc` x86_64-conda_cos6-linux-gnu-c++
    +export PATH=`pwd`:$PATH
    +Rscript -e  "install.packages('/home/username/temporary/rplexos/',repos=NULL,type='source')"
    +rm x86_64-conda_cos6-linux-gnu-c++
    +

    +
    +
  8. +
  9. +

    For some PLEXOS examples, we need to install an additional package called plexos-coad. For this run the following +

    cd /scratch/$USER
    +git clone https://github.com/Computational-Energy/plexos-coad.git
    +cd plexos-coad
    +
    +#patch for python 3.9
    +tofix=`grep -lr getchild`
    +for f in $tofix ; do sed -i3x "s/for el_data in elem.getchildren()/for el_data in list\(elem\)/" $f ; done
    +pip install Cython
    +python setup.py install
    +

    +
  10. +
  11. +

    Finally make sure we have numpy and pandas in the plex1 conda environment. +

    pip install numpy pandas
    +

    +
  12. +
+

Loading an existing PLEXOS environment#

+

If you have successfully followed all the instructions in the previous subsection and installed PLEXOS, you can simply load the following modules and activate the conda environment

+
module purge
+module load comp-intel intel-mpi mkl conda
+conda activate plex1
+
+

Using your own version of R and Python#

+

This section relates to step 3 of setting up the PLEXOS environment. The following R libraries will need to be installed manually in this case.

+
install.packages("data.table")
+install.packages("DBI")
+install.packages("dbplyr")
+install.packages("doParallel")
+install.packages("dplyr")
+install.packages("foreach")
+install.packages("lubridate")
+install.packages("magrittr")
+install.packages("parallel")
+install.packages("Rcpp")
+install.packages("RSQLite")
+install.packages("stringi")
+install.packages("tidyr")
+install.packages("knitr")
+install.packages("testthat")
+install.packages("ggplot2")
+install.packages("covr")
+install.packages("tidyverse")
+
+

After installing the above, follow the remainder of the installation starting with +point 4.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Templates/applications_template/index.html b/Documentation/Applications/Templates/applications_template/index.html new file mode 100644 index 000000000..afb22d6e5 --- /dev/null +++ b/Documentation/Applications/Templates/applications_template/index.html @@ -0,0 +1,4983 @@ + + + + + + + + + + + + + + + + + + + Template for an Application Page - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Template for an Application Page#

+

Documentation: link to documentation

+

Write a brief description of the program here. Keep the italics.

+

Getting Started#

+

This section provides the minimum amount of information necessary to successfully run a basic job on an NREL Cluster. +This information should be as complete and self-contained as possible.

+

Instructions should be step-by-step and include copy-and-pastable commands where applicable.

+

For example, describe how the user can load the program module with module avail and module load:

+
module avail program
+   program/2.0.0    program/1.0.0
+
+
module load program/2.0.0
+
+

Include a section on how to run the job, e.g., with job script examples or commands for an interactive session.

+

Example Job Scripts#

+
+Kestrel CPU +
#!/bin/bash
+
+# In a comment summarize the hardware requested, e.g. number of nodes, 
+    # number of tasks per node, and number of threads per task
+
+#SBATCH --time=
+#SBATCH --nodes=
+#SBATCH --ntasks-per-node=
+#SBATCH --cpus-per-task=
+#SBATCH --partition=
+#SBATCH --account=
+
+# include a section of relevant export and module load commands, e.g.:
+
+module load gcc/8.4.0
+
+export OMP_NUM_THREADS=
+
+# include a sample srun command or similar
+srun program.x
+
+
+
+Vermillion +

If the submit script for Vermillion differs from Kestrel, then include a Vermillion example script here. +If the submit script does not differ, then remove this section (starting from the ??? example "Vermillion" line)

+
+
+Swift +

If the submit script for Swift differs from Kestrel, then include a Swift example script here. +If the submit script does not differ, then remove this section (starting from the ??? example "Swift" line)

+
+
+Template +

Here's a template of a collapsible example.

+
You can include blocked sections
+
+

And unblocked sections.

+
+
+

Note

+

You can use a note to draw attention to information.

+
+

Include instructions on how to submit the job script

+

Supported Versions#

+ + + + + + + + + + + + + + + +
| Kestrel | Swift | Vermillion |
|---------|-------|------------|
| 0.0.0   | 0.0.0 | 0.0.0      |
+

Advanced#

+

Include advanced user information about the code here (see BerkeleyGW page for some examples)

+

One common "advanced case" might be that users want to build their own version of the code.

+

Building From Source#

+

Here, give detailed and step-by-step instructions on how to build the code, if this step is necessary. Include detailed instructions for how to do it on each applicable HPC system. Be explicit in your instructions. Ideally a user reading one of the build sections can follow along step-by-step +and have a functioning build by the end.

+

If building from source is not something anyone would reasonably want to do, remove this section.

+

Be sure to include where the user can download the source code

+
+Building on Kestrel +

Include here, for example, a Kestrel-specific makefile (see berkeleygw example page). This template assumes that we build the code with only one toolchain, which may not be the case. If someone might reasonably want to build with multiple toolchains, use the "Multiple toolchain instructions on Kestrel" template instead.

+

Include relevant commands in blocks.
+
+or as in-line blocks

+

Be sure to state how to set-up the necessary environment, e.g.:

+
module load gcc/8.4.0
+module load openmpi/3.1.6/gcc-8.4.0
+module load hdf5/1.10.6/gcc-ompi
+
+

Give instructions on compile commands. E.g., to view the available make targets, type make. To compile all program executables, type:

+
make cleanall
+make all
+
+
+
+Building on Vermillion +

information on how to build on Vermillion

+
+
+Building on Swift +

information on how to build on Swift

+
+

Troubleshooting#

+

Include known problems and workarounds here, if applicable

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/Templates/berkeleygw_example/index.html b/Documentation/Applications/Templates/berkeleygw_example/index.html new file mode 100644 index 000000000..39528e0a3 --- /dev/null +++ b/Documentation/Applications/Templates/berkeleygw_example/index.html @@ -0,0 +1,5074 @@ + + + + + + + + + + + + + + + + + + + BerkeleyGW - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

BerkeleyGW#

+

Documentation: BerkeleyGW

+

BerkeleyGW is a massively parallel many-body perturbation theory code capable of performing RPA, GW, and GW-BSE calculations, which can be used to investigate properties of materials with high accuracy.

+

Getting Started#

+

This section provides the minimum amount of information needed to run a BerkeleyGW job on an NREL cluster.

+

First, see which versions of BerkeleyGW are available with module avail and load your preferred version with module load:

+

module avail berkeleygw
+   berkeleygw/3.0.1-cpu    berkeleygw/3.0.1-gpu
+
+The module avail berkeleygw command shows that two BerkeleyGW modules are available. To select the GPU-enabled version of BerkeleyGW, for example, we use the module load command:

+
module load berkeleygw/3.0.1-gpu
+
+

Next, create a job script. Below are example job scripts for the available NREL systems. Continuing the above example, we would select the "Kestrel GPU" example script.

+

Sample Job Scripts#

+
+Kestrel CPU +
#!/bin/bash
+
+# This job requests 72 MPI tasks across 2 nodes (36 tasks/node) and no threading
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=36
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module load berkeleygw/3.0.1-cpu
+
+srun epsilon.cplx.x
+
+
+
+Kestrel GPU +

Put job example here

+
+
+Vermillion +

Put job example here

+
+
+Swift +

Put job example here

+
+

Save the submit file as bgw.in, and submit with the command:

+

sbatch bgw.in

+

Supported Versions#

+ + + + + + + + + + + + + + + +
| Kestrel  | Swift | Vermillion |
|----------|-------|------------|
| 2.0, 3.0 | 0.0.0 | 0.0.0      |
+

Advanced#

+

Wavefunction file: HDF5 vs Fortran binary#

+

For large systems, the wavefunction binary file format yields significantly slower read-in times relative to an HDF5-format wavefunction file. The BerkeleyGW code includes utilities to convert wavefunction binary files to HDF5 format and vice-versa called hdf2wfn.x and wfn2hdf.x (see documentation). It is recommended to use HDF5-formatted wavefunction files where possible.

+

Lustre File Striping#

+

BerkeleyGW supports wavefunction files in HDF5 format and binary format. Wavefunction inputs to BerkeleyGW can become large depending on the system under investigation. Large (TODO: define large for Kestrel. Probably > 10 GB) HDF5 wavefunction files benefit from Lustre file striping, and the BerkeleyGW code can see major runtime speed-ups when using this feature.

+
+

Tip

+

Binary format wavefunction files do not benefit from Lustre file striping

+
+

For more on Lustre file striping, see (TODO: documentation section on Lustre file striping?)

+

Advanced submission script example#

+

Because multiple executables in BerkeleyGW require the WFN input files (WFN and WFNq), we can streamline the file linking inside a submission script. We can also include the Lustre file striping step in our submission script. The below example script shows how this can be done for the BerkeleyGW epsilon executable.

+
+Advanced submit script +

This script assumes you build your own version of BerkeleyGW. If not, remove the BGW=/path/to/where/you/built/BerkeleyGW/bin and ln -s $BGW/epsilon.cplx.x . lines.

+

Be sure to load the proper modules (see Getting Started if not building your own version.)

+
#!/bin/bash
+#SBATCH -t 00:20:00
+#SBATCH -N 8
+#SBATCH --gpus-per-node=4
+#SBATCH -C gpu
+#SBATCH -o BGW_EPSILON_%j.out
+#SBATCH --account=
+
+BGW=/path/to/where/you/built/BerkeleyGW/bin
+WFN_folder=/path/to/folder/that/contains/WFN/and/WFNq
+
+mkdir BGW_EPSILON_$SLURM_JOBID
+lfs setstripe -c 60 BGW_EPSILON_$SLURM_JOBID
+cd    BGW_EPSILON_$SLURM_JOBID
+ln -s $BGW/epsilon.cplx.x .
+ln -s  ../epsilon.inp .
+ln -sfn  ${WFN_folder}/WFNq.h5      .   
+ln -sfn  ${WFN_folder}/WFN.h5   ./WFN.h5
+
+ulimit -s unlimited
+export OMP_PROC_BIND=true
+export OMP_PLACES=threads
+export BGW_WFN_HDF5_INDEPENDENT=1
+
+export OMP_NUM_THREADS=16
+srun -n 32 -c 32 --cpu-bind=cores epsilon.cplx.x
+
+

This script will create a directory "BGW_EPSILON_$SLURM_JOBID" (where $SLURM_JOBID will be a numeric ID), stripe the directory with a stripe count of 60, link the epsilon executable, WFNq, and WFN files to the directory, and run BerkeleyGW with 32 GPUs.

+
+

Building Instructions#

+

First, download BerkeleyGW.

+

Then, follow the build instructions in the "building" drop-downs below for the cluster you will be running on.

+
+Building on Kestrel +

The following arch.mk file was used to build BerkeleyGW-3.0 on Kestrel on (date). + Copy this arch.mk file into your BerkeleyGW directory.

+
COMPFLAG  = -DGNU
+PARAFLAG  = -DMPI -DOMP
+MATHFLAG  = -DUSESCALAPACK -DUNPACKED -DUSEFFTW3 -DHDF5
+
+FCPP    = /usr/bin/cpp -C
+F90free = mpifort -ffree-form -ffree-line-length-none -fopenmp -fno-second-underscore -cpp
+LINK    = mpifort -fopenmp
+# FHJ: -funsafe-math-optimizations breaks Haydock and doesn't give any significant speedup
+FOPTS   = -O3 -funroll-loops 
+FNOOPTS = $(FOPTS)
+MOD_OPT = -J  
+INCFLAG = -I
+
+C_PARAFLAG  = -DPARA
+CC_COMP = mpiCC
+C_COMP  = mpicc
+C_LINK  = mpicc
+C_OPTS  = -O3 -ffast-math
+C_DEBUGFLAG = 
+
+REMOVE  = /bin/rm -f
+
+# Math Libraries                                                                                                                                                                                            
+FFTWPATH     =  /projects/scatter/mylibraries_CentOS77/
+#/nopt/nrel/apps/fftw/3.3.3-impi-intel/
+#FFTWLIB      = $(FFTWPATH)/lib/libfftw3.a
+FFTWLIB      =  $(FFTWPATH)/lib/libfftw3_omp.a $(FFTWPATH)/lib/libfftw3.a
+FFTWINCLUDE  =  $(FFTWPATH)/include
+
+LAPACKLIB = /projects/scatter/mylibraries_CentOS77/lib/libopenblas.a
+
+SCALAPACKLIB = /projects/scatter/mylibraries_CentOS77/lib/libscalapack.a
+
+HDF5PATH      = /nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hdf5-1.10.6-dj4jq2ffttkdxksimqe47245ryklau4a
+HDF5LIB      =  ${HDF5PATH}/lib/libhdf5hl_fortran.a \
+                ${HDF5PATH}/lib/libhdf5_hl.a \
+                ${HDF5PATH}/lib/libhdf5_fortran.a \
+                ${HDF5PATH}/lib/libhdf5.a /home/ohull/.conda-envs/bgw/lib/libsz.a -lz -ldl
+HDF5INCLUDE  = ${HDF5PATH}/include
+
+PERFORMANCE  =
+
+TESTSCRIPT = 
+
+

Then, load the following modules:

+
module load gcc/8.4.0
+module load openmpi/3.1.6/gcc-8.4.0
+module load hdf5/1.10.6/gcc-ompi
+
+

Choose whether to use the real or complex flavor of BerkeleyGW by copying the corresponding file to flavor.mk. For example, for the complex version:

+

cp flavor_cplx.mk flavor.mk

+

Finally, compile the code. To view the available make targets, type make. To compile all BerkeleyGW executables, type: +

make cleanall
+make all
+

+
+
+Building on Swift +

TODO: add Swift build instructions

+
+

Troubleshooting#

+

Include known problems and workarounds here, if applicable

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/amrwind/index.html b/Documentation/Applications/amrwind/index.html new file mode 100644 index 000000000..7d1e9d005 --- /dev/null +++ b/Documentation/Applications/amrwind/index.html @@ -0,0 +1,5036 @@ + + + + + + + + + + + + + + + + + + + + + + + AMR-Wind - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

AMR-Wind#

+

AMR-Wind is a massively parallel, block-structured adaptive-mesh, +incompressible flow solver for wind turbine and wind farm +simulations. The primary applications for AMR-Wind are: performing +large-eddy simulations (LES) of atmospheric boundary layer (ABL) +flows, simulating wind farm turbine-wake interactions using actuator +disk or actuator line models for turbines, and as a background solver +when coupled with a near-body solver (e.g., Nalu-Wind) with overset +methodology to perform blade-resolved simulations of multiple wind +turbines within a wind farm. For more information see the AMR-Wind documentation.

+

AMR-Wind is only supported on Kestrel.

+

Installation of AMR-Wind on GPU Nodes#

+

AMR-wind can be installed by following the instructions here. +On Kestrel GPU nodes, this can be achieved by first loading the following modules:

+
module restore 
+ml gcc
+ml PrgEnv-nvhpc
+ml nvhpc/24.1
+ml cray-libsci/22.12.1.1
+ml cmake/3.27.9
+ml python/3.9.13
+
+

Make sure the following modules are loaded using module list.

+
libfabric/1.15.2.0
+craype-x86-genoa 
+curl/8.6.0   
+bzip2/1.0.8  
+tar/1.34  
+python/3.9.13
+cray-dsmml/0.2.2 
+cray-libsci/22.10.1.2 
+gcc/10.1.0
+craype-network-ofi  
+nvhpc/24.1
+cmake/3.27.9 
+libxml2/2.10.3 
+gettext/0.22.4 
+craype/2.7.30 
+cray-mpich/8.1.28 
+PrgEnv-nvhpc/8.5.0
+
+

You can clone the latest version of AMR-Wind from here. Once cloned, cd into the AMR-Wind directory, create a build directory, and change into it before configuring, as sketched below.
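A minimal sketch of that step follows; the repository URL and the --recursive flag (AMR-Wind pulls in AMReX and other dependencies as submodules) are assumptions based on the public AMR-Wind GitHub project:

# clone AMR-Wind (repository URL assumed) and set up an out-of-source build directory
git clone --recursive https://github.com/Exawind/amr-wind.git
cd amr-wind
mkdir build && cd build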

+

You can create a file with the cmake instructions,

+
vim conf_instructions
+
+

and copy the content below.

+
cmake .. -DAMR_WIND_ENABLE_CUDA=ON \
+    -DAMReX_CUDA_ERROR_CAPTURE_THIS:BOOL=ON \
+    -DCMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION:BOOL=ON \
+    -DMPI_CXX_COMPILER=/opt/cray/pe/mpich/8.1.28/ofi/nvidia/23.3/bin/mpicxx \
+    -DMPI_C_COMPILER=/opt/cray/pe/mpich/8.1.28/ofi/nvidia/23.3/bin/mpicc \
+    -DMPI_Fortran_COMPILER=/opt/cray/pe/mpich/8.1.28/ofi/nvidia/23.3/bin/mpifort \
+    -DAMReX_DIFFERENT_COMPILER=ON \
+    -DCMAKE_CUDA_ARCHITECTURES=90 \
+    -DAMR_WIND_ENABLE_CUDA=ON \
+    -DAMR_WIND_ENABLE_CUDA:BOOL=ON \
+    -DAMR_WIND_ENABLE_OPENFAST:BOOL=OFF \
+    -DAMR_WIND_ENABLE_NETCDF:BOOL=OFF \
+    -DAMR_WIND_ENABLE_MPI:BOOL=ON \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DAMR_WIND_ENABLE_HYPRE:BOOL=OFF \
+    -DAMR_WIND_ENABLE_MASA:BOOL=OFF \
+    -DAMR_WIND_ENABLE_TESTS:BOOL=ON \
+    -DCMAKE_INSTALL_PREFIX:PATH=./install
+
+

You can execute the file using:

+
bash conf_instructions
+
+

Once the cmake step is done, you can:

+
make -j 
+
+

then

+
make install -j 
+
+

You should now have a successful installation of AMR-Wind.
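As a quick check (assuming the ./install prefix passed to cmake above), confirm that the executable was produced:

# the amr_wind binary should now exist under the install prefix
ls ./install/bin/amr_wind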

+

At runtime, make sure to follow this sequence of module loads.

+
module restore 
+source /nopt/nrel/apps/gpu_stack/env_cpe23.sh
+ml PrgEnv-nvhpc
+ml cray-libsci/22.12.1.1
+
+

Running on the GPUs Using Modules#

+

NREL provides different AMR-Wind modules for CPUs and GPUs and for different toolchains. It is recommended that AMR-Wind be run on GPU nodes to obtain the best performance.

+

Here is a sample script for submitting an AMR-Wind application run on multiple GPU nodes, with the user's input file and mesh grid in the current working directory.

+
+Sample job script: Kestrel - Full GPU node +
#!/bin/bash
+#SBATCH --time=1:00:00 
+#SBATCH --account=<user-account>
+#SBATCH --nodes=2
+#SBATCH --gpus=h100:4
+#SBATCH --exclusive
+#SBATCH --mem=0
+
+module restore 
+module load PrgEnv-nvhpc
+module load amr-wind/main-craympich-nvhpc
+
+srun -K1 -n 16 --gpus-per-node=4 amr_wind abl_godunov-512.i >& ablGodunov-512.log
+
+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/ansys/index.html b/Documentation/Applications/ansys/index.html new file mode 100644 index 000000000..94677f7b2 --- /dev/null +++ b/Documentation/Applications/ansys/index.html @@ -0,0 +1,5195 @@ + + + + + + + + + + + + + + + + + + + + + + + ANSYS CFD - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

ANSYS CFD

+ +

Ansys#

+

The NREL Computational Science Center (CSC) maintains an Ansys license pool for general use, including two seats of CFD, one seat of Ansys Mechanical, and four Ansys HPC Packs to support running a model on many cores/parallel solves.

+

The main workflow that we support has two stages. The first is interactive graphical usage, e.g., for interactively building meshes or visualizing boundary geometry. For this, Ansys should be run on a FastX desktop. The second stage is batch (i.e., non-interactive) parallel processing, which should be run on compute nodes via a Slurm job script. Of course, if you have Ansys input from another location ready to run in batch mode, the first stage is not needed. We unfortunately cannot support running parallel jobs on the DAV nodes, nor launching parallel jobs from interactive sessions on compute nodes.

+

Shared License Etiquette#

+

License usage can be checked on Kestrel with the command lmstat.ansys. Network floating licenses are a shared resource. Whenever you open an Ansys Fluent window, a license is pulled from the pool and becomes unavailable to other users. Please do not keep idle windows open: if you are not actively using the application, close it and return the associated licenses to the pool. Excessive retention of software licenses falls under the inappropriate use policy.

+

A Note on Licenses and Job Scaling#

+

HPC Pack licenses are used to distribute Ansys batch jobs to run in parallel across many compute cores. The HPC Pack model is designed to enable exponentially more computational resources per each additional license, roughly 2x4^(num_hpc_packs). A table summarizing this relationship is shown below.

+
| HPC Pack Licenses Used | Total Cores Enabled           |
+|------------------------|-------------------------------|
+| 0                      | 4 (0 `hpc_pack` + 4 solver)     |
+| 1                      | 12 (8 `hpc_pack` + 4 solver)    |
+| 2                      | 36 (32 `hpc_pack` + 4 solver)   |
+| 3                      | 132 (128 `hpc_pack` + 4 solver) |
+| 4                      | 516 (512 `hpc_pack` + 4 solver) |
+
+

Additionally, Ansys allows you to use up to four cores without consuming any of the HPC Pack licenses. When scaling these jobs to more than four cores, the four cores are added to the total amount made available by the HPC Pack licenses. For example, a batch job designed to completely fill a node with 36 cores requires one cfd_base license and two HPC Pack licenses (32 + 4 cores enabled).
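To estimate how many cores a given number of HPC Pack licenses enables, the relationship above can be evaluated directly; a small illustrative shell calculation (not an Ansys tool):

# cores enabled = 4 solver cores + 2*4^n cores from n HPC Pack licenses (n >= 1)
for n in 1 2 3 4; do
    echo "HPC Packs: $n -> cores enabled: $((4 + 2 * 4**n))"
done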

+

Building Models in the Ansys GUI#

+

GUI access is provided through FastX desktops. Open a terminal, load, and launch the Ansys Workbench with:

+
module load ansys/<version>
+vglrun runwb2
+
+

where <version> will be replaced with an Ansys version/release e.g., 2024R1. Press tab to auto-suggest all available versions. Because FastX desktop sessions are supported from DAV nodes shared between multiple HPC users, limits are placed on how much memory and compute resources can be consumed by a single user/job. For this reason, it is recommended that the GUI be primarily used to define the problem and run small-scale tests to validate its operation before moving the model to a compute node for larger-scale runs.

+

Running Ansys Model in Parallel Batch Mode#

+

Ansys Fluent#

+

Ansys Fluent is a general-purpose computational fluid dynamics (CFD) software used to model fluid flow, heat and mass transfer, chemical reactions, and more. Its features include advanced physics modeling, turbulence modeling, single and multiphase flows, combustion, battery modeling, and fluid-structure interaction.

+

To launch Ansys Fluent jobs in parallel batch mode, you can build on the batch script presented below.

+
#!/bin/bash
+#!/bin/bash
+#SBATCH --job-name=jobname
+#SBATCH --account=<your_account>
+#SBATCH -o fluent_%j.out
+#SBATCH -e fluent_%j.err
+#SBATCH --nodes=2
+#SBATCH --time=1:00:00
+#SBATCH --ntasks-per-node=104
+#SBATCH --exclusive
+
+cd $SLURM_SUBMIT_DIR
+module load ansys/<version>
+
+export FLUENT_AFFINITY=0
+export SLURM_ENABLED=1
+export SCHEDULER_TIGHT_COUPLING=13
+
+scontrol show hostnames > nodelist
+
+FLUENT=`which fluent`
+VERSION=3ddp
+JOURNAL=journal_name.jou
+LOGFILE=fluent.log
+MPI=intel
+
+OPTIONS="-i$JOURNAL -t$SLURM_NPROCS -mpi=$MPI -cnf=$PWD/nodelist"
+
+cat nodelist                           # record the allocated nodes in the Slurm output
+
+$FLUENT $VERSION -g $OPTIONS > $LOGFILE 2>&1
+
+

Once this script file (assumed to be named ansys-job.slurm) is saved, it can be submitted to the job scheduler with

+
[user@kl3 ~]$ sbatch ansys-job.slurm
+
+

In this example batch script, 3ddp can be replaced with the version of FLUENT your job requires (2d, 3d, 2ddp, or 3ddp), -g specifies that the job should run without the GUI, -t specifies the number of processors to use (in this example, 2 x 104 processors), -cnf specifies the hosts file (the list of nodes allocated to this job), -mpi specifies the MPI implementation (intel or openmpi; Ansys uses its own MPI that comes with the package rather than the MPI installed on our cluster, and the current Ansys version only supports intel or openmpi), and -i specifies the job input file. For more Fluent options, load the Ansys module and run fluent -help.

+

In addition, the following commands in the slurm script are included to make sure the right bootstrap is used:

+
export FLUENT_AFFINITY=0
+export SLURM_ENABLED=1
+export SCHEDULER_TIGHT_COUPLING=13
+
+

Ansys Mechanical#

+

Ansys Mechanical is finite element analysis (FEA) software used to perform structural analysis with advanced solver options, including linear dynamics, nonlinearities, thermal analysis, materials, composites, hydrodynamics, explicit dynamics, and more. A Slurm script for Ansys Mechanical jobs is presented below.

+

#!/bin/bash
+#
+#SBATCH --job-name=jobname
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=2
+#SBATCH --time=1:00:00
+#SBATCH --exclusive
+#SBATCH --account=<your_account>
+#SBATCH --output="ansys-%j.out"
+#SBATCH --error="ansys-%j.err"
+
+cd $SLURM_SUBMIT_DIR
+
+module load ansys
+
+machines=$(srun hostname | sort | uniq -c | awk '{print $2 ":" $1}' | paste -s -d ":" -)
+
+ansys241 -dis -mpi intelmpi2018 -machines $machines -i inputfilename.dat -o joboutput.out
+
+
In the Slurm script, ansys241 starts the Ansys Mechanical solver, -dis enables distributed-memory parallel processing, -mpi specifies the MPI implementation to use (intelmpi2018 or openmpi), -machines specifies the host names, -i specifies the job input file, and -o specifies the job output file.

+

A Few Notes#

+

Out-of-memory (OOM) errors are commonly encountered when running Ansys jobs. To overcome an out-of-memory issue, you can try the following:

+

If you are running on shared nodes, by default your job will be allocated about 1 GB of RAM per core requested. To change this amount, use the --mem or --mem-per-cpu flag in your job submission. To allocate all of the memory available on a node, use the --mem=0 flag (see https://nrel.github.io/HPC/Documentation/Systems/Kestrel/Running/).

+

Try running the job on nodes with a local disk by using the --tmp option in your job submission script (e.g., --tmp=1600000; see https://nrel.github.io/HPC/Documentation/Systems/Kestrel/Running/). A sketch of both options is shown below.
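As an illustrative sketch, the corresponding sbatch directives might look like the following (the --tmp value is the example size from above; adjust both to your job's needs):

+
#SBATCH --mem=0          # request all of the memory available on the node
+#SBATCH --tmp=1600000    # request a node with sufficient local disk space
+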

+

Connect to Your Own License#

+

At NREL, a few groups own their own Ansys licenses. To connect to a private license, set the environment variable ANSYSLMD_LICENSE_FILE (e.g., export ANSYSLMD_LICENSE_FILE=1055@10.60.1.85, replacing the port and license server hostname or IP address with your own).
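For example, using the placeholder values above, the license variable can be exported before loading the Ansys module:

+
export ANSYSLMD_LICENSE_FILE=1055@10.60.1.85   # replace with your own port@server
+module load ansys/<version>
+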

+

Contact#

+

For information about accessing licenses beyond CSC's base capability, please contact Emily Cousineau.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/comsol/index.html b/Documentation/Applications/comsol/index.html new file mode 100644 index 000000000..2b7f8fde9 --- /dev/null +++ b/Documentation/Applications/comsol/index.html @@ -0,0 +1,5066 @@ + + + + + + + + + + + + + + + + + + + + + + + Comsol - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

COMSOL Multiphysics#

+

COMSOL Multiphysics is a versatile finite element analysis and simulation package. The COMSOL graphical user interface (GUI) environment is supported primarily for building and solving small models, while operation in batch mode allows users to scale their models to larger, higher-fidelity studies. Currently, we host three floating network licenses and a number of additional modules. Two COMSOL versions are available on Kestrel: 6.1 and 6.2.

+

Building a COMSOL Model#

+

Extensive documentation is available in the menu: Help > Documentation. For beginners, it is highly recommended to follow the steps in Introduction to COMSOL Multiphysics found in Help > Documentation.

+

For instructional videos, see the COMSOL website Video Gallery.

+

Building Models in the COMSOL GUI#

+

Before beginning, it is good practice to check the license status. To do so, run the following command:

+
[user@kl3 ~]$ ./lmstat.comsol
+
+

When licenses are available, COMSOL can be used by starting the COMSOL GUI which allows you to build models, run the COMSOL computational engine, and analyze results. The COMSOL GUI can be accessed through a FastX desktop by opening a terminal in a FastX window and running the following commands:

+
[user@kl3 ~]$ module load comsol
+[user@kl3 ~]$ vglrun comsol
+
+

Because FastX desktop sessions are supported from DAV nodes shared between multiple HPC users, limits are placed on how much memory and compute resources can be consumed by a single user/job. For this reason, it is recommended that the GUI be used primarily to define the problem and run small-scale tests to validate its operation before moving the model to a compute node for larger-scale runs. For jobs that require both large-scale compute resources and GUI interactivity simultaneously, there is partial support for running the GUI from an X-enabled shell on a compute node by replacing the vglrun comsol command with:

+
[user@kl3 ~]$ comsol -3drend sw
+
+

However, the performance may be slow and certain display features may behave unexpectedly.

+

Running a Single-Node COMSOL Model in Batch Mode#

+

You can save the model you built in FastX+GUI mode into a file such as myinputfile.mph. Once that's available, the following job script shows how to run a single-process, multithreaded job in batch mode:

+
+Example Submission Script +
#!/bin/bash                                                                                                                                                                                     
+#SBATCH --job-name="comsol-batch-single-node"                                                                                                                                                   
+#SBATCH --nodes=1                                                                                                                                                                               
+#SBATCH --ntasks-per-node=104                                                                                                                                                                   
+#SBATCH --cpus-per-task=1                                                                                                                                                                       
+#SBATCH --time=00:10:0        
+#SBATCH --partition=debug
+#SBATCH --account=<allocation handle>
+#SBATCH --output="comsol-%j.out"
+#SBATCH --error="comsol-%j.err"
+
+# This helps ensure your job runs from the directory
+# from which you ran the sbatch command
+SLURM_SUBMIT_DIR=<your working directory>
+cd $SLURM_SUBMIT_DIR
+
+# Set up environment, and list to stdout for verification
+module load comsol
+echo " "
+module list
+echo " "
+
+inputfile=$SLURM_SUBMIT_DIR/myinputfile.mph
+outputfile=$SLURM_SUBMIT_DIR/myoutputfilename
+logfile=$SLURM_SUBMIT_DIR/mylogfilename
+
+# Run a COMSOL job with 104 threads.
+
+comsol batch -np 104 -inputfile $inputfile -outputfile $outputfile -batchlog $logfile
+
+
+

Once this script file (e.g., submit_single_node_job.sh) is saved, it can be submitted to the job scheduler with

+
[user@kl3 ~]$ sbatch ./submit_single_node_job.sh
+
+

Running a Multi-Node COMSOL Model in Batch Mode#

+

To configure a COMSOL job with multiple MPI ranks, required for any job where the number of nodes >1, you can build on the following template:

+
+Example Multiprocess Submission Script +
#!/bin/bash                                                                                                                                                                                     
+#SBATCH --job-name="comsol-batch-multinode-hybrid"                                                                                                                                                  
+#SBATCH --nodes=4                                                                                                                                                                               
+#SBATCH --ntasks-per-node=8                                                                                                                                                                     
+#SBATCH --cpus-per-task=13                                                                                                                                                                      
+#SBATCH --time=00:10:0                                                                                                                                                                          
+#SBATCH --partition=debug                                                                                                                                                                       
+#SBATCH --exclusive                                                                                                                                                                             
+#SBATCH --account=<allocation handle>                                                                                                                                                                  
+#SBATCH --output="comsol-%j.out"                                                                                                                                                                
+#SBATCH --error="comsol-%j.err"                                                                                                                                                                 
+
+# This helps ensure your job runs from the directory                                                                                                                                            
+# from which you ran the sbatch command                                                                                                                                                         
+SLURM_SUBMIT_DIR= <your working directory>
+cd $SLURM_SUBMIT_DIR
+
+# Set up environment, and list to stdout for verification                                                                                                                                       
+module load comsol
+echo " "
+module list
+echo " "
+
+export SLURM_MPI_TYPE=pmi2
+export OMP_NUM_THREADS=13
+
+inputfile=$SLURM_SUBMIT_DIR/myinputfile.mph
+outputfile=$SLURM_SUBMIT_DIR/myoutputfilename
+logfile=$SLURM_SUBMIT_DIR/mylogfilename
+
+# Run a 4-node job with 32 MPI ranks and 13 OpenMP threads per each rank.                                                                                                                        
+comsol batch -mpibootstrap slurm -inputfile $inputfile -outputfile $outputfile -batchlog $logfile
+
+
+

The job script can be submitted to Slurm just the same as above for the single-node example. The option -mpibootstrap slurm helps COMSOL deduce runtime parameters such as -nn, -nnhost, and -np. For large jobs that require more than one node, this approach, which uses MPI and/or OpenMP, can be used to efficiently utilize the available resources. Note that in this case we choose 32 MPI ranks, 8 per node, with each rank using 13 threads for demonstration purposes, not as an optimal performance recommendation. The optimal configuration depends on your particular problem, workload, and choice of solver, so some experimentation may be required.
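For reference, a minimal sketch of an equivalent launch that sets the layout explicitly, using the same 32 ranks, 8 ranks per node, and 13 threads per rank described above, would be:

+
comsol batch -nn 32 -nnhost 8 -np 13 -inputfile $inputfile -outputfile $outputfile -batchlog $logfile
+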

+

The Complex Systems Simulation and Optimization group has hosted introductory and advanced COMSOL trainings. The introductory training covered how to use the COMSOL GUI and run COMSOL in batch mode on Kestrel. The advanced training showed how to do a parametric study using different sweeps (running an interactive session is also included) and introduced equation-based simulation and parameter estimation. To learn more about using COMSOL on Kestrel, please refer to the training. The recording can be accessed at Computational Sciences Tutorials and the slides and models used in the training can be downloaded from Github.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/fenics/index.html b/Documentation/Applications/fenics/index.html new file mode 100644 index 000000000..28e9ead2c --- /dev/null +++ b/Documentation/Applications/fenics/index.html @@ -0,0 +1,5066 @@ + + + + + + + + + + + + + + + + + + + + + + + FEniCS - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

FEniCS/FEniCSx#

+

Documentation: FEniCS 2019.1.0, FEniCSx

+

FEniCS is a collection of open-source software components designed to enable the automated solution of differential equations by the finite element method.

+
+

Note

+

There are two versions of FEniCS. The original FEniCS ended in 2019 with version 2019.1.0, and development began on a complete refactor known as FEniCSx. FEniCS 2019.1.0 is still actively used and is the main focus of this documentation. Since FEniCSx is in pre-release, HPC support is a work in progress.

+
+

Getting Started#

+

FEniCS is organized as a collection of interoperable components that together form the FEniCS Project. These components include the problem-solving environment DOLFIN, the form compiler FFC, the finite element tabulator FIAT, the just-in-time compiler Instant, the form language UFL, and a range of additional components.

+

FEniCS can be programmed both in C++ and Python, but Python programming is the simplest approach to exploring FEniCS and can give high performance.

+

Currently, FEniCS is supported through Anaconda. Users are required to build their own FEniCS environment with the following commands after loading the conda/anaconda module (see Example Job Scripts):

+
module load conda
+conda create -n myfenics -c conda-forge fenics  matplotlib scipy jupyter 
+
+

The packages matplotlib, scipy, and jupyter are not required, but they are very handy to have.

+

These commands will create a new environment named myfenics which contains all necessary packages as well as some commonly-used packages for programming FEniCS simulations. By default, this Conda environment will be installed in the directory /home/<username>/.conda-envs/myfenics. It will take roughly 3 GB of storage. Please make sure you have enough storage quota in the home directory before installation by running the du -hs ~ command (which will take a minute or two to complete).
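After the installation completes, a quick sanity check of the new environment might look like the following (assuming the environment name above; dolfin is the main Python module of FEniCS 2019.1.0):

+
module load conda
+conda activate myfenics
+python -c "import dolfin; print(dolfin.__version__)"
+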

+

FEniCSx can also be installed via conda using: +

conda create -n myfenics -c conda-forge fenics-dolfinx
+

+

Example Job Scripts#

+
+Kestrel CPU +
#!/bin/bash
+
+# This test file is designed to run the Poisson demo on one node with 4 cores
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module purge
+module load conda
+
+# This is to prevent FEniCS from unnecessarily attempting to multi-thread
+export OMP_NUM_THREADS=1
+
+cd /scratch/USERNAME/poisson_demo/
+srun -n 4 python poisson_demo.py
+
+
+
+Vermilion +
#!/bin/bash
+
+# This test file is designed to run the Poisson demo on one node with 4 cores
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module purge
+module load anaconda3
+
+# This is to prevent FEniCS from unnecessarily attempting to multi-thread
+export OMP_NUM_THREADS=1
+
+cd /scratch/USERNAME/poisson_demo/
+srun -n 4 python poisson_demo.py
+
+
+
+Swift +
#!/bin/bash
+
+# This test file is designed to run the Poisson demo on one node with 4 cores
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module purge
+module load conda
+
+# This is to prevent FEniCS from unnecessarily attempting to multi-thread
+export OMP_NUM_THREADS=1
+
+cd /home/USERNAME/poisson_demo/
+srun -n 4 python poisson_demo.py
+
+
+

To run this script, first download the Poisson demo here and place it in a folder titled "poisson_demo" in your scratch directory (home for Swift). Next, replace "USERNAME" in the script with your username. Then save the script as "demo_script.sh" and submit it with sbatch demo_script.sh. This demo is only supported by FEniCS 2019.1.0 and not FEniCSx.
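As a sketch, the setup steps described above might look like the following on Kestrel or Vermilion, assuming the demo was downloaded to your home directory as poisson_demo.py:

+
mkdir -p /scratch/$USER/poisson_demo
+cp ~/poisson_demo.py /scratch/$USER/poisson_demo/
+cd /scratch/$USER/poisson_demo
+sbatch demo_script.sh
+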

+

Supported Versions#

+ + + + + + + + + + + + + + + +
Kestrel    Vermilion    Swift
2019.1.0    2019.1.0    2019.1.0
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/gams/index.html b/Documentation/Applications/gams/index.html new file mode 100644 index 000000000..8bcf93e2c --- /dev/null +++ b/Documentation/Applications/gams/index.html @@ -0,0 +1,5111 @@ + + + + + + + + + + + + + + + + + + + + + + + GAMS - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Using the General Algebraic Modeling System#

+

The General Algebraic Modeling System (GAMS) is a commercial high-level modeling system for mathematical programming and optimization. It is licensed software.

+

GAMS includes a DSL compiler and also a stable of integrated high-performance solvers. GAMS is able to solve complex, large-scale modeling problems. For documentation, forums, and FAQs, see the GAMS website.

+

A 60-user license of GAMS is made available to NREL users. This GAMS license requires users to be a member of the "gams workgroup." If you need the GAMS software package or a specific solver from GAMS, or if you have trouble running GAMS, please contact us.

+

Initializing Your Environment#

+

To initialize your environment to use GAMS, simply type module load gams/<version>; see the output of module avail gams for the available versions. GAMS is run with the command format gams <input filename>. A file <input filename>.lst will be created as the output file.

+

For a test run, in your home directory, type the following:

+
module load gams/<version>
+cp /nopt/nrel/apps/gams/example/trnsport.gms .
+gams trnsport
+
+

A result of 153.675 should be found from screen output. More detailed output is in the file trnsport.lst.

+

Selecting an Alternative Solver#

+

The available solvers for the different model types are listed below, with the default solver listed first:

+
    +
  • LP: GUROBI BDMLP CBC IPOPT SOPLEX
  • +
  • MIP: GUROBI BDMLP CBC SCIP
  • +
  • RMIP: GUROBI BDMLP CBC IPOPT SOPLEX
  • +
  • NLP: SCIP COUENNE IPOPT
  • +
  • MCP: NLPEC MILES
  • +
  • MPEC: NLPEC
  • +
  • RMPEC: NLPEC
  • +
  • CNS: SCIP COUENNE IPOPT
  • +
  • DNLP: SCIP COUENNE IPOPT
  • +
  • RMINLP: SCIP COUENNE IPOPT
  • +
  • MINLP: SCIP BONMIN COUENNE
  • +
  • QCP: GUROBI COUENNE IPOPT SCIP
  • +
  • MIQCP: GUROBI BONMIN COUENNE SCIP
  • +
  • RMIQCP: GUROBI COUENNE IPOPT SCIP
  • +
  • EMP: JAMS LOGMIP SELKIE
  • +
+

By typing gams <input_filename> on the command line, the default procedure LP and the default solver Gurobi will be used. To override the default solver and use, e.g., Soplex, you can try one of the following two methods:

+
    +
  1. +

    Use the option statement in your GAMS input file. For example, if your model uses the LP procedure and you want to solve it with the Soplex solver, just add option lp=soplex to your input file.

    +
  2. +
  3. +

    Specify the solver in the command line, e.g., gams <input_filename> lp=soplex.

    +
  4. +
+

A sample script for batch submission is provided here:

+

Sample Submission Script

+
#!/bin/bash --login
+#SBATCH --job-name=gams_run
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=36
+#SBATCH --time=00:05:00
+#SBATCH --account=<allocation-id>
+#SBATCH --error=gams-%j.err
+#SBATCH --output=gams-%j.out
+
+# Ensure script location
+cd $SLURM_SUBMIT_DIR
+
+# Create runtime environment
+module load gams/<version>
+
+# Run GAMS
+gams trnsport lp=gurobi
+
+

For a given solver, control parameters for the algorithm, such as convergence criteria, can be loaded from an option file named <solver_name>.opt in the directory from which you run GAMS. For example, for the Gurobi solver, the option file would be "gurobi.opt". For details on how to set those parameters, please see the GAMS Solver Manuals.
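As an illustration only, a Gurobi option file could be created in the run directory as shown below; the parameter names and values here are examples, so consult the GAMS Solver Manuals for the options relevant to your model:

+
cat > gurobi.opt << 'EOF'
+mipgap 0.001
+threads 4
+EOF
+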

+

Important Tip#

+

When using the Gurobi solver in GAMS, the user should NOT try to load the Gurobi module. Simply using "module load gams" will automatically load the Gurobi solver.

+

Using GAMS Python API#

+

For GAMS version < 40.0#

+

In order to use GAMS python API, the environment parameter $PYTHONPATH should include these two directories:

+

$GAMS_PYTHON_API_FILES/gams
+$GAMS_PYTHON_API_FILES/api_[version-of-python]

+

where version-of-python = 27, 36, 37, or 38 for Python version 2.7, 3.6, 3.7, or 3.8, respectively. The Python version can be obtained using the command python --version.

+

For example, for python 3.7 and the bash shell, $PYTHONPATH can be set using the following script:

+
module purge
+module load gams/31.1.0
+if [ -z ${PYTHONPATH+x} ]
+then
+        export PYTHONPATH=$GAMS_PYTHON_API_FILES/api_37:$GAMS_PYTHON_API_FILES/gams
+else
+        export PYTHONPATH=$GAMS_PYTHON_API_FILES/api_37:$GAMS_PYTHON_API_FILES/gams:$PYTHONPATH
+fi
+
+
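With $PYTHONPATH set as above, a quick way to confirm that the API is importable (a minimal check) is:

+
python -c "from gams import GamsWorkspace; print('GAMS Python API loaded')"
+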

For GAMS version > 40.0#

+

The GAMS API can be installed using Anaconda and pip. Please follow the instructions on the GAMS website. Currently, GAMS supports Python versions 3.7 to 3.11. In general, it can be installed using the following command:

+
pip install gams[your choice of sub-module] --find-links $GAMS_PYTHON_API_FILES
+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/gaussian/index.html b/Documentation/Applications/gaussian/index.html new file mode 100644 index 000000000..adbc8e62f --- /dev/null +++ b/Documentation/Applications/gaussian/index.html @@ -0,0 +1,5014 @@ + + + + + + + + + + + + + + + + + + + + + + + Gaussian - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Running Gaussian16 Software Jobs#

+

Learn about the Gaussian16 electronic structure program and how to run Gaussian16 jobs at NREL.

+
+

Important

+

To run Gaussian16, users must be a member of the Gaussian user group. To be added to the group, contact HPC-Help. In your email message, include your username and copy the following text agreeing not to compete with Gaussian, Inc.:

+
I am not actively developing applications for a competing software program, or for a project in 
+collaboration with someone who is actively developing for a competing software program. I agree 
+that Gaussian output cannot be provided to anyone actively developing for a competing software program.
+
+I agree to this statement.
+
+
+

Configuration and Default Settings#

+

NREL currently has Gaussian16 Revision C.01 installed, and the user manual can be found at the Gaussian website. Gaussian currently doesn't have support for H100 GPUs.

+

Users coming from Gaussian 09 may sometimes feel that Gaussian 16 runs slower than Gaussian 09. This is because G16 changed the default accuracy to Int=Acc2E=12 Grid=Ultrafine, which means that individual SCF iterations will take longer with G16 than with G09.

+

Sample Job Scripts#

+

Gaussian may be configured to run on one or more physical nodes, with or without shared-memory parallelism. The distributed-memory parallel setup is taken care of automatically based on settings in the Slurm script example below.

+
+Sample Submission Script +
#!/bin/bash
+#SBATCH --job-name G16_test
+#SBATCH --nodes=2
+#SBATCH --time=1:00:00
+#SBATCH --account=[your account]
+#SBATCH --error=std.err
+#SBATCH --output=std.out
+#SBATCH --exclusive
+#SBATCH -p debug
+
+# Load Gaussian module to set environment
+module load gaussian python
+module list
+
+cd $SLURM_SUBMIT_DIR
+
+INPUT_BASENAME=G16_test
+GAUSSIAN_EXEC=g16
+
+if [ -e /dev/nvme0n1 ]; then
+SCRATCH=$TMPDIR
+echo "This node has a local storage and will use $SCRATCH as the scratch path"
+else
+SCRATCH=/scratch/$USER/$SLURM_JOB_ID
+echo "This node does not have a local storage drive and will use $SCRATCH as the scratch path"
+fi
+
+mkdir -p $SCRATCH
+
+export GAUSS_SCRDIR=$SCRATCH
+
+# Run gaussian NREL script (performs much of the Gaussian setup)
+g16_nrel
+
+#Setup Linda parameters
+if [ $SLURM_JOB_NUM_NODES -gt 1 ]; then 
+export GAUSS_LFLAGS='-vv -opt "Tsnet.Node.lindarsharg: ssh"' 
+export GAUSS_EXEDIR=$g16root/g16/linda-exe:$GAUSS_EXEDIR 
+fi 
+
+# Run Gaussian job 
+$GAUSSIAN_EXEC < $INPUT_BASENAME.com >& $INPUT_BASENAME.log 
+
+rm $SCRATCH/*
+rmdir $SCRATCH
+
+
+

This script and sample Gaussian input are located at /nopt/nrel/apps/gaussian/examples. The gaussian module is loaded by the script automatically, so the user does not need to have loaded the module before submitting the job. The g16_nrel python script edits the Default.Route file based on the SLURM environment set when the script is submitted to the queue. The user also must supply the name of the input file (INPUT_BASENAME).

+

The user scratch space is set to a directory in the default scratch space, with a name containing the job ID so that different jobs will not overwrite each other's files. The default scratch space is /tmp/scratch when a local disk is available, or /scratch/$USER otherwise. The script sets the directories for scratch files and the environment variables needed by Gaussian (e.g., GAUSS_SCRDIR).

+

To submit a job with the example script, named g16.slurm, one would type:

+

sbatch g16.slurm

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/gurobi/index.html b/Documentation/Applications/gurobi/index.html new file mode 100644 index 000000000..8bb731e59 --- /dev/null +++ b/Documentation/Applications/gurobi/index.html @@ -0,0 +1,5021 @@ + + + + + + + + + + + + + + + + + + + + + + + Gurobi - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Gurobi#

+

Gurobi Optimizer is a suite of solvers for mathematical programming.

+

For documentation, forums, and FAQs, see the Gurobi +website.

+

Gurobi includes a linear programming solver (LP), quadratic programming solver +(QP), quadratically constrained programming solver (QCP), mixed-integer linear +programming solver (MILP), mixed-integer quadratic programming solver (MIQP), +and a mixed-integer quadratically constrained programming solver (MIQCP).

+

Gurobi is available on multiple systems. There are 24 license tokens available for concurrent use: 6 are for general use (including commercial) and 18 standalone license tokens are for academic/government use. After logging onto the appropriate cluster, load the default Gurobi module using module load gurobi. The Gurobi interactive shell is run by typing gurobi.sh. Gurobi can also be interfaced with C/C++/Java/MATLAB/R codes by linking with the Gurobi libraries.

+
+

Tip

+

You can check how many Gurobi licenses are available for use by running the following command +after loading the Gurobi module +

gurobi_cl -t
+

+
+

For details on Gurobi programming, see the Gurobi Resource +Center and Gurobi +documentation.

+

Available Modules#

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Kestrel    Swift
gurobi/11.0.0
gurobi/10.0.2
gurobi/10.0.1
gurobi/9.5.1    gurobi/9.5.1
+

Gurobi and MATLAB#

+

To use the Gurobi solver with MATLAB, make sure you have the Gurobi and MATLAB +environment modules loaded, then issue the following two commands from the +MATLAB prompt or your script:

+
>> grb = getenv('GRB_MATLAB_PATH')
+>> path(path,grb)
+
+

Gurobi and General Algebraic Modeling System#

+

The General Algebraic Modeling System (GAMS) is a high-level modeling system for +mathematical programming and optimization. The GAMS package installed at NREL +includes Gurobi solvers. For more information, see using GAMS.

+

Note that the Gurobi license for this interface is separate from the standalone +Gurobi license, and supports far more instances.

+
+

Important

+

When using the Gurobi solver in GAMS, the user should NOT load the +Gurobi module. Simply using "module load gams" will be enough to load the +required Gurobi components and access rights.

+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/idaes_solvers/index.html b/Documentation/Applications/idaes_solvers/index.html new file mode 100644 index 000000000..d32dec1a5 --- /dev/null +++ b/Documentation/Applications/idaes_solvers/index.html @@ -0,0 +1,5154 @@ + + + + + + + + + + + + + + + + + + + + + + + IDAES Solvers - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

IDAES Solvers#

+

Institute for Design of Advanced Energy Systems (IDAES) Solvers are a collection of pre-compiled optimizer binaries with efficient linear algebra solvers that enable solving a variety of MINLP problems.

+

Available optimizers include:

+
    +
  1. Bonmin
  2. +
  3. CBC
  4. +
  5. CLP
  6. +
  7. Couenne
  8. +
  9. IPOPT + HSL
  10. +
+

Available Modules#

+
+

Info

+

IDAES solvers are currently not available on GPU compute nodes.

+
+ + + + + + + + + + + + + + + + + +
Kestrel (CPU nodes)
idaes_solvers/3.4.0-cray-libsci
idaes_solvers/3.4.0-intel-oneapi-mkl
idaes_solvers/3.4.0-netlib-lapack
+

v3.4.0#

+

IDAES Solvers v3.4.0 contains the following optimizer versions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Optimizer    Version
Bonmin    1.8.8
CBC    2.10.10
CLP    1.17.8
Couenne    0.5.8
IPOPT + HSL    3.13.2
+
+

Note

+

IPOPT is available with performant HSL MA27, MA57, and MA97 linear solvers. These have been shown to perform better than the default MUMPS solver for a variety of renewable energy optimization problems. Please see documentation here.

+
+

Usage#

+

Users can run any of the IDAES solvers simply by loading the appropriate module, e.g.,

+
module load idaes_solvers/3.4.0-cray-libsci # OR 
+module load idaes_solvers/3.4.0-netlib-lapack # OR
+module load idaes_solvers/3.4.0-intel-oneapi-mkl
+
+
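After loading one of these modules, you can confirm that the solver executables are on your PATH; the executable names below are the usual COIN-OR names and are assumed here:

+
which ipopt bonmin couenne cbc clp
+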

Bonmin#

+

Bonmin (Basic Open-source Nonlinear Mixed Integer) is an open source solver that leverages CBC and IPOPT to solve general mixed integer nonlinear programs (MINLP). +Please refer to the Bonmin documentation here

+

CBC#

+

COIN-OR Branch and Cut (CBC) solver is an opensource optimizer for solving mixed integer programs (MIP). Please refer to the documentation here for more details.

+

CLP#

+

COIN-OR Linear Program (CLP) is an open-source solver for solving linear programs. Please refer to the documentation here for further details.

+

Couenne#

+

Convex Over and Under Envelopes for Nonlinear Estimation (Couenne) is an open-source mixed integer nonlinear programming (MINLP) global optimization solver. Please visit the following website for more details regarding the solver.

+

IPOPT + HSL#

+

Interior Point Optimizer (IPOPT) is an open-source nonlinear optimizer. +Harwell Subroutine Library (HSL) is a collection of efficient linear solvers used by IPOPT. +HSL solvers have been demonstrated to be more performant than the default MUMPS (Multifrontal Massively Parallel sparse direct Solver) solver that comes with IPOPT, and are highly recommended.

+

IPOPT that is distributed as part of IDAES solvers comes pre-compiled with 3 HSL solvers:

+
    +
  1. MA27 is a serial linear solver suitable for small problems
  2. +
  3. MA57 has threaded BLAS operations and is suitable for small to medium-sized problems.
  4. +
  5. MA97 is a parallel direct linear solver for sparse symmetric systems. It is more suitable for medium and large problem sizes. Users may see worse performance on small problems when compared to MA27 and MA57.
  6. +
+

All three solvers produce repeatable answers unlike their sibling MA86.

+
+

Info

+

For additional details regarding IPOPT on Kestrel, e.g., building a custom version, please visit here. Please click here for additional details regarding HSL solvers on Kestrel.

+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/index.html b/Documentation/Applications/index.html new file mode 100644 index 000000000..f44d99d0b --- /dev/null +++ b/Documentation/Applications/index.html @@ -0,0 +1,4950 @@ + + + + + + + + + + + + + + + + + + + + + + + Applications - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Applications#

+

NREL maintains a variety of applications for use on the HPC systems. Please see the navigation bar on the left under "Applications" for more information on a specific application.

+

These applications can be accessed through environment modules on the systems. Some may not be available on all systems, and there may be some additional packages installed that don't have a dedicated page here. Please run the module avail command on a system to see what is available.

+

The following are packages that the NREL Computational Science Center supports:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Name    Description
Ansys    Enables modeling, simulation, and visualization of flow, turbulence, heat transfer and reactions for industrial applications
AMR-Wind    A massively parallel, block-structured adaptive-mesh, incompressible flow solver for wind turbine and wind farm simulations
Bonmin    Open source solver that leverages CBC and IPOPT to solve general mixed integer nonlinear programs (MINLP)
CBC    Open-source optimizer for solving mixed integer programs (MIP)
CLP    Open-source linear program solver
COMSOL    Multiphysics simulation environment
Converge    HPC CFD+, focused on engine modeling and simulation
Couenne    Open-source mixed integer nonlinear programming (MINLP) global optimization solver
FEniCS    Solving partial differential equations by the finite element method
GAMS    High-level modeling system for mathematical programming and optimization
Gaussian    Program for calculating molecular electronic structure and reactivity
Gurobi    Solver for mathematical programming
IPOPT    Open-source interior point nonlinear optimizer
LAMMPS    Open-source classical molecular dynamics program designed for massively parallel systems
MATLAB    General technical computing framework
OpenFOAM    Software for computational fluid dynamics
PLEXOS    Simulation software for modeling electric, gas, and water systems for optimizing energy markets
Q-Chem    ab initio quantum chemistry package for predicting molecular structures, reactivities, and vibrational, electronic and NMR spectra
STAR-CCM+    Engineering simulation package from CD-adapco for solving problems involving flow of fluids or solids, heat transfer, and stress
VASP    Atomic scale materials modeling, e.g., electronic structure calculations and quantum-mechanical molecular dynamics, from first principles
WRF    Mesoscale numerical weather prediction system designed for both atmospheric research and operational forecasting applications
Xpress    Optimization algorithms and technologies to solve linear, mixed integer and non-linear problems
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/ipopt/index.html b/Documentation/Applications/ipopt/index.html new file mode 100644 index 000000000..ca1eb2ef0 --- /dev/null +++ b/Documentation/Applications/ipopt/index.html @@ -0,0 +1,5252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + IPOPT - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

IPOPT#

+

IPOPT (Interior Point OPTimizer, pronounced "Eye-Pea-Opt") is an open-source non-linear optimizer using the interior point method.

+

IPOPT is commonly used in solving power flow, e.g., AC Optimal Power Flow, and controls problems. Please refer to their project website for the source code. The documentation can be found here.

+
+

Note

+

IPOPT with HSL linear solvers is available as a module on Kestrel. Please see IDAES Solvers for additional details. We recommend using the system module for ease of use, and only building from source if the module does not meet your needs.

+
+

Installation from source#

+
+

Info

+

We advise building all applications on a compute node using an interactive session. Please see Running Interactive Jobs for additional details.

+
+

Optional Pre-requisites#

+

We will build IPOPT using all prerequisites mentioned below. Users may pick and +choose depending on their needs.

+

Metis#

+

It is highly recommended to install Metis +- Serial Graph Partitioning and Fill-reducing Matrix Ordering software to +improve the performance of linear solvers such as MUMPS and HSL.

+
+

Warning

+

Using HSL linear solvers requires installing Metis. Metis is optional for MUMPS.

+
+

We will install Metis using Anaconda. However, it can also be installed from source. +To install using Anaconda, we will create a clean environment with only Metis. +For this example, the conda environment is being constructed within a directory in the hpcapps project on +Kestrel. Users can create a conda environment in any place of their choice.

+
module load conda
+conda create -p /projects/hpcapps/kpanda/conda-envs/metis python
+conda activate /projects/hpcapps/kpanda/conda-envs/metis
+conda install conda-forge::metis
+
+

Coinbrew#

+

Coinbrew is a package manager to install +COIN-OR tools. It makes installing IPOPT and its dependencies easier. However, it +is not necessary to the installation if one clones the repositories individually. +A user can download coinbrew by running the following command

+
wget https://raw.githubusercontent.com/coin-or/coinbrew/master/coinbrew
+
+

Intel oneAPI MKL#

+

Intel oneAPI MKL provides BLAS and LAPACK libraries for efficient linear algebra. +Additionally, it also provides access to oneMKL PARDISO linear solver that is +compatible with IPOPT.

+
+

Note

+

oneMKL PARDISO is not available on Kestrel GPU nodes since they consist of AMD processors.

+
+

HSL#

+

HSL (Harwell Subroutine Library) is a set of linear solvers +that can greatly accelerate the speed of optimization over other linear solvers, e.g., MUMPS. +HSL can be installed separately as well using ThirdParty-HSL. +Please see here for installation on Kestrel.

+

Installation#

+

In this demonstration, we will install Ipopt within +/projects/msoc/kpanda/apps/Ipopt/install. However, one is free to set their +install directory as they wish. Starting with the base working directory +/projects/msoc/kpanda/apps/ we will do the following

+
cd /projects/msoc/kpanda/apps/ # go into the base working directory
+wget https://raw.githubusercontent.com/coin-or/coinbrew/master/coinbrew # download coinbrew
+chmod u+x coinbrew # make the coinbrew script executable
+./coinbrew fetch Ipopt # Fetch Ipopt and its dependencies
+
+

This will download two additional directories, Ipopt and ThirdParty. ThirdParty in turn contains three subdirectories: ASL, HSL, and Mumps. The source code for all but HSL will be downloaded.

+

Next, we will create our install directories and subdirectories

+
mkdir -p /projects/msoc/kpanda/apps/Ipopt/install # create the install directory
+cd /projects/msoc/kpanda/apps/Ipopt/install # enter the directory
+mkdir bin lib include # create some subdirectories
+
+

We then add symbolic links to Metis in the install directory.

+
+

Note

+

If libmetis.so is in your LD_LIBRARY_PATH you do not need to do this step.

+
+
cd /projects/msoc/kpanda/apps/Ipopt/install/lib
+ln -s /projects/hpcapps/kpanda/conda-envs/metis/lib/libmetis.so libmetis.so
+cd ../include
+ln -s /projects/hpcapps/kpanda/conda-envs/metis/include/metis.h metis.h
+cd /projects/msoc/kpanda/apps/ # go back base directory
+
+

This has two advantages. First, we don't need to add /projects/hpcapps/kpanda/conda-envs/metis/lib/ to the LD_LIBRARY_PATH. Second, Anaconda puts all of the environment's libraries and include files in the same directories as libmetis.so and metis.h. Many of these libraries overlap with those used by HSL, Mumps, and IPOPT but are not necessarily the same versions, and loading a different version of a library than the one compiled against can cause unexpected behavior.

+

Next, we will load additional modules. If users require oneMKL PARDISO or would like to leverage Intel performance optimizations, run the following command:

+
module load intel-oneapi-mkl
+
+

Alternatively, users can load the open source Netlib LAPACK using the command

+
module load netlib-lapack # Please ensure you do not have intel-oneapi-mkl loaded
+
+

We will now copy the HSL source code tarball into /projects/msoc/kpanda/apps/ThirdParty/HSL/, unpack it, and rename the unpacked directory (or create a symbolic link to it) as coinhsl.

+

We are now ready to install IPOPT and its dependencies. We will use the default +compilers available in the Kestrel programming environment. Going back to the base +directory, we will run the following commands

+
cd /projects/msoc/kpanda/apps/ # go back base directory
+./coinbrew build Ipopt --disable-java \
+--prefix=/kfs2/projects/msoc/kpanda/apps/Ipopt/install \
+--with-metis \
+--with-metis-cflags=-I/projects/hpcapps/kpanda/conda-envs/metis/include \
+--with-metis-lflags="-L/projects/hpcapps/kpanda/conda-envs/metis/lib -lmetis" \
+--parallel-jobs 4 \
+--verbosity 4 \
+--reconfigure
+
+

Usage#

+

The installed Ipopt is now ready to be used. We need to update our PATH and LD_LIBRARY_PATH environment variables. In our demonstration, this will be

+
export PATH=/projects/msoc/kpanda/apps/Ipopt/install/bin:${PATH}
+export LD_LIBRARY_PATH=/projects/msoc/kpanda/apps/Ipopt/install/lib:${LD_LIBRARY_PATH}
+
+
+

Note

+

Do not forget to load intel-oneapi-mkl or netlib-lapack before running IPOPT, or else your runs will fail.

+
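A quick way to check that the custom build is picked up and that its libraries resolve (a minimal sketch) is:

+
module load intel-oneapi-mkl
+which ipopt                    # should point at the install directory above
+ldd $(which ipopt) | head      # verify that the linear algebra libraries are found
+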
+

Using Custom IPOPT with JuMP#

+

To use our custom installation of IPOPT with Ipopt.jl, we do the following:

+
    +
  1. Open the Julia REPL and activate an environment that has IPOPT installed
  2. +
  3. Tell Julia and Ipopt.jl the location of our IPOPT library and executable +
    ENV["JULIA_IPOPT_LIBRARY_PATH"] = ENV["/projects/msoc/kpanda/apps/Ipopt/install/lib"]
    +ENV["JULIA_IPOPT_EXECUTABLE_PATH"] = ENV["/projects/msoc/kpanda/apps/Ipopt/install/bin"]
    +
  4. +
  5. Rebuild Ipopt.jl with the above environment variables set to pick up the new library and executable +
    using Pkg; Pkg.build("Ipopt");
    +
  6. +
  7. Print the path Ipopt.jl has stored for libipopt.so. This should be the location of your compiled version. +
    using Ipopt; println(Ipopt.libipopt_path)
    +
  8. +
+
+

Info

+

The IPOPT build that comes with Ipopt.jl seems to expect the HSL library to have the name libhsl.so. The repo ThirdParty-HSL builds the library libcoinhsl.so. The simplest fix is to do the following:

+
cd /projects/msoc/kpanda/apps/Ipopt/install/lib # install directory
+# Create a symbolic link called libhsl.so
+ln -s libcoinhsl.so libhsl.so
+
+
+

The following Julia code is useful for testing that the HSL linear solvers are working:

+
using JuMP, Ipopt
+
+m = JuMP.Model(()->Ipopt.Optimizer(linear_solver="ma97"))
+@variable(m, x)
+@objective(m, Min, x^2)
+JuMP.optimize!(m)
+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/lammps/index.html b/Documentation/Applications/lammps/index.html new file mode 100644 index 000000000..96a7c72c9 --- /dev/null +++ b/Documentation/Applications/lammps/index.html @@ -0,0 +1,4977 @@ + + + + + + + + + + + + + + + + + + + + + + + LAMMPS - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Using LAMMPS Software#

+

Learn how to use LAMMPS software — an open-source, classical molecular dynamics program designed for massively parallel systems. It is distributed by Sandia National Laboratories.

+

LAMMPS has numerous built-in potentials for simulations of solid-state, soft matter, and coarse-grained systems. It can be run on a single processor or in parallel using MPI. To learn more, see the LAMMPS website.

+

At the time this page was published, the most recent version of LAMMPS on Eagle and Swift is the 23Jun22 version. The following packages have been installed in this version: asphere, body, bocs, class2, colloid, dielectric, diffraction, dipole, dpd-basic, drude, eff, electrode, extra-fix, extra-pair, fep, granular, h5md, intel, interlayer, kspace, manifold, manybody, mc, meam, misc, molecule, mpiio, openmp, opt, python, phonon, qep, qmmm, reaction, reaxff, replica, rigid, shock, spin, voronoi.

+

Sample Slurm Script#

+

A sample Slurm script for LAMMPS is given below:

+
+Sample Slurm script +
#!/bin/bash
+#SBATCH --time=48:00:00 
+#SBATCH --nodes=4
+#SBATCH --job-name=lammps_test
+#SBATCH --output=std.out
+#SBATCH --error=std.err
+
+module purge
+module load lammps/20220623 
+cd $SLURM_SUBMIT_DIR
+
+srun -n 144 lmp -in lmp.in -l lmp.out
+
+
+

where lmp.in is the input file and lmp.out is the output file. This runs LAMMPS using four nodes with 144 cores.

+

Additional Resources#

+

For instructions on running LAMMPS with OpenMP, see the HPC Github code repository.

+

Contact#

+

If you need other packages, please contact us.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/namd/index.html b/Documentation/Applications/namd/index.html new file mode 100644 index 000000000..a64ccb243 --- /dev/null +++ b/Documentation/Applications/namd/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Namd - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Namd

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/openfoam/index.html b/Documentation/Applications/openfoam/index.html new file mode 100644 index 000000000..66e137bac --- /dev/null +++ b/Documentation/Applications/openfoam/index.html @@ -0,0 +1,4988 @@ + + + + + + + + + + + + + + + + + + + + + + + OpenFOAM - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

OpenFOAM#

+

OpenFOAM Installation#

+

Building OpenFOAM with cray-mpich and gcc#

+

Instructions for installing OpenFOAM are available here.

+

In the instructions, you will be cloning the OpenFOAM folder which we will refer to as $OPENFOAM.

+

In order to build OpenFOAM with cray-mpich, two files need to be edited.

+
    +
  1. +

    $OPENFOAM/etc/bashrc

    +

    In this file, the variable WM_MPLIB will be defined as MPICH. Search for the line where the variable is exported and replace it with

    +
    export WM_MPLIB=MPICH
    +
    +
  2. +
  3. +

    $OPENFOAM/etc/config.sh/mpi

    +

    This file defines where MPICH is located on the system. Search for the mpich definition block and replace it with

    +
    export MPI_ARCH_PATH=/opt/cray/pe/mpich/8.1.28/ofi/gnu/10.3
    +export LD_LIBRARY_PATH="${MPI_ARCH_PATH}/lib:${LD_LIBRARY_PATH}"
    +export PATH="${MPI_ARCH_PATH}/bin:${PATH}"
    +export FOAM_MPI=mpich-8.1.28
    +export MPI_HOME=/opt/cray/pe/mpich/8.1.28/ofi/gnu/10.3
    +#export FOAM_MPI=mpich2-1.1.1p1
    +#export MPI_HOME=$WM_THIRD_PARTY_DIR/$FOAM_MPI
    +#export MPI_ARCH_PATH=$WM_THIRD_PARTY_DIR/platforms/$WM_ARCH$WM_COMPILER/$FOAM_MPI
    +
    +
    +_foamAddPath    $MPI_ARCH_PATH/bin
    +
    +
    +# 64-bit on OpenSuSE 12.1 uses lib64 others use lib
    +_foamAddLib     $MPI_ARCH_PATH/lib$WM_COMPILER_LIB_ARCH
    +_foamAddLib     $MPI_ARCH_PATH/lib
    +
    +
    +_foamAddMan     $MPI_ARCH_PATH/share/man
    +;;
    +
    +
  4. +
+

Before you install OpenFOAM, make sure to load PrgEnv-gnu. This will load gcc and cray-mpich. Make sure the same module is loaded at runtime.
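A minimal sketch of the environment setup and build, assuming the edits above have been made (Allwmake is OpenFOAM's standard top-level build script; see the OpenFOAM installation instructions for the exact invocation):

+
module load PrgEnv-gnu
+source $OPENFOAM/etc/bashrc
+cd $OPENFOAM
+./Allwmake -j
+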

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/qchem/index.html b/Documentation/Applications/qchem/index.html new file mode 100644 index 000000000..f7b0801b3 --- /dev/null +++ b/Documentation/Applications/qchem/index.html @@ -0,0 +1,4965 @@ + + + + + + + + + + + + + + + + + + + + + + + Q-Chem - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Using Q-Chem#

+

Q-Chem is a comprehensive ab initio quantum chemistry package with special strengths in excited state methods, non-adiabatic coupling, solvation models, explicitly correlated wave-function methods, and cutting-edge density functional theory (DFT).

+

Running Q-Chem#

+

The q-chem module should be loaded to set up the necessary environment. The module help output can provide more detail. In particular, the modulefile does not set the needed environment variable QCSCRATCH, as this is likely unique for each run. QCLOCALSCR is set by default to /tmp/scratch, but one may wish to point to a more persistent location if files written to local scratch need to be accessed after the job completes. Users can easily do this in their Slurm scripts or at the command line via export (Bash) or setenv (csh).

+

The simplest means of starting a Q-Chem job is via the supplied qchem wrapper. The general syntax is:

+

qchem -slurm <-nt number_of_OpenMP_threads> <input file> <output file> <savename>

+

For example, to run a job with 36 threads:

+

qchem -slurm -nt 36 example.in

+
+

Note

+

The Q-Chem input file must be in the same directory in which you issue the qchem command. In other words, qchem ... SOMEPATH/<input file> won't work.

+
+

For a full list of which types of calculation are parallelized and the types of parallelism, see the Q-Chem User's Manual.

+

To save certain intermediate files for, e.g., restart, a directory name needs to be provided. If not provided, all scratch files will be automatically deleted at job's end by default. If provided, a directory $QCSCRATCH/savename will be created and will hold saved files. In order to save all intermediate files, you can add the -save option.
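For example, to keep restart files under $QCSCRATCH/my_save and retain all intermediate files, the command might look like the following sketch, based on the syntax above:

+
qchem -slurm -nt 36 -save example.in example.out my_save
+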

+

A template Slurm script to run Q-Chem with 36 threads is:

+
+Sample Submission Script +
#!/bin/bash
+#SBATCH --job-name=my_qchem_job
+#SBATCH --account=my_allocation_ID
+#SBATCH --ntasks=36
+#SBATCH --time=01:00:00
+#SBATCH --mail-type=BEGIN,END,FAIL
+#SBATCH --mail-user=your_email@domain.name
+#SBATCH --output=std-%j.out
+#SBATCH --error=std-%j.err
+
+# Load the Q-Chem environment
+module load q-chem
+
+# Go to the location of job files, presumably from where this file was submitted
+cd $SLURM_SUBMIT_DIR
+
+# Set up scratch space
+SCRATCHY=/scratch/$USER/${SLURM_JOB_NAME:?}
+if [ -d $SCRATCHY ]
+then
+   rm -r $SCRATCHY
+fi
+mkdir -p $SCRATCHY
+export QCSCRATCH=$SCRATCHY
+
+# Move files over
+cp * $SCRATCHY/.
+cd $SCRATCHY
+
+# Start run. Keep restart files without intermediate temp files in directory called "my_save"
+qchem -nt 36 job.in job.out my_save
+
+
+

To run this script on Swift, the number of threads can be changed to 64.

+

A large number of example Q-Chem input examples are available in /nopt/nrel/apps/q-chem/<version>/samples.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/starccm/index.html b/Documentation/Applications/starccm/index.html new file mode 100644 index 000000000..e780e2943 --- /dev/null +++ b/Documentation/Applications/starccm/index.html @@ -0,0 +1,5078 @@ + + + + + + + + + + + + + + + + + + + + + + + STAR-CCM+ - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + +

Running STAR-CCM+ Software#

+

Simcenter STAR-CCM+ is a multiphysics CFD software that enables CFD engineers to model the complexity and explore the possibilities of products operating under real-world conditions. For information about the software's features, see the STAR-CCM+ +website.

+

STAR-CCM+ is installed on Kestrel but it is not supported on Vermilion or Swift. The network +licenses are checked out from the license server running on 1wv11lic02.nrel.gov.

+
+

Important

+

To run STAR-CCM+, users must be a member of the STAR-CCM+ user group. To be added to the group, contact HPC-Help.

+
+

Running STAR-CCM+ in GUI#

+

STAR-CCM+ can be run interactively on Kestrel using X windows by running the following commands in the terminal of an X window.

+
module load starccm
+starccm+
+
+

Running STAR-CCM+ in Batch Mode#

+

To run STAR-CCM+ in batch mode, first, you need to build your simulation <your_simulation.sim> and +put it in your project directory:

+
ls /projects/<your_project>/sim_dir
+your_simulation.sim
+
+

Then you need to create a Slurm script <your_scriptfile> as shown below to submit the job:

+
+Example Submission Script +
#!/bin/bash -l
+#SBATCH --time=2:00:00             # walltime limit of 2 hours
+#SBATCH --nodes=2                  # number of nodes
+#SBATCH --ntasks-per-node=104       # number of tasks per node (<=104 on Kestrel)
+#SBATCH --ntasks=72                # total number of tasks
+#SBATCH --job-name=your_simulation # name of job
+#SBATCH --account=<allocation-id>  # name of project allocation
+
+module load starccm                # load starccm module
+
+rm -rf /projects/<your_project>/sim_dir/simulation.log   # remove the log file from last run
+# Run Job
+
+echo "------ Running Starccm+ ------"
+
+starccm+ -np $SLURM_NTASKS -batch /projects/<your_project>/sim_dir/your_simulation.sim >> simulation.log
+
+echo "------ End of the job ------"
+
+
+

Note that you must give the full path of your input file in the script.

+

By default, STAR-CCM+ uses OpenMPI. However, the performance of OpenMPI on Kestrel is poor when running on multiple nodes. Intel MPI and Cray MPI are recommended for STAR-CCM+ on Kestrel; Cray MPI is expected to perform better than Intel MPI.

+

Running STAR-CCM+ with Intel MPI#

+

STAR-CCM+ comes with its own Intel MPI. To use the Intel MPI, the Slurm script should be modified to be:

+
+Example Intel MPI Submission Script +
#!/bin/bash -l
+#SBATCH --time=2:00:00             # walltime limit of 2 hours
+#SBATCH --nodes=2                  # number of nodes
+#SBATCH --ntasks-per-node=104       # number of tasks per node (<=104 on Kestrel)
+#SBATCH --ntasks=72                # total number of tasks
+#SBATCH --job-name=your_simulation # name of job
+#SBATCH --account=<allocation-id>  # name of project allocation
+
+module load starccm                # load starccm module
+
+export UCX_TLS=tcp                 # telling IntelMPI to treat the network as ethernet (Kestrel Slingshot can be thought of as ethernet) 
+                                   # by using the tcp protocol
+
+rm -rf /projects/<your_project>/sim_dir/simulation.log   # remove the log file from last run
+# Run Job
+
+echo "------ Running Starccm+ ------"
+
+starccm+ -mpi intel -np $SLURM_NTASKS -batch /projects/<your_project>/sim_dir/your_simulation.sim >> simulation.log
+
+echo "------ End of the job ------"
+
+
+

We specify Intel MPI in the launch command. By default, Intel MPI assumes the network it is running on is InfiniBand. Kestrel's network is Slingshot, which you can think of as Ethernet on steroids. The command export UCX_TLS=tcp tells Intel MPI to treat the network as Ethernet by using the TCP protocol.

+

To modify the settings for the built-in Intel MPI, users can refer to the documentation of STAR-CCM+ by running starccm+ --help.

+

Running STAR-CCM+ with Cray MPI#

+

STAR-CCM+ can also run with Cray MPI. The following Slurm script submits a STAR-CCM+ job that runs with Cray MPI.

+
+Example Cray MPI Script +
#!/bin/bash -l
+#SBATCH --time=2:00:00             # walltime limit of 2 hours
+#SBATCH --nodes=2                  # number of nodes
+#SBATCH --ntasks-per-node=104       # number of tasks per node (<=104 on Kestrel)
+#SBATCH --ntasks=72                # total number of tasks
+#SBATCH --job-name=your_simulation # name of job
+#SBATCH --account=<allocation-id>  # name of project allocation
+
+module load starccm                # load starccm module
+
+rm -rf /projects/<your_project>/sim_dir/simulation.log   # remove the log file from last run
+# Run Job
+
+echo "------ Running Starccm+ ------"
+
+starccm+ -mpi crayex -np $SLURM_NTASKS -batch /projects/<your_project>/sim_dir/your_simulation.sim >> simulation.log
+
+echo "------ End of the job ------"
+
+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/vasp/index.html b/Documentation/Applications/vasp/index.html new file mode 100644 index 000000000..99828b021 --- /dev/null +++ b/Documentation/Applications/vasp/index.html @@ -0,0 +1,5914 @@ + + + + + + + + + + + + + + + + + + + + + + + VASP - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

VASP

+ +

The Vienna Ab initio Simulation Package (VASP) is an application for atomic scale materials modelling from first principles. VASP computes an approximate solution to the many-body Schrödinger equation, either within density functional theory or within the Hartree-Fock approximation using pseudopotentials and plane wave basis sets. VASP can carry out a range of electronic structure and quantum-mechanical molecular dynamics calculations and has many features including hybrid functionals, Green's functions methods (GW quasiparticles, and ACFDT-RPA) and many-body perturbation theory (2nd-order Møller-Plesset). For a full list of capabilities, please see the About VASP page and for further details, documentation, forums, and FAQs, visit the VASP website.

+

Accessing VASP on NREL's HPC Clusters#

+
+

Important

+

The VASP license requires users to be a member of a "workgroup" defined by the University of Vienna or Materials Design. If you are receiving "Permission denied" errors when trying to use VASP, you must be made part of the "vasp" Linux group first. To join, please contact HPC Help with the following information:

+

- Your name
+- The workgroup PI
+- Whether you are licensed through Vienna (academic) or Materials Design, Inc. (commercial)
+- If licensed through Vienna:
+    - The e-mail address under which you are registered with Vienna as a workgroup member (this may not be the e-mail address you used to get an HPC account)
+    - Your VASP license ID
+- If licensed through Materials Design:
+    - Proof of current licensed status
+
+Once status can be confirmed, we can provide access to our VASP builds.

+
+

Getting Started#

+

VASP is available through modules on all HPC systems. To view the available versions of VASP modules on each cluster, use the command module avail vasp. To see details for a specific version, use module show vasp/<version>. To load a specific version, use module load vasp/<version>. If no version is specified, the default module (marked with "(D)") will be loaded. In the following sections, we will give sample submission scripts and performance recommendations. To run VASP, the following 4 input files are needed: POSCAR, POTCAR, INCAR, KPOINTS. For more information about VASP input files, see the VASP wiki.
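For example, a session might look like the following, run from the directory that holds the four input files (the version shown is only illustrative; pick one of the versions that module avail lists on your system):

$ module avail vasp
$ module load vasp/6.4.2_openMP
$ ls
INCAR  KPOINTS  POSCAR  POTCAR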

+

Each VASP module provides three executables where the correct one should be chosen for the type of job:

+
    +
1. vasp_std is for general k-point meshes with collinear spins
2. vasp_ncl is for general k-point meshes with non-collinear spins
3. vasp_gam is for Gamma-point-only calculations

NREL also offers build and module support for additional functionalities such as transition state theory tools from University of Texas-Austin, implicit solvation models from the University of Florida, and BEEF-vdw functionals. Please contact HPC-Help if a functionality you need is not present in one of our builds.

+
+

Attention

+

If you would like to build your own VASP on Kestrel, please read our section Building VASP on Kestrel carefully before compiling on Kestrel's cray architecture.

+
+

Supported Versions#

+

NREL offers modules for VASP 5 and VASP 6 on CPUs as well as GPUs on certain systems. See table below for current availability, as well as system specific documentation for more details on running different builds.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
             Kestrel   Swift   Vermilion
VASP 5       X         X
VASP 6       X         X       X
VASP 6 GPU   X         X       X
+

VASP on Kestrel#

+

Running Using Modules#

+

CPU#

+

There are several modules for CPU builds of VASP 5 and VASP 6. As of 08/09/2024 we have released new modules for VASP on Kestrel CPUs:

+
CPU $ module avail vasp
+
+------------ /nopt/nrel/apps/cpu_stack/modules/default/application -------------
+   #new modules:
+   vasp/5.4.4+tpc     vasp/6.3.2_openMP+tpc    vasp/6.4.2_openMP+tpc
+   vasp/5.4.4_base    vasp/6.3.2_openMP        vasp/6.4.2_openMP
+
+   # Legacy modules will be removed during system time in December!
+   vasp/5.4.4         vasp/6.3.2               vasp/6.4.2            (D)
+
+

What’s new:

+
    +
• New modules have been rebuilt with the latest Cray Programming Environment (cpe23), updated compilers, and math libraries.
• OpenMP capability has been added to VASP 6 builds.
• Modules that include third-party codes (e.g., libXC, libBEEF, VTST tools, and VASPsol) are now denoted with +tpc. Use module show vasp/<version> to see details of a specific version.
+

We encourage users to switch to the new builds and strongly recommend using OpenMP parallelism.

+
+

Important: Conserving your AUs on Kestrel

+

Kestrel nodes have nearly 3x as many cores as Eagle's did. Our testing has indicated VASP DFT jobs up to 200 atoms run more efficiently on a fraction of a node (see performance notes below). We therefore highly recommend that VASP DFT users check the efficiency of their calculations and consider using the shared partition to get the most out of their allocations. Please see the sample shared job script provided below and the Shared partition documentation.
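One quick way to gauge how efficiently a finished job used its allocation is Slurm's accounting data; the sketch below uses standard sacct fields, with the job ID as a placeholder:

sacct -j <JobID> --format=JobID,Elapsed,AllocCPUS,TotalCPU,MaxRSS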

+
+
+Sample job script: Kestrel - Full node w/ OpenMP +

Note: (--ntasks-per-node) x (--cpus-per-task) = total number of physical cores you want to use per node. Here 26x4=104, i.e., all cores on the node.

+
#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=26 # set number of MPI ranks per node
+#SBATCH --cpus-per-task=4 # set number of OpenMP threads per MPI rank
+#SBATCH --time=2:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+module load vasp/<version with openMP>
+
+srun vasp_std &> out
+
+
+
+Performance Note +

The use of OpenMP threads is highly recommended on a system with as many cores per node as Kestrel. Testing of benchmark 2 has shown that OpenMP threads can both increase performance (faster time to solution) as well as scaling:

+

VASP-sharednodescaling

+
+
+Sample job script: Kestrel - Full node +
#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=104
+#SBATCH --cpus-per-task=1
+#SBATCH --time=2:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+module load vasp/<version>
+
+srun vasp_std &> out
+
+
+
+Sample job script: Kestrel - Shared (partial) node +

As described in detail in the Shared partition documentation, when you run on part of a node, you will be charged for the greater of either the fraction of cores (104 total) or of memory (about 240G total or 2.3G/core) requested. The script below shows how to request 1/4 of a node, but you can freely set --tasks and --mem-per-cpu as you see fit.

+
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --partition=shared
+#SBATCH --tasks=26 #How many cpus you want
+#SBATCH --mem-per-cpu=2G #Default is 1 GB/core but this is likely too little for electronic structure calculations
+#SBATCH --time=2:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+module load vasp/<version>
+
+srun vasp_std &> out
+
+
+
+Performance Note +

Internal testing at NREL has indicated that standard VASP DFT calculations from sizes 50-200 atoms run most efficiently on a quarter to a half node. The graph below shows the performance of a 192-atom VASP DFT job using partial nodes on the shared partition. Up to 1/2 a node, near perfect scaling is observed, but using the full node gives a speedup of only 1.5 relative to using 1/2 a node. So, the calculation will cost about 33% more AUs (a factor of 2/1.5) if run on a single node compared to a half node. For a 48-atom surface Pt calculation, using the full node gives no speedup relative to using 1/2 a node, so the calculation will cost 100% more AUs if run on a single node compared to half a node.

+

VASP-sharednodescaling

+
+

GPU#

+
+

Important

+

Submit GPU jobs from a GPU login node. +$ ssh <username>@kestrel-gpu.hpc.nrel.gov

+
+

There are several modules for GPU builds of VASP 5 and VASP 6:

+
GPU $ module avail vasp
+
+------------ /nopt/nrel/apps/gpu_stack/modules/default/application -------------
+   vasp/6.3.2_openMP    vasp/6.3.2    vasp/6.4.2_openMP    vasp/6.4.2 (D)
+
+
+Sample job script: Kestrel - Full GPU node +
#!/bin/bash
+#SBATCH --account=<your-account-name> 
+#SBATCH --nodes=1
+#SBATCH --gpus=4 
+#SBATCH --ntasks-per-node=4
+#SBATCH --cpus-per-task=1 #The GPU partition is shared :. you must specify cpus needed even when requesting all the GPU resources
+#SBATCH --time=02:00:00
+#SBATCH --job-name=<your-job-name>
+#SBATCH --mem=0 #The GPU partition is shared :. you must specify memory needed even when requesting all the GPU resources
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+module load vasp/<version>
+
+srun vasp_std &> out
+
+
+

GPU nodes can be shared so you may request fewer than all 4 GPUs on a node. When doing so, you must also request appropriate CPU cores and memory. To run VASP on N GPUs, we recommend requesting --gpus=N, --ntasks-per-node=N, and --mem=N*85G. See the below sample script for running on 2 GPUs.

+
+Sample job script: Kestrel - Partial GPU node +
#!/bin/bash
+#SBATCH --account=<your-account-name> 
+#SBATCH --nodes=1
+#SBATCH --gpus=2 
+#SBATCH --ntasks-per-node=2
+#SBATCH --mem=170G # request cpu memory 
+#SBATCH --cpus-per-task=1
+#SBATCH --time=02:00:00
+#SBATCH --job-name=<your-job-name>
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+module load vasp/<version>
+
+srun vasp_std &> out
+
+
+

Building VASP on Kestrel#

+

Sample makefiles for vasp5 (cpu version) and vasp6 (cpu and gpu versions) on Kestrel can be found in our Kestrel Repo under the vasp folder.

+
+

Important

+

On Kestrel, any modules you have loaded on the login node will be copied to a compute node, and there are many loaded by default for the cray programming environment. Make sure you are using what you intend to. Please see the Kestrel Environments page for more details on programming environments.
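A quick sanity check before a build is to see what the environment already contains and, if necessary, start from a clean slate:

module list     # show everything inherited from the login node and the defaults
module purge    # start clean before loading the toolchain you actually want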

+
+

CPU#

+
Compiling your build#
+
+Build recommendations for VASP - CPU +

We recommend building vasp with a full intel toolchain and launching with the cray-mpich-abi at runtime. Additionally, you should build on a compute node so that you have the same architecture as at runtime:

+

salloc -N 1 -t <time> -A <account>
+
+Then, load appropriate modules for your mpi, compilers, and math packages: +
module purge
+module load craype-x86-spr  #specifies sapphire rapids architecture
+module load intel-oneapi-compilers
+module load intel-oneapi-mpi
+module load intel-oneapi-mkl
+

+

Sample makefiles for vasp5 and vasp6 on Kestrel can be found in our Kestrel Repo under the vasp folder.
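After the modules above are loaded, the remaining steps follow the usual VASP build procedure; the sketch below is illustrative only, with the source directory name and makefile path as placeholders for your own copies:

cd vasp.6.x.y                                         # your VASP source directory
cp /path/to/kestrel-repo-makefile makefile.include    # one of the sample makefiles mentioned above
make DEPS=1 -j8 all                                   # builds vasp_std, vasp_gam, and vasp_ncl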

+
+
Running your build#
+
+

Important

+

We have found that it is optimal to run an Intel toolchain build of VASP using cray-mpich-abi at runtime. Cray-mpich-abi has several dependencies on cray network modules, so the easiest way to load it is to first load PrgEnv-intel and then swap the default cray-mpich module for cray-mpich-abi with module swap cray-mpich cray-mpich-abi. You must then load your intel compilers and math libraries, and unload cray's libsci. A sample script showing all of this is in the dropdown below.

+
+
+Sample job script: How to run your own build - CPU +
#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=104
+#SBATCH --time=2:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+# Load cray-mpich-abi and its dependencies within PrgEnv-intel, intel compilers, mkl, and unload cray's libsci
+module purge
+module load PrgEnv-intel
+module load craype-x86-spr
+module swap cray-mpich cray-mpich-abi
+module unload cray-libsci
+module load intel-oneapi-compilers
+module load intel-oneapi-mkl
+
+export VASP_PATH=/PATH/TO/YOUR/vasp_exe
+
+srun ${VASP_PATH}/vasp_std &> out
+
+
+

GPU#

+
+

Important

+

Make sure to build GPU software on a GPU login node or GPU compute node.

+
+
Compiling your build#
+
+Build recommendations for VASP - GPU +
# Load appropriate modules for your build. For our example these are:
+ml gcc-stdalone/13.1.0
+ml PrgEnv-nvhpc/8.5.0
+ml nvhpc/23.9   #do not use the default nvhpc/24.1
+ml cray-libsci/23.05.1.4
+
+make DEPS=1 -j8 all
+
+
+
Running your build#
+
+Sample job script: How to run your own build - GPU +

See the sample job scripts above for the SBATCH and export directives to request full or shared GPU nodes.

+
# Load modules appropriate for your build. For ours these are:
+ml gcc-stdalone/13.1.0
+ml PrgEnv-nvhpc/8.5.0
+ml nvhpc/23.9   #do not use the default nvhpc/24.1
+ml cray-libsci/23.05.1.4
+
+# Export path to your build
+export VASP_PATH=/PATH/TO/YOUR/BUILD/bin
+
+srun ${VASP_PATH}/vasp_std &> out
+
+
+

VASP on Swift#

+

CPU#

+
+Sample job script: Swift - VASP 6 CPU (Intel MPI) +
#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=64
+#SBATCH --nodes=1
+
+#Set --exclusive if you would like to prevent any other jobs from running on the same nodes (including your own)
+#You will be charged for the full node regardless of the fraction of CPUs/node used
+#SBATCH --exclusive
+
+module purge
+
+#Load Intel MPI VASP build and necessary modules
+ml vaspintel 
+ml slurm/21-08-1-1-o2xw5ti 
+ml gcc/9.4.0-v7mri5d 
+ml intel-oneapi-compilers/2021.3.0-piz2usr 
+ml intel-oneapi-mpi/2021.3.0-hcp2lkf 
+ml intel-oneapi-mkl/2021.3.0-giz47h4
+
+srun -n 64 vasp_std &> out
+
+
+
+Sample job script: Swift - VASP 6 CPU (Open MPI) +
#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=64
+#SBATCH --nodes=1
+
+#Set --exclusive if you would like to prevent any other jobs from running on the same nodes (including your own)
+#You will be charged for the full node regardless of the fraction of CPUs/node used
+#SBATCH --exclusive
+
+module purge
+
+#Load OpenMPI VASP build and necessary modules
+ml vasp 
+ml slurm/21-08-1-1-o2xw5ti 
+ml openmpi/4.1.1-6vr2flz
+
+srun -n 64 vasp_std &> out
+
+
+
+Sample job script: Swift - run multiple jobs on the same node(s) +

The following script launches two instances of srun vasp_std on the same node using an array job. Each job will be restricted to 32 cores on the node.

#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=32
+#SBATCH --nodes=1
+
+#Set --exclusive=user if you would like to prevent anyone else from running on the same nodes as you
+#You will be charged for the full node regardless of the fraction of CPUs/node used
+#SBATCH --exclusive=user
+
+#Set how many jobs you would like to run at the same time as an array job
+#In this example, an array of 2 jobs will be run at the same time. This script will be run once for each job.
+#SBATCH --array=1-2
+
+#The SLURM_ARRAY_TASK_ID variable can be used to modify the parameters of the distinct jobs in the array.
+#In the case of array=1-2, the first job will have SLURM_ARRAY_TASK_ID=1, and the second will have SLURM_ARRAY_TASK_ID=2.
+#For example, you could assign different input files to runs 1 and 2 by storing them in directories input_1 and input_2 and using the following code:
+
+mkdir run_${SLURM_ARRAY_TASK_ID}
+cd run_${SLURM_ARRAY_TASK_ID}
+cp ../input_${SLURM_ARRAY_TASK_ID}/POSCAR .
+cp ../input_${SLURM_ARRAY_TASK_ID}/POTCAR .
+cp ../input_${SLURM_ARRAY_TASK_ID}/INCAR .
+cp ../input_${SLURM_ARRAY_TASK_ID}/KPOINTS .
+
+#Now load vasp and run the job...
+
+module purge
+
+#Load Intel MPI VASP build and necessary modules
+ml vaspintel 
+ml slurm/21-08-1-1-o2xw5ti 
+ml gcc/9.4.0-v7mri5d 
+ml intel-oneapi-compilers/2021.3.0-piz2usr 
+ml intel-oneapi-mpi/2021.3.0-hcp2lkf 
+ml intel-oneapi-mkl/2021.3.0-giz47h4
+
+srun -n 32 vasp_std &> out
+

+
+
+Sample job script: Swift - run a single job on a node shared with other users +

The following script launches srun vasp_std on only 32 cores on a single node. The other 32 cores remain open for other users to use. You will only be charged for half of the node hours.

+
#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=32
+#SBATCH --nodes=1
+
+#To make sure that you are only being charged for the CPUs your job is using, set mem=2GB*CPUs/node
+#--mem sets the memory used per node
+#SBATCH --mem=64G
+
+module purge
+
+#Load Intel MPI VASP build and necessary modules
+ml vaspintel 
+ml slurm/21-08-1-1-o2xw5ti 
+ml gcc/9.4.0-v7mri5d 
+ml intel-oneapi-compilers/2021.3.0-piz2usr 
+ml intel-oneapi-mpi/2021.3.0-hcp2lkf 
+ml intel-oneapi-mkl/2021.3.0-giz47h4
+
+srun -n 32 vasp_std &> out
+
+
+
+Performance Notes +

The Intel MPI builds are recommended over the Open MPI builds as they exhibit faster performance.

+

Use at most 64 cores/node. On Swift, each node has 64 physical cores, and each core is subdivided into two virtual cores in a process that is identical to hyperthreading. Because of this, up to 128 cores can be requested from a single Swift node, but each core will only represent half of a physical core.

+

On Swift, VASP is most efficiently run on partially full nodes.

+

Multiple jobs can run on the same nodes on Swift. If you are only using a fraction of a node, other users' jobs could be assigned to the rest of the node, which might degrade performance. Setting "#SBATCH --exclusive" in your run script prevents other users from using the same node as you, but you will be charged the full 5 AUs/node, regardless of the number of CPUs/node you are using.

+
+

GPU#

+
+Sample job script: Swift - VASP 6 GPU (OpenACC) +
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --partition=gpu
+#SBATCH --gres=gpu:4
+#SBATCH --gpu-bind=map_gpu:0,1,2,3
+#SBATCH --exclusive
+#SBATCH --time=1:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+#Load environment and openACC VASP module:
+module purge
+. /nopt/nrel/apps/env.sh
+module use /nopt/nrel/apps/modules
+module load vasp/openacc
+
+# Note: environment will soon become default and the module will be able to be loaded with
+# module purge
+# module load vasp/openacc
+
+#Launch vasp using mpirun
+mpirun -npernode 4 vasp_std &> out
+
+
+

VASP on Vermilion#

+

CPU#

+
+Sample job script: Vermilion - VASP 6 CPU (Intel MPI) +
#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=1
+#SBATCH --time=8:00:00
+#SBATCH --error=std.err
+#SBATCH --output=std.out
+#SBATCH --partition=lg
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+module purge
+ml vasp/6.3.1
+
+source /nopt/nrel/apps/220525b/myenv.2110041605
+ml intel-oneapi-compilers/2022.1.0-k4dysra
+ml intel-oneapi-mkl/2022.1.0-akthm3n
+ml intel-oneapi-mpi/2021.6.0-ghyk7n2
+
+# some extra lines that have been shown to improve VASP reliability on Vermilion
+ulimit -s unlimited
+export UCX_TLS=tcp,self
+export OMP_NUM_THREADS=1
+ml ucx
+
+srun --mpi=pmi2 -n 60 vasp_std
+
+# If the multi-node calculations are breaking, replace the srun line with this line
+# I_MPI_OFI_PROVIDER=tcp mpirun -iface ens7 -np 60 vasp_std
+
+
+
+Sample job script: Vermilion - VASP 6 CPU (Open MPI) +
#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=1
+#SBATCH --time=8:00:00
+#SBATCH --error=std.err
+#SBATCH --output=std.out
+#SBATCH --partition=lg
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+module purge
+ml gcc
+ml vasp/6.1.1-openmpi
+
+# some extra lines that have been shown to improve VASP reliability on Vermilion
+ulimit -s unlimited
+export UCX_TLS=tcp,self
+export OMP_NUM_THREADS=1
+ml ucx
+
+# lines to set "ens7" as the interconnect network
+module use /nopt/nrel/apps/220525b/level01/modules/lmod/linux-rocky8-x86_64/gcc/12.1.0
+module load openmpi
+OMPI_MCA_param="btl_tcp_if_include ens7"
+
+srun --mpi=pmi2 -n 60 vasp_std
+
+
+
+Sample job script: Vermilion - VASP 5 CPU (Intel MPI) +
#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=1
+#SBATCH --time=8:00:00
+##SBATCH --error=std.err
+##SBATCH --output=std.out
+#SBATCH --partition=lg
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+module purge
+
+ml vasp/5.4.4
+
+source /nopt/nrel/apps/220525b/myenv.2110041605
+ml intel-oneapi-compilers/2022.1.0-k4dysra
+ml intel-oneapi-mkl/2022.1.0-akthm3n
+ml intel-oneapi-mpi/2021.6.0-ghyk7n2
+
+# some extra lines that have been shown to improve VASP reliability on Vermilion
+ulimit -s unlimited
+export UCX_TLS=tcp,self
+export OMP_NUM_THREADS=1
+ml ucx
+
+srun --mpi=pmi2 -n 60 vasp_std
+
+# If the multi-node calculations are breaking, replace the srun line with this line
+# I_MPI_OFI_PROVIDER=tcp mpirun -iface ens7 -np 60 vasp_std
+
+
+
+Performance Notes +

On Vermilion, VASP performs best on a single node. Many issues have been reported when running VASP on multiple nodes, especially when requesting all available cores on each node. For MPI to work reliably on Vermilion, it is necessary to specify the interconnect network that Vermilion should use to communicate between nodes. If many cores are needed for your VASP calculation, it is recommended to run VASP on a single node in the lg partition (60 cores/node), which provides the largest number of cores per node. If you do need multiple nodes, the settings below have been shown to work well for jobs on 2 nodes. Open MPI multi-node jobs are more reliable on Vermilion, but Intel MPI jobs generally show better runtime performance.

+

If your multi-node Intel MPI VASP job is crashing on Vermilion, try replacing your srun line with the following mpirun run line. -iface ens7 sets ens7 as the interconnect. +

I_MPI_OFI_PROVIDER=tcp mpirun -iface ens7 -np 16 vasp_std
+

+

If your multi-node Open MPI VASP job is crashing on Vermilion, replace a call to load an openmpi module with the following lines. The OMPI_MCA_param variable sets ens7 as the interconnect.

+
module use /nopt/nrel/apps/220525b/level01/modules/lmod/linux-rocky8-x86_64/gcc/12.1.0
+module load openmpi
+OMPI_MCA_param="btl_tcp_if_include ens7"
+
+
+

GPU#

+
+Sample job script: Vermilion - VASP 6 GPU (OpenACC) +
#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=2
+#SBATCH --time=1:00:00
+##SBATCH --error=std.err
+##SBATCH --output=std.out
+#SBATCH --partition=gpu
+#SBATCH --gpu-bind=map_gpu:0,1,0,1
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+# Load the OpenACC build of VASP
+ml vasp/6.3.1-nvhpc_acc
+
+# Load some additional modules
+module use  /nopt/nrel/apps/220421a/modules/lmod/linux-rocky8-x86_64/gcc/11.3.0/
+ml nvhpc
+ml fftw
+
+mpirun -npernode 1 vasp_std > vasp.$SLURM_JOB_ID
+
+
+
+Performance Notes +

The OpenACC build shows a significant performance improvement compared to the CUDA build, but is more susceptible to running out of memory. The OpenACC GPU port of VASP was released with VASP 6.2.0, and the CUDA GPU port of VASP was dropped in VASP 6.3.0.

+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/wrf/index.html b/Documentation/Applications/wrf/index.html new file mode 100644 index 000000000..607bfdfdb --- /dev/null +++ b/Documentation/Applications/wrf/index.html @@ -0,0 +1,5092 @@ + + + + + + + + + + + + + + + + + + + + + + + WRF - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

How to Use the WRF Application Software#

+

Documentation: Weather Research and Forecasting (WRF) Model

+

The WRF model is a state-of-the-art mesoscale numerical weather prediction system designed for both atmospheric research and operational forecasting applications.

+

Getting Started#

+

This section provides the minimum information necessary to successfully run a WRF job on the NREL Kestrel cluster. First, we show how to use the WRF modules that are already built, since several versions of WRF, built with different toolchains, may be available.

+
% module avail wrf
+     wrf/4.2.2-cray (D)    
+     wrf/4.2.2-intel
+
+

The module avail wrf command shows which WRF modules are available at a given time, covering the versions of WRF built with different toolchains. Currently, version 4.2.2 is available, built with both the Cray and Intel toolchains. Users are free to choose any of the available module versions. There are currently no modules for running WRF on GPUs, but an effort is underway to make that available on future systems.

+

Next, we look at how to use the WRF module. Below is an example job script:

+

Example Job Script#

+
+Kestrel CPU Sample Submission Script +
#!/bin/bash
+
+# This job requests 96 tasks per node. This may need to be adjusted based on system hardware. 
+
+#SBATCH --time=12:00:00
+#SBATCH --nodes=4
+#SBATCH --ntasks-per-node=96
+#SBATCH --partition=<partition-name>
+#SBATCH --exclusive=user
+#SBATCH --account=<account-name>
+#SBATCH --export=ALL
+#SBATCH --job-name=<job-name>
+#SBATCH --output=out_%j
+
+module load wrf/4.2.2-cray # or module load wrf/4.2.2-intel
+
+# Note that builds with different toolchains may require different modules and environments to be loaded
+
+export OMP_NUM_THREADS=1
+
+srun wrf.exe
+
+
+

To submit the above WRF jobscript named submit_wrf.sh, do sbatch submit_wrf.sh

+

Supported Versions#

+ + + + + + + + + + + +
Kestrel
4.2.2
+

Advanced#

+

Building Instructions From Source#

+

Any WRF version can be downloaded here. To build WRF, appropriate versions of netcdf, hdf5, and pnetcdf (if parallel netcdf is required) must also be available, since WRF depends on them. These supporting libraries may already be offered as modules; if not, users need to build them first (instructions for doing so are not provided here).

+
+Building on Kestrel with Cray Toolchain +

Copy the configure.wrf file for the Cray toolchain, which can be found at /nopt/nrel/apps/software/wrf/cray/WRF-4.2.2/configure.wrf on Kestrel, into your WRF build dir WRF-4.x.y, and do % cd WRF-4.x.y.

+
+
+Building on Kestrel with Intel Toolchain +

Copy the configure.wrf file for the Intel toolchain, which can be found at /nopt/nrel/apps/software/wrf/intel/WRF-4.2.2/configure.wrf on Kestrel, into your WRF build dir WRF-4.x.y, and do % cd WRF-4.x.y.

+
+

Alternatively, configure.wrf can be generated by loading the modules and setting the environment variables shown below, executing % ./configure, and choosing the appropriate architecture option.

+

After loading the modules for the appropriate toolchain, supporting netcdf, hdf5, pnetcdf softwares, and setting the netcdf environments, compile the WRF software:

+

For Cray toolchain: +

% export NETCDF=/opt/cray/pe/netcdf/default/CRAYCLANG/14.0 
+% ./compile em_real
+

+

For Intel toolchain: +

% export NETCDF=/nopt/nrel/apps/libraries/08-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.10.0/netcdf 
+% ./compile em_real
+
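If the compile finishes successfully, the executables for the em_real case are placed in the main/ directory of the WRF source tree, so a quick check is:

ls -l main/*.exe    # expect wrf.exe and real.exe, among others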

+


+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Applications/xpressmp/index.html b/Documentation/Applications/xpressmp/index.html new file mode 100644 index 000000000..4ee375eb4 --- /dev/null +++ b/Documentation/Applications/xpressmp/index.html @@ -0,0 +1,4990 @@ + + + + + + + + + + + + + + + + + + + + + + + Xpress - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Xpress Solver#

+

FICO Xpress Optimizer provides optimization algorithms and technologies to solve linear, mixed integer, and non-linear problems.

+

For documentation, forums, and FAQs, see the FICO +website.

+

The Xpress solver includes algorithms that can solve

+
    +
• Linear Programs
• Mixed Integer Programs
• Quadratic Programs
• Quadratically Constrained Quadratic Programs
• Second Order Cone Problems
+

The Xpress solver cannot be used to solve general nonlinear programs beyond the quadratic and conic classes listed above.

+

Available Modules#

+ + + + + + + + + + + + + + + + + +
Kestrel (CPU)
xpressmp/9.0.2
xpressmp/9.2.2
xpressmp/9.2.5
+
+

Info

+

Xpress is available as a module on Kestrel. Additionally, NREL has a site-wide license for Xpress to run locally on an NREL-issued computer. Please see instructions here.

+
+

Running Xpress Solver on Kestrel#

+
+

Important

+

While Xpress Solver is available as a module on Kestrel for use by all NREL-users, you MUST be a part of the xpressmp group on Kestrel. If you are new or have not used Xpress in a while, you can:

+
    +
1. Check whether you are a part of this group by running the groups command from your terminal, or
2. Load the xpressmp module and run an example
+

If you are not a part of the xpressmp linux group and/or are unable to run an Xpress instance, please submit a ticket to HPC-Help@nrel.gov requesting access to Xpress on HPC systems and provide a business justification that describes how you intend to use Xpress in your workflow.

+
+

Xpress solvers can be used by simply loading the module

+
module load xpressmp/9.2.5
+
+

Once the module is loaded, Xpress Solver can be used directly from the command line by running the optimizer command.

+
$ optimizer
+FICO Xpress Solver 64bit v9.2.5 Nov  9 2023
+(c) Copyright Fair Isaac Corporation 1983-2023. All rights reserved
+ Optimizer v42.01.04    [/nopt/nrel/apps/software/xpressmp/9.2.5/lib/libxprs.so.42.01.04]
+[xpress kpanda] 
+
+

Alternatively, Xpress can now be used directly in Python or Julia by loading the necessary modules and programming environments.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Build_Tools/cmake/index.html b/Documentation/Development/Build_Tools/cmake/index.html new file mode 100644 index 000000000..2d90de881 --- /dev/null +++ b/Documentation/Development/Build_Tools/cmake/index.html @@ -0,0 +1,4934 @@ + + + + + + + + + + + + + + + + + + + + + + + CMake - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

CMake#

+

Documentation: https://cmake.org/documentation/

+

CMake is a cross-platform build tool that is used to manage software compilation and testing. From the CMake web site:

+
+

CMake is an open-source, cross-platform family of tools designed to build, test and package software. CMake is used to control the software compilation process using simple platform and compiler independent configuration files, and generate native makefiles and workspaces that can be used in the compiler environment of your choice.

+
+

Getting Started#

+

On the NREL HPC systems, CMake is available through:

+
module load cmake
+
+

New users are encouraged to refer to the documentation linked above, in particular the CMake tutorial. To build software that includes a CMakeLists.txt file, the steps often follow a pattern similar to:

+
mkdir build
+cd build
+# Reference the path to the CMakeLists.txt file:
+CC=<c_compiler> CXX=<c++_compiler> cmake ..
+make
+
+

Here the CC and CXX environment variables are used to explicitly specify the C and C++ compiler that CMake should use. If not specified, CMake will determine a default compiler to use.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Build_Tools/git/index.html b/Documentation/Development/Build_Tools/git/index.html new file mode 100644 index 000000000..fdfcd416f --- /dev/null +++ b/Documentation/Development/Build_Tools/git/index.html @@ -0,0 +1,5171 @@ + + + + + + + + + + + + + + + + + + + + + + + Git - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Using Git Revision Control#

+

Learn how to set up and use the Git software tool for development on the HPC systems

+

Git is used locally to track incremental development and modifications to a collection of files. GitHub is a git-repository hosting web-service, which serves as a synchronized, common access point for the file collections. GitHub also has social aspects, like tracking who changed what and why. There are other git hosting services like GitLab which are similar to GitHub but offer slightly different features.

+

NREL has a Github Enterprise server (github.nrel.gov) for internally-managed repos. Please note that github.nrel.gov is only available internally using the NREL network or VPN. NREL's git server uses SAML/SSO for logging into GitHub Enterprise. To get help accessing the server or creating a repository, please contact NREL ITS.

+

Git Configuration Set Up#

+

The git software tool is already installed on the HPC systems.

+

Git needs to know your user name and an email address at a minimum:

+
$ git config --global user.name "Your name"
+$ git config --global user.email "your.name@nrel.gov"
+
+

Github does not accept account passwords for authenticated Git operations. Instead, token-based authentication (PAT or SSH key) is required.

+

Set Up SSH Authorization#

+

Users already have SSH keys created on the HPC systems. To set up Github SSH authorization, you can add the existing SSH (secure shell) key(s) to your Github profile. You will also need to change any remote repo URL to use SSH instead of HTTPS.

+
+Set up SSH Key +
    +
1. On the HPC system, copy the content of ~/.ssh/id_rsa.pub.
2. On Github, click on: your git profile > Settings > SSH and GPG keys > New SSH key
3. Paste the content of ~/.ssh/id_rsa.pub into the "Key" window
4. In your local git repo directory, type:
   git remote set-url origin <git@github.nrel.gov:username/my-projectname.git>
   Your URL can be retrieved in the Github UI by going to the remote repo, then "Code" > "SSH".
+
+
+

Warning

+

Please do not alter or delete the key pair that exists on the HPC systems in ~/.ssh/. You can copy the public key to Github.

+
+

Git Vocabulary#

+
+Repository/repo +

A git repository is an independent grouping of files to be tracked. A git repo has a "root" which is the directory that it sits in, and tracks further directory nesting from that. A single repo is often thought of as a complete project or application, though it's not uncommon to nest modules of an application as child repositories to isolate the development history of those submodules.

+
+
+Commit +

A commit, or "revision", is an individual change to a file (or set of files). It's like when you save a file, except with Git, every time you save it creates a unique ID (a.k.a. the "SHA" or "hash") that allows you to keep record of what changes were made when and by who. Commits usually contain a commit message which is a brief description of what changes were made.

+
+
+Fork +

A fork is a personal copy of another user's repository that lives on your account. Forks allow you to freely make changes to a project without affecting the original. Forks remain attached to the original, allowing you to submit a pull request to the original's author to update with your changes. You can also keep your fork up to date by pulling in updates from the original.

+
+
+Pull +

Pull refers to when you are fetching in changes and merging them. For instance, if someone has edited the remote file you're both working on, you'll want to pull in those changes to your local copy so that it's up to date.

+
+
+Pull Request +

Pull requests are proposed changes to a repository submitted by a user and accepted or rejected by a repository's collaborators. Like issues, pull requests each have their own discussion forum.

+
+
+Push +

Pushing refers to sending your committed changes to a remote repository, such as a repository hosted on GitHub. For instance, if you change something locally, you'd want to then push those changes so that others may access them.

+
+
+Branch +

A branch is a new/separate version of the repository. Use branches when you want to work on a new feature, but don't want to mess-up the main branch while testing your ideas.

+
+

Tool Use#

+
+Clone an existing repo +

For example, you could create a local working copy of the "test_repo" repo (puts it in a folder in your current directory): +

cd /some/project/dir
+git clone <git@github.nrel.gov:username/test_repo.git>
+
+Now, make changes to whatever you need to work on. +Recommendation: commit your changes often, e.g., whenever you have a workable chunk of work completed.

+
+
+See what files you've changed +

git status

+
+
+Push your changes to the repo +
git add <filename(s)-you-changed>
+git commit -m "A comment about the changes you just made."
+git push
+
+
+
+Get remote changes from the repo +

If you collaborate with others in this repo, you'll want to pull their changes into your copy of the repo. You may want to do this first-thing when you sit down to work on something to minimize the number of merges you'll need to handle: +git pull

+
+
+Create a new local git code repo +
mkdir my.projectname
+cd my.projectname
+git init
+touch README.txt
+git add README.txt
+git commit -m 'first commit'
+# Push the repo to Github
+git remote add origin git@hpc/my.projectname.git
+git push origin main
+
+
+
+Revert a commit +

You can use git revert to remove unwanted changes. +Find the hash of the commit that you need to undo:
+git log
+Once you have the hash:
+git revert <hash of commit to undo>
+The git revert command will undo only the changes associated with the chosen commit, even if it is not the most recent commit. The reverted commit will still be stored in the history of changes, so it can still be accessed or reviewed in the future.

+
+
+Make a branch +

Create a local branch called "experimental" based on the current master branch: +

git checkout master #Switch to the master branch
+git branch experimental
+

+

Use Your Branch +(start working on that experimental branch....): +

git checkout experimental
+# If this branch exists on the remote repo, pull in new changes:
+git pull origin experimental
+# work, work, work, commit....:
+

+

Send local branch to the repo:
+git push origin experimental

+

Get the remote repo and its branches:
+git fetch origin

+

Merge the branch into the master branch:
+

git checkout master
+git merge experimental
+
+If there are conflicts, git adds >>>> and <<<<< markers in files to mark where you need to fix/merge your code.
+Examine your code with git diff:
+git diff
+Make any updates needed, then git add and git commit your changes.

+
+
+Delete a branch +

Once you've merged a branch and you are done with it, you can delete it:
+

git branch --delete <branchName> # deletes branchName from your local repo
+git push origin --delete <branchName> # deletes the remote branch if you pushed it to the remote server
+

+
+
+Git diff tricks +

You can use git log to see when the commits happened, and then git diff has some options that can help identify changes.
+What changed between two commits (hopefully back to back commits):
+git diff 57357fd9..4f890708 > my.patch
+Just the files that changed:
+git diff --name-only 57357fd9 4f890708

+
+
+Tags +

You can tag a set of code in git, and use a specific tagged version.
+List tags:
+git tag -l
+Set a tag:
+git tag -a "2.2" -m "Tagging current rev at 2.2"
+Push your tag:
+git push --tags
+Use tag tagname:
+git checkout tagname

+
+
+Unmodify a modified file +

To revert your file back to your last commit and discard current changes, use the output from git status to easily un-modify it. +

$ git status
+# Changes not staged for commit:  
+# (use "git add <file>..." to update what will be committed)
+# (use "git restore <file>..." to discard changes in working directory)
+    # modified: modified_code.py  
+
+# Run the command in the above output to discard changes:  
+$ git restore modified_code.py
+
+If you run git status again you will see that the changes have been reverted. Just be sure that you want to revert the file before doing so, because all current changes will not be recoverable.

+
+
+Point your repo to a different remote server +

For example, you may need to do this if you were working on code from a repo that was checked-out from Github.com, and you want to check that code into a repository on NREL's github server. Once you've requested a new NREL git repo from ITS and it's configured, you can:
+

git remote set-url origin git@github.nrel.gov:hpc/my.<newprojectname>.git
+
+See git help remote for more details or you can just edit .git/config and change the URLs there. +This shouldn't cause any lost repo history, but if you want to be sure, you can make a copy of your repo until the url change is confirmed.

+
+
+Send someone a copy of your current code (not the whole repo) +

You can export a copy of your code to your $HOME directory using the following command:
+git archive master --prefix=my.projectname/ --output=~/my.projectname.tgz

+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Build_Tools/spack/index.html b/Documentation/Development/Build_Tools/spack/index.html new file mode 100644 index 000000000..afba0d233 --- /dev/null +++ b/Documentation/Development/Build_Tools/spack/index.html @@ -0,0 +1,5684 @@ + + + + + + + + + + + + + + + + + + + + + + + Spack - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Spack

+ +

Introduction#

+

Spack is an HPC-centric package manager for acquiring, building, and managing HPC applications as well as all their dependencies, down to the compilers themselves. Like frameworks such as Anaconda, it is associated with a repository of both source-code and binary packages. Builds are fully configurable through a DSL at the command line as well as in YAML files. Maintaining many build-time permutations of packages is simple through an automatic and user-transparent hashing mechanism. The Spack system also automatically creates (customizable) environment modulefiles for each built package.

+

Installation#

+

Multiple installations of Spack can easily be kept, and each is separate from the others by virtue of the environment variable SPACK_ROOT. All package, build, and modulefile content is kept inside the SPACK_ROOT path, so working with different package collections is as simple as setting SPACK_ROOT to the appropriate location. The only exception to this orthogonality is the YAML files in $HOME/.spack/<platform>. Installing a Spack instance is as easy as

+

git clone https://github.com/spack/spack.git

+

Once the initial Spack instance is set up, it is easy to create new ones from it through

+

spack clone <new_path>

+

SPACK_ROOT will need to point to <new_path> in order to be consistent.

+

Spack environment setup can be done by sourcing $SPACK_ROOT/share/spack/setup-env.sh, or by simply adding $SPACK_ROOT/bin to your PATH.

+

source $SPACK_ROOT/share/spack/setup-env.sh +or +export PATH=$SPACK_ROOT/bin:$PATH

+

Setting Up Compilers#

+

Spack is able to find certain compilers on its own, and will add them to your environment as it does. To obtain the list of available compilers on Eagle, run module avail; the compiler of interest can then be loaded with module load <compiler>. To see which compilers your Spack collections know about, type

+

spack compilers

+

To add an existing compiler installation to your collection, point Spack to its location through

+

spack compiler add <path to Spack-installed compiler directory with hash in name>

+

The command will add to $HOME/.spack/linux/compilers.yaml. +To configure more generally, move changes to one of the lower-precedence compilers.yaml files (paths described below in Configuration section). +Spack has enough facility with standard compilers (e.g., GCC, Intel, PGI, Clang) that this should be all that’s required to use the added compiler successfully.

+

Available Packages in Repo#

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Command
Description
spack list : all available packages by name. Dumps repo content, so if using a local repo, this should dump the local package load.
spack list <pattern> : all available packages that have <pattern> somewhere in their name. <pattern> is simple, not regex.
spack info <package_name> : available versions classified as safe, preferred, or variants, as well as dependencies. Variants are important for selecting certain build features, e.g., with/without Infiniband support.
spack versions <package_name> : see which versions are available
+

Installed packages#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Command
Description
spack find : list all locally installed packages
spack find --deps <package> : list dependencies of <package>
spack find --explicit : list packages that were explicitly requested via spack install
spack find --implicit : list packages that were installed as a dependency to an explicitly installed package
spack find --long : include partial hash in package listing. Useful to see distinct builds
spack find --paths : show installation paths
+

Finding how an installed package was built does not seem as straightforward as it should be. +Probably the best way is to examine <install_path>/.spack/build.env, where <install_path> is the Spack-created directory with the hash for the package being queried. +The environment variable SPACK_SHORT_SPEC in build.env contains the Spack command that can be used to recreate the package (including any implicitly defined variables, e.g., arch). +The 7-character short hash is also included, and should be excluded from any spack install command.
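For example (zlib here is just a stand-in for whatever installed package you are interested in):

spack find --paths zlib                                 # report the install prefix
grep SPACK_SHORT_SPEC <install_path>/.spack/build.env   # shows the spec used to build it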

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Symbols
Description
@ : package versions. Can use range operator ":", e.g., X@1.2:1.4. Range is inclusive and open-ended, e.g., "X@1.4:" matches any version of package X 1.4 or higher.
% : compiler spec. Can include versioning, e.g., X%gcc@4.8.5
+, -, ~ : build options. +opt, -opt; "~" is equivalent to "-"
name=value : build options for non-Boolean flags. Special names are cflags, cxxflags, fflags, cppflags, ldflags, and ldlibs
target=value : for defined CPU architectures, e.g., target=haswell
os=value : for defined operating systems
^ : dependency specification, using above specs as appropriate
^/<hash> : specify a dependency where <hash> is of sufficient length to resolve uniquely
+

External Packages#

+

Sometimes dependencies are expected to be resolved through a package that is installed as part of the host system, or otherwise outside of the Spack database. +One example is Slurm integration into MPI builds. +If you were to try to add a dependency on one of the listed Slurms in the Spack database, you might see, e.g.,

+
[$user@el2 ~]$ spack spec openmpi@3.1.3%gcc@7.3.0 ^slurm@19-05-3-2
+Input spec
+--------------------------------
+openmpi@3.1.3%gcc@7.3.0
+    ^slurm@19-05-3-2
+
+Concretized
+--------------------------------
+==> Error: The spec 'slurm' is configured as not buildable, and no matching external installs were found
+
+

Given that something like Slurm is integrated deeply into the runtime infrastructure of our local environment, we really want to point to the local installation. +The way to do that is with a packages.yaml file, which can reside in the standard Spack locations (see Configuration below). +See the Spack docs on external packages for more detail. +In the above example at time of writing, we would like to build OpenMPI against our installed Slurm 19.05.2. +So, you can create file ~/.spack/linux/packages.yaml with the contents

+
packages:
+  slurm:
+    paths:
+      slurm@18-08-0-3: /nopt/slurm/18.08.3
+      slurm@19-05-0-2: /nopt/slurm/19.05.2
+
+

that will enable builds against both installed Slurm versions. +Then you should see

+
[$user@el2 ~]$ spack spec openmpi@3.1.3%gcc@7.3.0 ^slurm@19-05-0-2
+Input spec
+--------------------------------
+openmpi@3.1.3%gcc@7.3.0
+    ^slurm@19-05-0-2
+
+Concretized
+--------------------------------
+openmpi@3.1.3%gcc@7.3.0 cflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" cxxflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" fflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" +cuda+cxx_exceptions fabrics=verbs ~java~legacylaunchers~memchecker+pmi schedulers=slurm ~sqlite3~thread_multiple+vt arch=linux-centos7-x86_64
+-
+    ^slurm@19-05-0-2%gcc@7.3.0 cflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" cxxflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" fflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" ~gtk~hdf5~hwloc~mariadb+readline arch=linux-centos7-x86_64
+
+

where the Slurm dependency will be satisfied with the installed Slurm (cflags, cxxflags, and arch are coming from site-wide configuration in /nopt/nrel/apps/base/2018-12-02/spack/etc/spack/compilers.yaml; the variants string is likely coming from the configuration in the Spack database, and should be ignored).

+

Virtual Packages#

+

It is possible to specify some packages for which multiple options are available at a higher level. +For example, mpi is a virtual package specifier that can resolve to mpich, openmpi, Intel MPI, etc. +If a package's dependencies are spec'd in terms of a virtual package, Spack will choose a specific package at build time according to site preferences. +Choices can be constrained by spec, e.g.,

+

spack install X ^mpich@3

+

would satisfy package X’s mpi dependency with some version 3 of MPICH. +You can see available providers of a virtual package with

+

spack providers <vpackage>

+

Extensions#

+

In many cases, frameworks have sub-package installations in standard locations within their own installations. +A familiar example of this is Python and its usual module location in lib(64)/python<version>/site-packages, and pointed to via the environment variable PYTHONPATH.

+

To find available extensions

+

spack extensions <package>

+

Extensions are just packages, but they are not enabled for use out of the box. To do so (e.g., so that you could load the Python module after installing), you can either load the extension package’s environment module, or

+

spack use <extension package>

+

This only lasts for the current session, and is not of general interest. A more persistent option is to activate the extension:

+

spack activate <extension package>

+

This takes care of dependencies as well. The inverse operation is deactivation.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Command
Description
spack deactivate <extension package> : deactivates the extension alone. Will not deactivate if dependents exist
spack deactivate --force <extension package> : deactivates regardless of dependents
spack deactivate --all <extension package> : deactivates the extension and all dependencies
spack deactivate --all <parent> : deactivates all extensions of parent (e.g., <python>)
+
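As a small illustration, assuming a Spack version that still provides activate/deactivate as described above, and using py-numpy purely as an example extension:

spack activate py-numpy      # make the extension usable under the Spack-built python
spack deactivate py-numpy    # undo it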

Modules#

+

Spack can auto-create environment modulefiles for the packages that it builds, both in Tcl for “environment modules” per se, and in Lua for Lmod. +Auto-creation includes each dependency and option permutation, which can lead to excessive quantities of modulefiles. +Spack also uses the package hash as part of the modulefile name, which can be somewhat disconcerting to users. +These default behaviors can be treated in the active modules.yaml file, as well as practices used for support. +Tcl modulefiles are created in $SPACK_ROOT/share/spack/modules by default, and the equivalent Lmod location is $SPACK_ROOT/share/spack/lmod. +Only Tcl modules are created by default. +You can modify the active modules.yaml file in the following ways to affect some example behaviors:

+

To turn Lmod module creation on:#

+
modules:
+    enable:
+        - tcl
+        - lmod 
+
+

To change the modulefile naming pattern:#

+
modules:
+    tcl:
+        naming_scheme: '{name}/{version}/{compiler.name}-{compiler.version}'
+
+

would achieve the Eagle naming scheme.

+

To remove default variable settings in the modulefile, e.g., CPATH:#

+
modules:
+    tcl:
+        all:
+            filter:
+                environment_blacklist: ['CPATH']
+
+

Note that this would affect Tcl modulefiles only; if Spack also creates Lmod files, those would still contain default CPATH modification behavior.

+

To prevent certain modulefiles from being built, you can whitelist and blacklist:#

+
modules:
+    tcl:
+        whitelist: ['gcc']
+        blacklist: ['%gcc@4.8.5']
+
+

This would create modules for all versions of GCC built using the system compiler, but not for the system compiler itself. +There are a great many further behaviors that can be changed, see https://spack.readthedocs.io/en/latest/module_file_support.html#modules for more.

+

For general user support, it is not a bad idea to keep the modules that are publicly visible separate from the collection that Spack auto-generates. This involves some manual copying, but is generally not onerous as all rpaths are included in Spack-built binaries (i.e., you don’t have to worry about satisfying library dependencies for Spack applications with an auto-built module, since library paths are hard-coded into the application binaries). This separation also frees one from accepting Spack’s verbose coding formats within modulefiles, should you decide to maintain certain modulefiles another way.

+

Configuration#

+

Spack uses hierarchical customization files. +Every package is a Python class, and inherits from the top-level class Package. +Depending on the degree of site customization, you may want to fork the Spack repo to create your own customized Spack package. +There are 4 levels of configuration. In order of increasing precedence,

+
    +
1. Default: $SPACK_ROOT/etc/spack/default
2. System-wide: /etc/spack
3. Site-wide: $SPACK_ROOT/etc/spack
4. User-specific: $HOME/.spack

Spack configuration uses YAML files, a subset of JSON native to Python. +There are 5 main configuration files.

+
    +
  1. +

    compilers.yaml. Customizations to the Spack-known compilers for all builds

    +

    i. Use full path to compilers

    +

    ii. Additional rpaths beyond the Spack repo

    +

    iii. Additional modules necessary when invoking compilers

    +

    iv. Mixing toolchains

    +

    v. Optimization flags

    +

    vi. Environment modifications

    +
  2. +
  3. +

    config.yaml. Base functionality of Spack itself

    +

    i. install_tree: where to install packages

    +

    ii. build_stage: where to do compiles. For performance, can specify a local SSD or a RAMFS.

    +

    iii. modules_roots: where to install modulefiles

    +
  4. +
  5. +

    modules.yaml. How to create modulefiles

    +

    i. whitelist/blacklist packages from having their own modulefiles created

    +

    ii. adjust hierarchies

    +
  6. +
  7. +

    packages.yaml. Specific optimizations, such as multiple hardware targets.

    +

    i. dependencies, e.g., don’t build OpenSSL (usually want sysadmins to handle updates, etc.)

    +

    ii. mark specific packages as non-buildable, e.g., vendor MPIs

    +

    iii. preferences, e.g., BLAS -> MKL, LAPACK -> MKL

    +
  8. +
  9. +

    repos.yaml

    +

    i. Directory-housed, not remote

    +

    ii. Specify other package locations

    +

    iii. Can then spec build in other configs (e.g., binary, don’t build)

    +

    iv. Precedence in YAML file order, but follows Spack precedence order (user > site > system > default)

    +
  10. +
+
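To see how these scopes and files combine in practice, Spack can print the merged result of a section and report which file each setting came from. A brief sketch using standard Spack commands:

# Show the merged configuration for a section (config, packages, modules, ...)
spack config get config

# Show which scope (default, system, site, or user) each line came from
spack config blame packages

# Open the site-scope packages.yaml in an editor
spack config --scope site edit packages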

Variants: standard adjustments to package build#

+

spack edit <package> -- opens the Python file for the package; you can easily write new variants there.

+

Providers#

+

spack providers -- lists virtual packages, e.g., blas, mpi, etc. These are standards, not implementations: an abstraction over an implementation (blas/mkl, mpi/mpich, etc.)

+

Mirrors#

+
    +
  • mirrors.yaml: where packages are kept
  • +
  • A repo is where build information is kept; a mirror is where code lives
  • +
+
MirrorTopLevel
+    package_a
+        package_a-version1.tar.gz
+        package_a-version2.tar.gz
+    package_b
+        ⋮
+
+

Use spack mirror to manage mirrors, for example:
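A brief sketch (the directory path is only a placeholder):

# Register a directory as a mirror named "local"
spack mirror add local file:///projects/mymirror

# Download the sources for a spec and its dependencies into the mirror
spack mirror create -d /projects/mymirror -D zlib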

+

Repos#

+
    +
  • Can take precedence from, e.g., a site repo
  • +
  • Can namespace
  • +
+
packages
+    repo.yaml
+    alpha
+        hotfix-patch-ABC.patch
+        package.py
+        package.pyc
+    beta
+    theta
+
+

Kestrel specific configuration#

+

In order to add the HPE-installed compilers on Kestrel, we can edit the compilers.yaml file as discussed earlier. We can add the three programming environments of choice (Cray, Intel, GNU) using the following lines; a quick way to verify the result follows the block.

+
- compiler:
+    spec: intel@=2023.2.0
+    modules:
+    - PrgEnv-intel
+    - intel/2023.2.0
+    paths:
+      cc:  cc
+      cxx: CC
+      f77: ftn
+      fc:  ftn
+    flags: {}
+    operating_system: rhel8
+    target: x86_64
+    environment: {}
+    extra_rpaths: []
+- compiler:
+    spec: cce@=14.0.4
+    modules:
+    - PrgEnv-cray
+    - cce/14.0.4
+    paths:
+      cc: cc
+      cxx: CC
+      f77: ftn
+      fc: ftn
+    flags: {}
+    operating_system: rhel8
+    target: x86_64
+    environment: {}
+    extra_rpaths: []
+- compiler:
+    spec: gcc@=12.1.0
+    modules:
+    - PrgEnv-gnu
+    - gcc/12.1.0
+    paths:
+      cc: cc
+      cxx: CC
+      f77: ftn
+      fc: ftn
+    flags: {}
+    operating_system: rhel8
+    target: x86_64
+    environment: {}
+    extra_rpaths: []
+
+
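Once these entries are in place, you can confirm that Spack sees the compiler definitions (standard Spack commands):

# List the compilers Spack currently knows about
spack compilers

# Show the full definition (modules, paths, flags) for one of them
spack compiler info gcc@12.1.0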

Similarly, we can add the HPE-provided MPI (Cray MPICH) by editing the packages.yaml file and adding the following; an example of requesting these externals in a spec appears after the block:

+
cray-mpich:
+    externals:
+    - spec: "cray-mpich@8.1.23%intel@2023.2.0"
+      modules:
+      - intel/2023.2.0 
+      - cray-dsmml/0.2.2     
+      - craype-network-ofi  
+      - cray-libsci/22.12.1.1   
+      - craype-x86-spr
+      - craype/2.7.19   
+      - libfabric/1.15.2.0   
+      - cray-mpich/8.1.23
+      - PrgEnv-intel/8.3.3 
+      prefix: /opt/cray/pe/mpich/8.1.23/ofi/intel/19.0
+    - spec: "cray-mpich@8.1.23%gcc@12.1.0"
+      modules:
+      - cray-dsmml/0.2.2     
+      - craype-network-ofi  
+      - cray-libsci/22.12.1.1   
+      - craype-x86-spr
+      - craype/2.7.19   
+      - libfabric/1.15.2.0   
+      - cray-mpich/8.1.23
+      - PrgEnv-gnu/8.3.3
+      - cray-mpich/8.1.23
+      prefix: /opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1 
+    - spec: "cray-mpich@8.1.23%cce@14.0.4"
+      modules:
+      - cray-dsmml/0.2.2     
+      - craype-network-ofi  
+      - cray-libsci/22.12.1.1   
+      - craype-x86-spr
+      - craype/2.7.19   
+      - libfabric/1.15.2.0   
+      - cray-mpich/8.1.23
+      - PrgEnv-cray/8.3.3       
+      - cray-mpich/8.1.23
+      prefix: /opt/cray/pe/mpich/8.1.23/ofi/cray/10.0 
+    buildable: False
+
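With the external cray-mpich entries marked non-buildable, a build can request them explicitly in a spec. A hypothetical example (the package being installed is only for illustration):

# Build HDF5 with the GNU toolchain against the external Cray MPICH
spack install hdf5 +mpi %gcc@12.1.0 ^cray-mpich@8.1.23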
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Compilers/aocc/index.html b/Documentation/Development/Compilers/aocc/index.html new file mode 100644 index 000000000..79c092e1a --- /dev/null +++ b/Documentation/Development/Compilers/aocc/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Aocc - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Aocc

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Compilers/cray/index.html b/Documentation/Development/Compilers/cray/index.html new file mode 100644 index 000000000..e15b2d574 --- /dev/null +++ b/Documentation/Development/Compilers/cray/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Cray - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Cray

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Compilers/gnu/index.html b/Documentation/Development/Compilers/gnu/index.html new file mode 100644 index 000000000..67fff6919 --- /dev/null +++ b/Documentation/Development/Compilers/gnu/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Gnu - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Gnu

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Compilers/intel/index.html b/Documentation/Development/Compilers/intel/index.html new file mode 100644 index 000000000..95b050896 --- /dev/null +++ b/Documentation/Development/Compilers/intel/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Intel - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Intel

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Compilers/nvhpc/index.html b/Documentation/Development/Compilers/nvhpc/index.html new file mode 100644 index 000000000..9cd5bc25f --- /dev/null +++ b/Documentation/Development/Compilers/nvhpc/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Nvhpc - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Nvhpc

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Compilers/rosetta_stone/index.html b/Documentation/Development/Compilers/rosetta_stone/index.html new file mode 100644 index 000000000..49c102705 --- /dev/null +++ b/Documentation/Development/Compilers/rosetta_stone/index.html @@ -0,0 +1,7607 @@ + + + + + + + + + + + + + + + + + + + + + + + Compilers - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Compiler Information#

+

This document describes some of the important command line options for various compilers. This includes the GNU compilers (gcc, g++, gfortran), the Intel C, C++, and Fortran compilers, as well as the Cray compilers. The information contained herein is not complete but only a small subset of what is available in man pages and full documentation. For example, the man page for gcc is over 21,000 lines long.

+

Topics#

+

The topics covered include:

+
    +
  • Normal invocation
  • +
  • Default optimization level
  • +
  • Compiling for performance
  • +
  • Compiling for debugging and related purposes
  • +
  • Runtime checks
  • +
  • Some File extensions
  • +
  • Language standard settings (Dialect)
  • +
  • Generating listing, if available
  • +
  • Preprocessing
  • +
  • OpenMP support
  • +
  • OpenACC support
  • +
  • UPC support (C++)
  • +
  • Coarray support (Fortran)
  • +
  • Important compiler specific options
  • +
  • Notes
  • +
+

Compilers covered#

+
    +
  • gcc
  • +
  • gfortran
  • +
  • Intel icc (Classic)
      +
    • Moving to Intel's new icx compiler
    • +
    +
  • +
  • Intel ifort (Fortran)
      +
    • Moving to Intel's new ifx compiler
    • +
    +
  • +
  • Cray C (Clang based)
  • +
  • Cray Fortran (ftn)
  • +
+

gcc/g++#

+

This discussion is for version 12.x. Most options are supported for recent versions of the compilers. Also, most command line options for gcc and g++ are supported for each compiler. It is recommended that C++ programs be compiled with g++ and C programs with gcc.

+

Normal invocation#

+
# Compile and link a program with the executable sent to the indicated
+  file
+gcc mycode.c -o myexec
+g++ mycode.C -o myexec
+
+# Compile a file but don't link 
+gcc -c mycode.c 
+g++ -c mycode.C 
+
+

Default optimization#

+

The default optimization level is -O0 on most systems. It is possible that a compiler might be configured to have a different default. One easy way to determine the default is to build a simple application without specifying an optimization level and compare its size to a version compiled with optimization on.

+

Compiling for performance#

+
-O1 Optimize.  Optimizing compilation takes somewhat more time, and a
+    lot more memory for a large function.
+
+-O2 Optimize even more.  GCC performs nearly all supported
+    optimizations that do not involve a space-speed tradeoff.
+
+-O3 Optimize yet more.
+
+-Ofast Disregard strict standards compliance.  -Ofast enables all -O3
+    optimizations.  It also enables optimizations that are not valid
+    for all standard-compliant programs.  
+
+

You can discover which optimizations are at various levels of optimization as shown below. The last command will show all potential optimization flags, over 250.

+
gcc -c -Q -O3 --help=optimizers > /tmp/O3-opts
+gcc -c -Q -O2 --help=optimizers > /tmp/O2-opts
+diff /tmp/O2-opts /tmp/O3-opts | grep enabled
+
+gcc -Q  --help=optimizers 
+
+

Compiling for debugging and related purposes#

+

-Og Optimize debugging experience. Use instead of -O0. Does some
+    optimization but maintains debug information
+
+-g  Produce debugging information
+
+gcc -Og -g myprog.c
+
+-p,-pg Generate extra code to write profile information suitable for
+    the analysis program prof (for -p) or gprof
+
+There are many potential options for profiling. See the man page and search for -pg.

+

Some file extensions#

+
file.c
+   C source code that must be preprocessed.
+
+file.i
+   C source code that should not be preprocessed.
+
+file.ii
+   C++ source code that should not be preprocessed.
+
+file.cc
+file.cp
+file.cxx
+file.cpp
+file.CPP
+file.c++
+file.C
+   C++ source code that must be preprocessed.  
+
+

You can explicitly specify the language for a file independent of the extension using the -x option. For example, gcc -x c file.cc will compile the program as C instead of C++.

+

Language standard settings (Dialect)#

+
-ansi This is equivalent to -std=c90. In C++ mode, it is equivalent to -std=c++98.
+
+
+-std=
+
+c90
+   Support all ISO C90 programs 
+
+iso9899:199409
+   ISO C90 as modified in amendment 1.
+
+c99
+   ISO C99.  
+
+c11
+   ISO C11, the 2011 revision of the ISO C standard.  
+
+c18
+   ISO C17, the 2017 revision of the ISO C standard
+   (published in 2018).  
+
+c2x The next version of the ISO C standard, still under
+    development.  The support for this version is
+    experimental and incomplete.
+
+
+c++98 The 1998 ISO C++ standard plus the 2003 technical
+      corrigendum and some additional defect reports. Same as
+      -ansi for C++ code.
+
+c++11
+   The 2011 ISO C++ standard plus amendments.  
+
+c++14
+   The 2014 ISO C++ standard plus amendments.  
+
+c++17
+   The 2017 ISO C++ standard plus amendments. 
+
+

This is a subset of all of the options. There are "gnu" specific versions of many of these which give slight variations. Also, some of these can be specified via various deprecated flags. The dialects available for the compilers are highly version dependent. Older versions of the compilers will not support newer dialects.

+

Preprocessing#

+

Unless explicitly disabled by the file extension as described above, files are preprocessed. If you pass the -E option, the file will be preprocessed only and will not be compiled. The output is sent to the standard output.
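For example:

# Preprocess only; write the result to a file instead of stdout
gcc -E mycode.c -o mycode.i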

+

OpenMP support#

+
-fopenmp 
+          Enable handling of OpenMP directives
+-fopenmp-simd
+          Enable handling of OpenMP's SIMD directives   
+-mgomp          
+          Generate code for use in OpenMP offloading 
+
+

Offloading will not work on all platforms and may require additional options.
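As a quick check that OpenMP is working, the following sketch builds and runs a small threaded program (file and variable names are arbitrary):

cat > omp_hello.c <<'EOF'
#include <omp.h>
#include <stdio.h>

int main(void) {
    /* Each thread reports its id and the total thread count */
    #pragma omp parallel
    printf("hello from thread %d of %d\n",
           omp_get_thread_num(), omp_get_num_threads());
    return 0;
}
EOF

gcc -fopenmp omp_hello.c -o omp_hello
OMP_NUM_THREADS=4 ./omp_hello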

+

OpenACC support#

+
 -fopenacc
+           Enable handling of OpenACC directives
+
+ -fopenacc-dim=geom
+           Specify default compute dimensions for parallel offload
+           regions that do not explicitly specify
+
+
Offloading will not work on all platforms and may require additional options.
+
+

Important compiler specific options#

+

-Wall This enables all the warnings about constructions that some users consider questionable, and that are easy to avoid (or modify to prevent the warning).

+

-Wextra This enables some extra warning flags that are not enabled by -Wall.
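For example:

# Compile with the extended warning set enabled
gcc -Wall -Wextra -O2 -c mycode.c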

+

gfortran#

+

This discussion is for version 12.x. Most options are supported for recent versions of the compilers. Also, most command line options for gcc and g++ are supported for gfortran.

+

Normal invocation#

+
# Compile and link a program with the executable sent to the indicated
+  file
+gfortran mycode.f90  -o myexec
+
+# Compile a file but don't link 
+gfortran -c mycode.f90
+
+

Default optimization#

+

The default optimization level is -O0 on most systems. It is possible that a compiler might be configured to have a different default. One easy way to determine the default is to build a simple application without specifying an optimization level and compare its size to a version compiled with optimization on.

+

Compiling for performance#

+
-O1 Optimize.  Optimizing compilation takes somewhat more time, and a
+    lot more memory for a large function.
+
+-O2 Optimize even more.  GCC performs nearly all supported
+    optimizations that do not involve a space-speed tradeoff.
+
+-O3 Optimize yet more.
+
+-Ofast Disregard strict standards compliance.  -Ofast enables all -O3
+    optimizations.  It also enables optimizations that are not valid
+    for all standard-compliant programs.  
+
+

You can discover which optimizations are at various levels of optimization as shown below. The last command will show all potential optimization flags, over 250.

+
gfortran -c -Q -O3 --help=optimizers > /tmp/O3-opts
+gfortran -c -Q -O2 --help=optimizers > /tmp/O2-opts
+diff /tmp/O2-opts /tmp/O3-opts | grep enabled
+
+gfortran -Q  --help=optimizers 
+
+

Compiling for debugging and related purposes#

+

-Og Optimize debugging experience. Use instead of -O0. Does some
+    optimization but maintains debug information
+
+-g  Produce debugging information
+
+-fbacktrace Try to print a back trace on error
+
+-fcheck=<all|array-temps|bits|bounds|do|mem|pointer|recursion>.
+    Perform various runtime checks.  This will slow your program
+    down.
+
+gfortran -Og -g -fbacktrace -fcheck=all myprog.f90
+
+-p,-pg Generate extra code to write profile information suitable for
+    the analysis program prof (for -p) or gprof
+
+There are many potential options for profiling. See the man page and search for -pg.

+

Some file extensions#

+
.F, .FOR, .FTN, .fpp, .FPP, .F90, .F95, .F03
+    preprocessor is run automatically   
+
+.f, .for, .ftn, .f90, .f95, .f03
+    preprocessor is not run automatically   
+
+

Language standard settings (Dialect)#

+
f95, f2003, f2008, f2018 Specify strict conformance to the various
+    standards
+
+gnu 2018 with gnu extensions
+
+legacy Older codes
+
+-ffree-form / -ffixed-form The source is in Free / Fixed form
+
+

Language standard settings (Save)#

+

The Fortran 90 standard does not indicate the status of variables that leave scope. That is, in general, a variable defined in a subroutine may or may not be defined when the subroutine is reentered. There are exceptions for variables in common blocks and those defined in modules.

+

For Fortran 95 and later local allocatable variables are automatically deallocated upon exit from a subroutine.

+

The flags -fautomatic and -fno-automatic change this behavior.

+
-fautomatic Automatically deallocate variables on exit independent of
+    standard setting
+
+-fno-automatic Do not automatically deallocate variables on exit
+    independent of standard setting
+
+-fmax-stack-var-size With this value set to some small value, say 1,
+    it appears that variables are not deallocated.  A program
+    compiled with this option would in general be nonconforming.
+
+

The above applies to allocatable arrays. It is not clear what happens to scalars.

+

Language standard settings (argument mismatch)#

+

Some code contains calls to external procedures with mismatches between the calls and the procedure definition, or with mismatches between different calls. Such code is non-conforming, and will usually be flagged with an error. This option degrades the error to a warning, which can only be disabled by disabling all warnings via -w. Only a single occurrence per argument is flagged by this warning. -fallow-argument-mismatch is implied by -std=legacy.

+

It is recommended that source code be modified to have interfaces for routines that are called with various types of arguments. Fortran 2018 allows for a generic type for such interfaces. For example, here is an interface for MPI_Bcast:

+
module bcast
+interface
+ subroutine MPI_BCAST(BUF, COUNT, DATATYPE, DEST, COMM, IERROR)
+ type(*),intent(inout) :: BUF
+ !type(*), dimension(..), intent(in) :: BUF
+ integer, intent(in) ::  COUNT, DATATYPE, DEST,  COMM
+ integer, intent(out) :: IERROR
+ end subroutine
+end interface
+end module
+
+

Generating listing#

+

Gfortran does not produce listings.

+

Preprocessing#

+

Automatic preprocessing is determined by the file name extension as discussed above. You can manually turn it on/off via the options

+
-cpp - Preprocess
+-nocpp - Don't preprocess
+-cpp -E - Preprocess and send output to standard out. Don't compile
+
+

OpenMP support#

+
-fopenmp        Enable handling of OpenMP directives
+-fopenmp-simd   Enable handling of OpenMP's SIMD directives   
+-mgomp          Generate code for use in OpenMP offloading 
+
+

Offloading will not work on all platforms and may require additional options.

+

OpenACC support#

+

 -fopenacc Enable handling of OpenACC directives
+
+ -fopenacc-dim=geom Specify default compute dimensions for parallel offload
+     regions that do not explicitly specify
+
+Offloading will not work on all platforms and may require additional options.

+

Important compiler specific options#

+
-fimplicit-none 
+            Produce an error message if there are implicitly typed variables.
+
+-fdefault-real-8
+            Set the default real type to an 8 byte wide type.  This option also affects the kind of non-double real constants like 1.0. 
+
+-pedantic 
+            Issue warnings for uses of extensions to Fortran.
+
+       -fall-intrinsics
+           This option causes all intrinsic procedures (including the GNU-specific extensions) to be accepted.  This can
+           be useful with -std= to force standard-compliance but get access to the full range of intrinsics available
+           with gfortran.  
+
+
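A typical strict-checking compile line combining these options might look like the following sketch (the file name is arbitrary):

# Enforce explicit typing, warn about extensions, and check standard conformance
gfortran -std=f2008 -fimplicit-none -pedantic -Wall -Wextra -c mycode.f90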

icc/icpc#

+

This discussion is for version 2021.6.0. Icc and icpc will be replaced in the near future with Clang-based alternatives, icx and icpx. In the Cray environment, if PrgEnv-intel is loaded, "cc" maps to icc.

+

Normal invocation#

+
Compile and link a program with the executable sent to the indicated
+  file
+icc mycode.c -o myexec
+icpc mycode.C -o myexec
+
+Compile a file but don't link 
+icc -c mycode.c 
+icpc -c mycode.C 
+
+

NOTE: The icpc command uses the same compiler options as the icc command. Invoking the compiler using icpc compiles .c and .i files as C++. Invoking the compiler using icc compiles .c and .i files as C. Using icpc always links in C++ libraries. Using icc only links in C++ libraries if C++ source is provided on the command line.

+

Default optimization#

+

The default optimization level is -O2.

+

Compiling for performance#

+
-O0  Disables all optimizations.
+
+-O1  Enables optimizations for speed.
+
+-O2 Optimize even more. 
+
+-O  Same as -O2
+
+-O3 Optimize yet more.
+
+-Ofast -O3, -no-prec-div, and -fp-model
+
+-no-prec-div  enables optimizations that give slightly less precise
+    results than full IEEE division
+
+-fp-model slight decrease in the accuracy of math library functions
+
+-opt_report  Generate an optimization report
+
+

You can learn more about the optimizations performed at various levels as shown below.

+
icc -V -help opt
+
+

Compiling for debugging and related purposes#

+
-g[n] 
+    0 Disables generation of symbolic debug information.
+    1 Produces minimal debug information for performing stack traces.
+    2 Produces complete debug information. This is the same as specifying -g with no n.
+    3 Produces extra information that may be useful for some tools.
+
+-p  Generate extra code to write profile information suitable for
+    the analysis program gprof
+
+

Some file extensions#

+
file.c
+   C source code that must be preprocessed.
+
+file.i
+   C source code that should not be preprocessed.
+
+file.ii
+   C++ source code that should not be preprocessed.
+
+file.cc
+file.cp
+file.cxx
+file.cpp
+file.CPP
+file.c++
+file.C
+   C++ source code that must be preprocessed.  
+
+

You can explicitly specify the language for a file independent of the extension using the -x option. For example, icc -x c file.cc will compile the program as C instead of C++.

Language standard settings (Dialect)#

+
-std=<std> enable language support for <std>, as described below
+
+c99        conforms to ISO/IEC 9899:1999 standard for C programs
+c11        conforms to ISO/IEC 9899:2011 standard for C programs
+c17        conforms to ISO/IEC 9899:2017 standard for C programs
+c18        conforms to ISO/IEC 9899:2018 standard for C programs
+c++11      enables C++11 support for C++ programs
+c++14      enables C++14 support for C++ programs
+c++17      enables C++17 support for C++ programs
+c++20      enables C++20 support for C++ programs
+c89        conforms to ISO/IEC 9899:1990 standard for C programs
+gnu89      conforms to ISO C90 plus GNU extensions
+gnu99      conforms to ISO C99 plus GNU extensions
+gnu++98    conforms to 1998 ISO C++ standard plus GNU extensions
+gnu++11    conforms to 2011 ISO C++ standard plus GNU extensions
+gnu++14    conforms to 2014 ISO C++ standard plus GNU extensions
+gnu++17    conforms to 2017 ISO C++ standard plus GNU extensions
+gnu++20    conforms to 2020 ISO C++ standard plus GNU extensions
+
+-strict-ansi    Implement a strict ANSI conformance dialect
+
+
+

Preprocessing#

+

Unless explicitly disabled by the file extension as described above files are preprocessed. If you pass the -E option the file will be preprocessed only and will not be compiled. The output is sent to the standard output

+

OpenMP support#

+
-fopenmp
+    Enable handling of OpenMP directives
+-qopenmp-stubs
+    Compile OpenMP programs in sequential mode 
+-parallel          
+    Auto parallelize
+
+

OpenACC support#

+
Not supported
+
+

Offloading will not work on all platforms and may require additional options.

+
Important compiler specific options#

+
-Wall
+     This enables all the warnings about constructions that some
+     users consider questionable, and that are easy to avoid (or
+     modify to prevent the warning)
+
+-Wextra 
+     This enables some extra warning flags that are not enabled by
+     -Wall.
+
+-help [category]   print full or category help message
+
+Valid categories include
+       advanced        - Advanced Optimizations
+       codegen         - Code Generation
+       compatibility   - Compatibility
+       component       - Component Control
+       data            - Data
+       deprecated      - Deprecated Options
+       diagnostics     - Compiler Diagnostics
+       float           - Floating Point
+       help            - Help
+       inline          - Inlining
+       ipo             - Interprocedural Optimization (IPO)
+       language        - Language
+       link            - Linking/Linker
+       misc            - Miscellaneous
+       opt             - Optimization
+       output          - Output
+       pgo             - Profile Guided Optimization (PGO)
+       preproc         - Preprocessor
+       reports         - Optimization Reports
+
+       openmp          - OpenMP and Parallel Processing
+
+

Moving to Intel's new compiler icx#

+

The Intel compilers icc and icpc are being retired and replaced with icx and icpx. Other than the name change, many people will not notice significant differences.
+Other than the name change many people will not notice significant differences.

+

The document https://www.intel.com/content/www/us/en/developer/articles/guide/porting-guide-for-icc-users-to-dpcpp-or-icx.html has details. Here are some important blurbs from that page.

+

ICX and ICC Classic use different compiler drivers. The Intel® C++ Compiler Classic compiler drivers are icc, icpc, and icl. The Intel® oneAPI DPC++/C++ Compiler drivers are icx and icpx. Use icx to compile and link C programs, and icpx for C++ programs.

+

Unlike the icc driver, icx does not use the file extension to determine whether to compile as C or C++. Users must invoke icpx to compile C++ files. In addition to providing a core C++ compiler, ICX/ICPX is also used to compile SYCL/DPC++ codes for the Intel® oneAPI Data Parallel C++ Compiler when we pass an additional flag "-fsycl".

+

The major changes in compiler defaults are listed below:

+
    +
  • The Intel® oneAPI DPC++/C++ Compiler drivers are icx and icpx.
  • +
  • Intel® C++ Compiler Classic uses icc, icpc or icl drivers but this compiler will be deprecated in the upcoming release.
  • +
  • DPC++/SYCL users can use the icx/icpx driver along with the -fsycl flag which invokes ICX with SYCL extensions.
  • +
  • Unlike Clang, the ICX Default floating point model was chosen to match ICC behavior and by default it is -fp-model=fast .
  • +
  • MACRO naming is changing. Please be sure to check release notes for future macros to be included in ICX.
  • +
  • No diagnostics numbers are listed for remarks, warnings, or notes. Every diagnostic is emitted with the corresponding compiler option to disable it.
  • +
  • Compiler intrinsics cannot be automatically recognized without processor targeting options, unlike the behavior in Intel® C++ Compiler Classic. If you use intrinsics, read more on the documentation about intrinsic behavior changes.
  • +
+

ifort#

+

This discussion is for version 2021.6.0. Ifort will be replaced in the near future with a Clang-backend-based alternative, ifx. Ifx will have most of the same options as ifort with some Clang additions. In the Cray environment, if PrgEnv-intel is loaded, "ftn" maps to ifort.

+

Normal invocation#

+
# Compile and link a program with the executable sent to the indicated
+  file
+ifort mycode.f90 -o myexec
+
+# Compile a file but don't link 
+ifort -c mycode.f90
+
+

Default optimization#

+

The default optimization level is -O2.

+

Compiling for performance#

+
-O1 optimize for maximum speed, but disable some optimizations which
+    increase code size for a small speed benefit
+
+-O2 optimize for maximum speed (DEFAULT)
+
+-O3 optimize for maximum speed and enable more aggressive
+    optimizations that may not improve performance on some programs
+
+-O  same as -O2
+
+-Os  enable speed optimizations, but disable some optimizations which
+    increase code size for small speed benefit
+
+-O0  disable optimizations
+
+-Ofast  enable -O3 -no-prec-div -fp-model fast=2 optimizations
+
+-fno-alias  assume no aliasing in program
+
+-fno-fnalias  assume no aliasing within functions, but assume
+    aliasing across calls
+
+-fast  enable -xHOST -ipo -no-prec-div -O3 -static -fp-model=fast=2
+    optimizations
+
+-opt_report Generate an optimization report
+
+

You can learn more about the optimizations performed at various levels as shown below.

+
ifort -V -help opt
+
+

Compiling for debugging and related purposes#

+
 -g[n] 
+       0 Disables generation of symbolic debug information.
+       1 Produces minimal debug information for performing stack traces.
+       2 Produces complete debug information. This is the same as specifying -g with no n.
+       3 Produces extra information that may be useful for some tools.
+
+-check <keyword>    Performs the indicated runtime check. Keywords include:
+
+none    Disables all check options.
+
+arg_temp_created    Determines whether checking occurs for actual
+    arguments copied into temporary storage before routine calls.
+
+assume    Determines whether checking occurs to test that the
+    scalar-Boolean-expression in the ASSUME directive is true, or
+    that the addresses in the ASSUME_ALIGNED directive  are  aligned 
+    on  the specified byte boundaries.
+
+bounds    Determines whether checking occurs for array subscript and
+    character substring expressions.
+
+contiguous    Determines whether the compiler checks pointer
+    contiguity at pointer-assignment time.
+
+format    Determines whether checking occurs for the data type of an
+    item being formatted for output.
+
+output_conversion    Determines whether checking occurs for the fit
+    of data items within a designated format descriptor field.
+
+pointers    Determines whether checking occurs for certain
+    disassociated or uninitialized pointers or unallocated
+    allocatable objects.
+
+shape    Determines whether array conformance checking is performed.
+
+stack    Determines whether checking occurs on the stack frame.
+
+teams    Determines whether the run-time system diagnoses
+    non-standard coarray team usage.
+
+udio_iostat    Determines whether conformance checking occurs when
+    user-defined derived type input/output routines are executed.
+
+uninit     Determines whether checking occurs for uninitialized
+    variables.
+
+    all    Enables all check options.
+
+-p  Generate extra code to write profile information suitable for
+           the analysis program gprof
+
+
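A debug build that enables several of these runtime checks might look like the following sketch (-traceback adds source information to runtime error reports):

# Debug build with bounds and uninitialized-variable checking
ifort -g -O0 -check bounds,uninit -traceback mycode.f90 -o myexec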

Some file extensions#

+
Filenames with the suffix .f90 are interpreted as free-form Fortran
+    95/90 source files.
+
+Filenames with the suffix .f, .for, or .ftn are interpreted as
+    fixed-form Fortran source files.
+
+Filenames with the suffix .fpp, .F, .FOR, .FTN, or .FPP are
+    interpreted as fixed-form Fortran source files, which must be
+    preprocessed by the fpp preprocessor before being compiled.
+
+Filenames with the suffix .F90 are interpreted as free-form Fortran
+    source files, which must be pre-processed by the fpp preprocessor
+    before being compiled.
+
+


+

Language standard settings (Dialect)#

+
-stand 
+
+none    Tells the compiler to issue no messages for nonstandard
+    language elements. This is the same as specifying nostand.
+
+f90    Tells the compiler to issue messages for language elements
+    that are not standard in Fortran 90.
+
+f95    Tells the compiler to issue messages for language elements
+    that are not standard in Fortran 95.
+
+f03    Tells the compiler to issue messages for language elements
+    that are not standard in Fortran 2003.
+
+f08    Tells the compiler to issue messages for language elements
+    that are not standard in Fortran 2008.
+
+f18    Tells the compiler to issue messages for language elements
+    that are not standard in Fortran 2018. This option is set if you
+    specify warn stderrors.
+
+

Generate Listing#

+
-list
+
+

Preprocessing#

+

Unless explicitly enabled by the file extension as described above, files are not preprocessed. If you pass the -E option, the file will be preprocessed only and will not be compiled. The output is sent to the standard output. The option -fpp will force running the preprocessor.

+

OpenMP support#

+
-fopenmp
+    Enable handling of OpenMP directives
+-qopenmp-stubs
+    Compile OpenMP programs in sequential mode 
+-parallel          
+    Auto parallelize
+
+

OpenACC support#

+
Not supported
+
+

Coarray Fortran#

+
-coarray[=keyword] Enables the coarray feature where keyword
+    Specifies the memory system where the coarrays will be
+    implemented. Possible values are:
+
+shared    Indicates a shared memory system. This is the default.
+
+distributed    Indicates a distributed memory system.
+
+single     Indicates a configuration where the image does not
+    contain self-replication code. This results in an executable with
+    a single running image. This configuration can be useful for
+    debugging purposes, even though there are no inter-image
+    interactions.
+
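A minimal coarray example, assuming a shared-memory run (file and variable names are arbitrary; FOR_COARRAY_NUM_IMAGES sets the image count at run time):

cat > hello_caf.f90 <<'EOF'
program hello_caf
  implicit none
  ! Every image reports its index and the total image count
  print *, 'Hello from image', this_image(), 'of', num_images()
end program hello_caf
EOF

ifort -coarray=shared hello_caf.f90 -o hello_caf
FOR_COARRAY_NUM_IMAGES=4 ./hello_caf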
+
Important compiler specific options#

+
-save    Causes variables to be placed in static memory.
+
+
+Default:    This option saves all variables in static allocation
+    except local variables within a recursive routine and variables
+    declared as AUTOMATIC.
+
+-auto-scalar    Scalar variables of intrinsic types INTEGER, REAL,
+    COMPLEX, and LOGICAL are allocated  to the run-time stack unless
+    the routine is recursive or OpenMP is enabled.  For Fortran 95 and later,
+    variables are not saved by default and allocatable arrays are
+    deallocated.  This appears to be true for ifort even if the standard
+    is set to f90.  However, it is poor practice to rely on this
+    behavior.
+
+
+-Wall.   This enables all the warnings about constructions that some
+    users consider questionable, and that are easy to avoid (or
+    modify to prevent the warning)
+
+-warn declarations    Generate warnings for variables that are not
+    explicitly typed.
+
+-Wextra     This enables some extra warning flags that are not
+    enabled by -Wall.
+
+
+-help [category]    print full or category help message
+
+Valid categories include
+       advanced        - Advanced Optimizations
+       codegen         - Code Generation
+       compatibility   - Compatibility
+       component       - Component Control
+       data            - Data
+       deprecated      - Deprecated Options
+       diagnostics     - Compiler Diagnostics
+       float           - Floating Point
+       help            - Help
+       inline          - Inlining
+       ipo             - Interprocedural Optimization (IPO)
+       language        - Language
+       link            - Linking/Linker
+       misc            - Miscellaneous
+       opt             - Optimization
+       output          - Output
+       pgo             - Profile Guided Optimization (PGO)
+       preproc         - Preprocessor
+       reports         - Optimization Reports
+
+       openmp          - OpenMP and Parallel Processing
+
+

Moving to Intel's new compiler ifx#

+

Intel® Fortran Compiler Classic (ifort) is now deprecated and will be discontinued in late 2024. Intel recommends that customers transition now to using the LLVM-based Intel® Fortran Compiler (ifx). Other than the name change, some people will not notice significant differences. The new compiler supports offloading to Intel GPUs. Kestrel and Swift do not have Intel GPUs, so this is not relevant at NREL.

+

One notable deletion from the new compiler is auto-parallelization. With ifort, auto-parallelization is enabled by the -parallel compiler option. That is not true for ifx; there is no auto-parallelization feature with ifx.

+

For complete details please see: https://www.intel.com/content/www/us/en/developer/articles/guide/porting-guide-for-ifort-to-ifx.html

+

Cray CC#

+

In the Cray environment cc is a generic call for several different compilers. The compiler actually called is determined by the modules loaded. Here we discuss Cray C : Version 14.0.4, which is Clang based with Cray enhancements. cc will detect if the program being compiled calls MPI routines; if so, it will build the program as an MPI program.

+

Normal invocation#

+
# Compile and link a program with the executable sent to the indicated
+  file
+cc mycode.c  -o myexec
+
+# Compile a file but don't link 
+cc -c mycode.c 
+
+

Default optimization#

+

The default optimization level is -O0.

+

Compiling for performance#

+

-O0, -O1, -O2, -O3, -Ofast, -Os, -Oz, -Og, -O, -O4 Specify which
+    optimization level to use: 
+
+-O0    Means "no optimization": this
+    level compiles the fastest and generates the most debuggable
+    code.
+
+-O1    Somewhere between -O0 and -O2.
+
+-O2    Moderate level of optimization which enables most
+    optimizations.
+
+-O3     Like -O2, except that it enables optimizations that take
+    longer to perform or that may generate larger code (in an attempt
+    to make the program run faster).
+
+-Ofast     Enables all the optimizations from -O3 along with other
+    aggressive optimizations that may violate strict compliance with
+    language standards.
+
+-Os     Like -O2 with extra optimizations to reduce code size.
+
+-Oz    Like -Os (and thus -O2), but reduces code size further.
+
+-Og    Like -O1. In future versions, this option might disable
+    different optimizations in order to improve debuggability.
+
+-O    Equivalent to -O1.
+
+-O4    and higher Currently equivalent to -O3
+
+For best performance, -Ofast with -flto is recommended, where -flto generates output files in LLVM formats suitable for link time optimization. The performance improvement at high levels of optimization can be significant. Here are the run times for a simple finite difference code at various levels of optimization.

+
Option       Run Time (sec)
+-O0            10.30
+-O1             3.19
+-O2             2.99
+-O3             2.04
+-Ofast          1.88
+-Ofast -flto    1.49
+-Os             3.19
+-Oz             3.31
+-Og             3.19
+-O              3.20
+
+

Compiling for debugging and related purposes#

+
-fstandalone-debug 
+      Turn off the stripping of some debug information that might be useful to some debuggers
+
+-feliminate-unused-debug-types
+      By default, Clang does not emit type information for types that are defined but not 
+      used in a program. To retain the debug info for these unused types, the negation 
+      -fno-eliminate-unused-debug-types can be used.
+
+-fexceptions
+      Enable generation of unwind information. This allows exceptions to be thrown through 
+      Clang compiled stack frames.  This is on by default in x86-64.
+
+-ftrapv
+      Generate code to catch integer overflow errors.  Signed integer overflow is undefined 
+      in C. With this flag, extra code is generated to detect this and abort when it happens.
+
+

Some file extensions#

+
file.c
+   C source code that must be preprocessed.
+
+file.i
+   C source code that should not be preprocessed.
+
+file.ii
+   C++ source code that should not be preprocessed.
+
+file.cc
+file.cp
+file.cxx
+file.cpp
+file.CPP
+file.c++
+file.C
+   C++ source code that must be preprocessed.  
+file.upc
+   UPC
+
+

Language standard settings (Dialect)#

+

Standards are determined by the file extension as given above. Some additional checks can be performed.

+
  -std=<standard>
+
+

Specify the language standard to compile for.

+
Supported values for the C language are:#

  • ISO C 1990: c89, c90, iso9899:1990
  • ISO C 1999 with GNU extensions: gnu99
  • ISO C 2011: c11, iso9899:2011
  • ISO C 2011 with GNU extensions: gnu11
  • ISO C 2017: iso9899:2017, c17
  • ISO C 2017 with GNU extensions: gnu17

The default C language standard is gnu17

+
Supported values for the C++ language are:#

  • ISO C++ 1998 with amendments: c++98, c++03
  • ISO C++ 1998 with amendments and GNU extensions: gnu++98, gnu++03
  • ISO C++ 2011 with amendments: c++11
  • ISO C++ 2011 with amendments and GNU extensions: gnu++11
  • ISO C++ 2014 with amendments: c++14
  • ISO C++ 2014 with amendments and GNU extensions: gnu++14
  • ISO C++ 2017 with amendments: c++17
  • ISO C++ 2017 with amendments and GNU extensions: gnu++17
  • Working draft for ISO C++ 2020: c++2a
  • Working draft for ISO C++ 2020 with GNU extensions: gnu++2a

The default OpenCL language standard is cl1.0.

Supported values for the CUDA language are:#

  • cuda

Generating listing#

+
-fsave-loopmark    Generate a loopmark listing file (.lst) that shows which optimizations 
+    were applied to which parts of the source code.
+
+-floopmark-style=<style>    Specifies the style of the loopmark listing file.
+
+    Valid values for <style> are:
+        ''grouped''         Places all messages at the end of the listing.
+        ''interspersed''    Places each message after the relevant source code line.
+
+

Preprocessing#

+

Automatic preprocessing is determined by the file name extension as discussed above. You can manually turn it on/off via the options

+
 -E    with output going to standard out
+
+

The compiler predefines the macro cray in addition to all of the usual Clang predefined macros.

+

OpenMP support#

+
-fopenmp    Enables OpenMP and links in OpenMP libraries
+
+

OpenACC support#

+
Not suported    
+
+

Important compiler specific options#

+
   Unified Parallel C (UPC) Options
+-hupc, -hdefault -hupc    Configures the compiler driver to expect
+    UPC source code.  Source files with a .upc extension are
+    automatically treated as UPC code, but this option permits a file
+    with any other extension (typically .c) to be understood as UPC
+    code.  -hdefault cancels this behavior; if both -hupc and
+    -hdefault appear in a command line, whichever appears last takes
+    precedence and applies to all source files in the command line.
+
+-fupc-auto-amo, -fno-upc-auto-amo    Automatically use network
+    atomics for remote updates to reduce latency.  For example, x +=
+    1 can be performed as a remote atomic add.  If an update is
+    recognized as local to the current  thread,  then  no  atomic  is
+    used.  These atomics are intended as a performance optimization
+    only and shall not be relied upon to prevent race conditions. 
+    Enabled at -O1 and above.
+
+-fupc-buffered-async, -fno-upc-buffered-async    Set aside memory in
+    the UPC runtime library for aggregating random remote accesses
+    designated with "#pragma pgas buffered_async".  Disabled by
+    default.
+
+-fupc-pattern, -fno-upc-pattern    Identify simple communication
+    loops and aggregate the remote accesses into a single function
+    call which replaces the loop.  Enabled at -O1 and above.
+
+-fupc-threads=<N>    Set  the number of threads for a static THREADS
+    translation.  This option causes __UPC_STATIC_THREADS__ to be
+    defined instead of __UPC_DYNAMIC_THREADS__ and replaces all uses
+    of the UPC keyword THREADS with the value N.
+
+

Cray ftn#

+

In the Cray environment ftn is a generic call for several different compilers. The compiler actually called is determined by the modules loaded. Here we discuss Cray Fortran : Version 14.0.4. Ftn will detect if the program being compiled calls MPI routines; if so, it will build the program as an MPI program.

+

Normal invocation#

+
# Compile and link a program with the executable sent to the indicated
+  file
+ftn mycode.f90  -o myexec
+
+# Compile a file but don't link 
+ftn -c mycode.f90
+
+

Default optimization#

+

The default optimization level is -O 2.

+

Compiling for performance#

+
-O
+
+0      Specifies  no  automatic  cache  management;  all memory
+    references are allocated to cache.  Both automatic cache blocking
+    and manual cache blocking (by use of the BLOCKABLE directive) are
+    shut off. Characteristics include low compile time.  This option
+    is compatible with all optimization levels.
+
+1      Specifies conservative automatic cache management.
+    Characteristics include moderate compile time.  Symbols are
+    placed in the cache when the possibility of cache reuse exists
+    and the predicted cache footprint of the symbol in isolation is
+    small enough to experience reuse.
+
+2      Specifies  moderately  aggressive automatic cache management. 
+    Characteristics include moderate compile time.  Symbols are
+    placed in the cache when the possibility of cache reuse exists
+    and the predicted state of the cache model is such that the
+    symbol will be reused. (Default)
+
+3      Specifies aggressive automatic cache management.
+    Characteristics include potentially high compile time.  Symbols
+    are placed in the cache when the possibility of cache reuse
+    exists and the  allocation of the symbol to the cache is
+    predicted to increase the number of cache hits.
+
+fast    Same as 3.
+
+

Compiling for debugging and related purposes#

+
-G (level)
+
+    0      Full   information is available for debugging, but at the cost
+        of a slower and larger executable.  Breakpoints can be set at
+        each line.  Most optimizations are disabled.
+
+    1      Most  information is available with partial optimization. Some
+        optimizations make tracebacks and limited breakpoints available
+        in the debugger.  Some scalar optimizations and  all  loop  nest
+        restructuring  is  disabled,  but  the source code will be
+        visible and most symbols will be available.
+
+    2      Partial information.  Most optimizations, tracebacks and very
+        limited breakpoints are available in the debugger.  The source
+        code will be visible and some symbols will be  available.
+
+
+-R runchk Specifies any of a group of runtime checks for your
+    program.  To specify more than one type of checking, specify
+    consecutive runchk arguments, as follows: -R bs.
+
+
+    b      Enables checking of array bounds.  Bounds checking is not
+        performed on arrays dimensioned as (1).  Enables -Ooverindex.
+
+    c      Enables conformance checking of array operands in array
+        expressions.
+
+    d      Enables a run time check for the !dir$ collapse directive and
+        checks the validity of the loop_info count information.
+
+    p      Generates run time code to check the association or allocation
+        status of referenced POINTER variables, ALLOCATABLE arrays, or
+        assumed-shape arrays.
+
+    s      Enables checking of character substring bounds.
+
+
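A debug-oriented compile line combining these options might look like the following (illustrative only):

# Partial debug information plus bounds and substring checking
ftn -G 1 -R bs mycode.f90 -o myexec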

Some file extensions#

+

The default is fixed for source files that have .f, .F, .for, or .FOR

+

The default is free for source files that have .f90, .F90, .f95, .F95, .f03, .F03, .f08, .F08, .f18, .F18, .ftn, or .FTN

+

The upper-case file extensions, .F, .FOR, .F90, .F95, .F03, .F08, .F18, or .FTN, will enable source preprocessing by default.

+

Language standard settings (Dialect)#

+

Standards are determined by the file extension as given above. Some additional checks can be performed.

+
-e enable
+
+      b      If enabled, issue a warning message rather than an error
+        message when the compiler detects a call to a procedure
+        with one or more dummy arguments having the TARGET,
+        VOLATILE or ASYNCHRONOUS attribute and there is not an
+        explicit interface definition.
+
+
+      c      Interface checking: use Cray system modules to check
+        library calls in a compilation.  If you have a procedure
+        with the same name as one in the library, you will get
+        errors, as the compiler does not skip  user- specified
+        procedures when performing checks.
+
+
+      C      Enable/disable some types of standard call site
+        checking.  The current Fortran standard requires that the
+        number and types of arguments must agree between the caller
+        and callee.  These constraints are enforced in cases where
+        the compiler can detect them, however, specifying -dC
+        disables some of this error-checking, which may be
+        necessary in order to get some older Fortran codes to
+        compile.
+
+-f source_form free or fixed
+
+

Language standard settings (Save)#

+
    -e v    Allocate  variables to static storage.  These variables
+            are treated as if they had appeared in a SAVE statement.  Variables
+            that are explicitly or implicitly defined as automatic variables are
+            not allocated to static storage. The following types of variables are
+            not allocated to static storage: automatic variables (explicitly or
+            implicitly stated), variables declared with the AUTOMATIC attribute,
+            variables allocated in  an  ALLOCATE statement, and local
+            variables in explicit recursive procedures.  Variables with the
+            ALLOCATABLE attribute remain allocated upon procedure exit, unless
+            explicitly deallocated, but they are not allocated in static memory. 
+            Variables in explicit recursive procedures consist of those in
+            functions, in subroutines, and in internal procedures within
+            functions and subroutines that have been defined with the RECURSIVE 
+            attribute.  The STACK compiler directive overrides this option.
+
+

Generating listing#

+

-h list=a

+

Preprocessing#

+

Automatic preprocessing is determined by the file name extension as discussed above. You can manually turn it on/off via the options

+

 -E    Preprocess only; do not compile (output goes to standard out)
+ -eZ   Preprocess and compile
+ -eP   Preprocess, don't compile
+
+The Cray Fortran preprocessor has limited functionality. In particular, it does not remove C style comments, which can cause compile errors. You might want to use the GNU preprocessor instead.

+
gfortran -cpp -E file.F90 > file.f90
+ftn file.f90
+
+

OpenMP support#

+
-homp    Enables OpenMP and links in OpenMP libraries when possible
+    using CCE-Classic.
+
+-hnoomp    Disables OpenMP and links in non-OpenMP libraries when
+    using CCE-classic.
+
+THE FOLLOWING APPLIES IF THE BACKEND COMPILER IS NOT CRAY FORTRAN.
+
+-fopenmp   Enables OpenMP and links in OpenMP libraries when possible
+    using CCE, AOCC, and GNU.
+
+-openmp    Enables OpenMP and links in OpenMP libraries when
+    possible.
+
+-noopenmp       Disables OpenMP.
+
+-mp        Enables OpenMP and links in OpenMP libraries when
+    possible using PGI.
+
+-Mnoopenmp  Disables OpenMP and links in non-OpenMP libraries when
+    using PGI.
+
+-qopenmp     Enables OpenMP and links in OpenMP libraries when
+    possible when using Intel.
+
+-qno-openmp  Disables OpenMP and links in non-OpenMP libraries
+    when possible when using Intel.
+
+

OpenACC support#

+
 -h acc         
+
+

Coarray#

+

The -h pgas_runtime option directs the compiler driver to link with the runtime libraries required when linking programs that use UPC or coarrays. In general, a resource manager job launcher such as aprun or srun must be used to launch the resulting executable.

+

Important compiler specific options#

+
-e I      Treat all variables as if an IMPLICIT NONE statement had been specified. 
+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Containers/apptainer/index.html b/Documentation/Development/Containers/apptainer/index.html new file mode 100644 index 000000000..dd79203b2 --- /dev/null +++ b/Documentation/Development/Containers/apptainer/index.html @@ -0,0 +1,5989 @@ + + + + + + + + + + + + + + + + + + + + + + + Apptainer - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Apptainer

+ +
+

Note

+

Singularity has been deprecated in favor of a new container runtime environment called Apptainer, which is its direct descendant. Apptainer will run Singularity containers and it supports Singularity commands by default. Since Singularity is deprecated, it is advised to use Apptainer when building new images. More information about Apptainer can be found at https://apptainer.org.

+
+

How to use Apptainer#

+

On NREL HPC systems, Apptainer is accessed via a module named apptainer (you can check the current default module via ml -d av apptainer). On Kestrel specifically, the directory /nopt/nrel/apps/software/apptainer/1.1.9/examples holds a number of images (*.sif) and an example script (script) that shows how to run containers hosting MPI programs across multiple nodes. The script can also be accessed from our GitHub repository.

+

Before we get to the more complicated example from script, we'll first look at downloading (or pulling) and working with a simple image. The following examples assume you are logged into Kestrel, but the concepts demonstrated are still valid for any host system on which you wish to execute a container.

+

Input commands are preceded by a $.

+
+

Note

+

If you wish to containerize your own application, it may be worth starting with building a local Docker image and transferring it to Kestrel before attempting to directly create your own Apptainer image, since you do not have root access on HPC systems.

+
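One way to make that transfer, assuming you have built an image locally with Docker, is to export it as a tar archive, copy it to the HPC system, and convert it with Apptainer (image and file names here are placeholders):

# On your workstation: save the local Docker image to a tar archive
docker save -o myimage.tar myimage:latest

# Copy the archive to Kestrel (e.g., with scp or rsync), then on Kestrel:
module load apptainer
apptainer build myimage.sif docker-archive://myimage.tar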
+

Apptainer runtime examples#

+

Run hello-world Ubuntu image#

+
Allocate a compute node.#
+
$ ssh USERNAME@kestrel.hpc.nrel.gov
+[USERNAME@kl1 ~]$ salloc --exclusive --mem=0 --tasks-per-node=104 --nodes=1 --time=01:00:00 --account=MYACCOUNT --partition=debug
+[USERNAME@x1000c0s0b0n0 ~]$ cat /etc/redhat-release
+Red Hat Enterprise Linux release 8.6 (Ootpa)
+
+
Load the apptainer module#
+

[USERNAME@x1000c0s0b0n0 ~]$ module purge
+[USERNAME@x1000c0s0b0n0 ~]$ ml -d av apptainer
+------------------------------------ /nopt/nrel/apps/modules/default/application ------------------------------------
+   apptainer/1.1.9
+[USERNAME@x1000c0s0b0n0 ~]$ module load apptainer/1.1.9
+
+Note: at the time of writing, apptainer/1.1.9 is the default Apptainer module on Kestrel as determined by running ml -d av apptainer.

+
Retrieve hello-world image. Be sure to use /scratch as images are typically large#
+
[USERNAME@x1000c0s0b0n0 ~]$ cd /scratch/$USER
+[USERNAME@x1000c0s0b0n0 USERNAME]$ mkdir -p apptainer-images
+[USERNAME@x1000c0s0b0n0 USERNAME]$ cd apptainer-images
+[USERNAME@x1000c0s0b0n0 apptainer-images]$ apptainer pull --name hello-world.simg shub://vsoch/hello-world
+Progress |===================================| 100.0%
+
+
Explore image details#
+
[USERNAME@x1000c0s0b0n0 apptainer-images]$ apptainer inspect hello-world.simg # Shows labels
+{
+    "org.label-schema.usage.apptainer.deffile.bootstrap": "docker",
+    "MAINTAINER": "vanessasaur",
+    "org.label-schema.usage.apptainer.deffile": "apptainer",
+    "org.label-schema.schema-version": "1.0",
+    "WHATAMI": "dinosaur",
+    "org.label-schema.usage.apptainer.deffile.from": "ubuntu:14.04",
+    "org.label-schema.build-date": "2017-10-15T12:52:56+00:00",
+    "org.label-schema.usage.apptainer.version": "2.4-feature-squashbuild-secbuild.g780c84d",
+    "org.label-schema.build-size": "333MB"
+}
+[USERNAME@x1000c0s0b0n0 apptainer-images]$ apptainer inspect -r hello-world.simg # Shows the script run
+#!/bin/sh
+
+exec /bin/bash /rawr.sh
+
+
Run image default script#
+
[USERNAME@x1000c0s0b0n0 apptainer-images]$ apptainer run hello-world.simg
+RaawwWWWWWRRRR!! Avocado!
+
+

Run images containing MPI programs on multiple nodes#

+

As mentioned above, there is a script in the apptainer directory that shows how MPI applications built inside an image can be run on multiple nodes. We'll run 5 containers with different versions of MPI. Each container has two MPI programs installed, a glorified Hello World (phostone) and PingPong (ppong). The 5 versions of MPI are:

+
    +
  1. openmpi
  2. IntelMPI
  3. MPICH - with ch4
  4. MPICH - with ch4 with different compile options
  5. MPICH - with ch3
+

"ch*" can be thought as a "lower level" communications protocol. A MPICH container might be built with either but we have found that ch4 is considerably faster on Kestrel.

+

The script can be found at /nopt/nrel/apps/software/apptainer/1.1.9/examples/script, as well as our GitHub repository.

+

Here is a copy:

+
+Sample job script: Running MPI-enabled Apptainer containers +
#!/bin/bash 
+#SBATCH --job-name="apptainer"
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=2
+#SBATCH --exclusive
+#SBATCH --export=ALL
+#SBATCH --time=02:00:00
+#SBATCH --output=apptainer.log
+#SBATCH --mem=0
+
+export STARTDIR=`pwd`
+export CDIR=/nopt/nrel/apps/software/apptainer/1.1.9/examples
+mkdir $SLURM_JOB_ID
+cd $SLURM_JOB_ID
+
+cat $0 >   script
+printenv > env
+
+touch warnings
+touch output
+
+module load apptainer
+which apptainer >> output
+
+echo "hostname" >> output
+hostname        >> output
+
+echo "from alpine.sif" >> output
+          apptainer exec $CDIR/alpine.sif hostname  >> output
+echo "from alpine.sif with srun" >> output
+srun -n 1 --nodes=1 apptainer exec $CDIR/alpine.sif cat /etc/os-release  >> output
+
+
+export OMP_NUM_THREADS=2
+
+$CDIR/tymer times starting
+
+MPI=pmix
+for v in openmpi intel mpich_ch4 mpich_ch4b  mpich_ch3; do
+  srun  --mpi=$MPI   apptainer  exec   $CDIR/$v.sif  /opt/examples/affinity/tds/phostone -F >  phost.$v  2>>warnings
+  $CDIR/tymer times $v
+  MPI=pmi2
+  unset PMIX_MCA_gds
+done
+
+MPI=pmix
+#skip mpich_ch3 because it is very slow
+for v in openmpi intel mpich_ch4 mpich_ch4b           ; do
+  srun  --mpi=$MPI   apptainer  exec   $CDIR/$v.sif  /opt/examples/affinity/tds/ppong>  ppong.$v  2>>warnings
+  $CDIR/tymer times $v
+  MPI=pmi2
+  unset PMIX_MCA_gds
+done
+
+$CDIR/tymer times finished
+
+mv $STARTDIR/apptainer.log .
+
+
+

We set the variable CDIR which points to the directory from which we will get our containers.

+

We next create a directory for our run and go there. The cat and printenv commands give us a copy of our script and the environment in which we are running. This is useful for debugging.

+

Before we run the MPI containers, we run the command hostname from inside a very simple container, alpine.sif. We show that containers can be run both with and without srun. In the second instance we cat /etc/os-release to show we are running a different OS.

+

Then we get into the MPI containers. This is done in a loop over containers containing the MPI versions: openmpi, intelmpi, mpich_ch4, mpich_ch4b, and mpich_ch3.

+

The application tymer is a simple wall clock timer.

+

The --mpi= option on the srun line instructs Slurm how to launch jobs. The normal option is --mpi=pmi2. However, containers using OpenMPI might need the option --mpi=pmix, as we do here.
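To try a single MPI container by hand outside the full script, a minimal sketch (assuming a multi-node interactive allocation and the example images described above) is:

+module load apptainer
+export CDIR=/nopt/nrel/apps/software/apptainer/1.1.9/examples
+srun -N 2 -n 4 --mpi=pmi2 apptainer exec $CDIR/mpich_ch4.sif /opt/examples/affinity/tds/phostone -F
+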

+

The first loop just runs a quick "hello world" example. The second loop runs a pingpong test. We skip the mpich_ch3 pingpong test because it runs very slowly.

+

You can see example output from this script in the directory:

+
/nopt/nrel/apps/software/apptainer/1.1.9/examples/output/
+
+

Within /nopt/nrel/apps/software/apptainer/1.1.9/examples, the subdirectory defs contains the recipes for the images in examples. The images apptainer.sif and intel.sif were built in two steps using app_base.def - apptainer.def and mods_intel.def - intel.def. They can also be found in the HPC code examples repository.

+

The script sif2def can be used to generate a .def recipe from a .sif image. It has not been extensively tested, so it may not work for all images and is provided here "as is."

+

Apptainer buildtime examples#

+

Create Ubuntu-based image with MPI support#

+

Apptainer images can be generated from a .def recipe.

+

This example shows how to create an Apptainer image running on the Ubuntu operating system with openmpi installed. The recipe is shown in pieces to make it easier to describe what each section does. The complete recipe can be found in the defs subdirectory of /nopt/nrel/apps/software/apptainer/1.1.9/examples. Building images requires root/admin privileges, so the build process must be run on a user's computer with apptainer installed or via the Singularity Container Service. After creation, the image can be copied to Kestrel and run.

+
Create a new recipe based on ubuntu:latest#
+
Bootstrap: docker
+from: ubuntu:latest
+
+
Add LD_LIBRARY_PATH /usr/local/lib used by OpenMPI#
+
%environment
+    export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
+    export PMIX_MCA_gds=^ds12
+
+
Install development tools after bootstrap is created#
+
%post
+    echo "Installing basic development packages..."
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    apt-get install -y bash gcc g++ gfortran make curl python3
+
+
Download, compile and install openmpi.#
+
    echo "Installing OPENMPI..."
+    curl https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz --output openmpi-4.1.5.tar.gz
+    mkdir -p /opt/openmpi/src
+    tar -xzf openmpi-4.1.5.tar.gz -C /opt/openmpi/src
+    cd /opt/openmpi/src/*
+    ./configure 
+    make install
+
+
Compile and install example MPI application#
+
    echo "Build OPENMPI example..."
+    export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
+    cd /opt/openmpi/src/*/examples
+    mpicc ring_c.c -o /usr/bin/ring
+
+
Set default script to run ring#
+
%runscript
    /usr/bin/ring
+
+
Example Build image command (must have root access)#
+
sudo $(type -p apptainer) build small.sif  ubuntu-mpi.def
+
+
Test image#
+
[kuser@kl1 ~]$ salloc --exclusive --mem=0 --tasks-per-node=104 --nodes=2 --time=01:00:00 --account=MYACCOUNT --partition=debug
+salloc: Granted job allocation 90367
+salloc: Waiting for resource configuration
+salloc: Nodes x3000c0s25b0n0,x3000c0s27b0n0 are ready for job
+[kuser@x3000c0s25b0n0 ~]$ module load apptainer 
+[kuser@x3000c0s25b0n0 ~]$ srun -n 8 --tasks-per-node=4 --mpi=pmix apptainer run small.sif
+Process 2 exiting
+Process 3 exiting
+Process 0 sending 10 to 1, tag 201 (8 processes in ring)
+Process 0 sent to 1
+Process 0 decremented value: 9
+Process 0 decremented value: 8
+Process 0 decremented value: 7
+Process 0 decremented value: 6
+Process 0 decremented value: 5
+Process 0 decremented value: 4
+Process 0 decremented value: 3
+Process 0 decremented value: 2
+Process 0 decremented value: 1
+Process 0 decremented value: 0
+Process 0 exiting
+Process 1 exiting
+Process 5 exiting
+Process 6 exiting
+Process 7 exiting
+Process 4 exiting
+[kuser@x3000c0s25b0n0 ~]$
+
+

Utilizing GPU resources with Apptainer images#

+

GPU-accelerated software often has complex software and hardware requirements to function properly, making containerization a particularly attractive option for deployment and use. These requirements manifest themselves as you are building your image (buildtime) and when you run a container (runtime). This section describes key components of software images that are successfully GPU-enabled with a Tensorflow container example. For more detailed documentation on the subject, visit Apptainer's dedicated GPU Support page.

+

Tensorflow Container Example#

+

1. Pull a compatible version of GPU-enabled Tensorflow from DockerHub#

+

There are several versions (tags) of Tensorflow images available from the DockerHub container registry, each with different versions of GPU drivers and CUDA. You can obtain this information from the host by running the command nvidia-smi after allocating a GPU within a Slurm job on your desired system. Alternatively, you could simply consult the table below. If your running container is installed with a different GPU driver/CUDA version than what is listed below for your target system, you will either run into a fatal error, or the software will bypass the GPU and run on the CPU, slowing computation.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| System    | Partition name | GPU type (cards per node) | nvidia-smi GPU driver version | CUDA Version |
|-----------|----------------|---------------------------|-------------------------------|--------------|
| Kestrel   | gpu-h100       | H100 (4)                  | 550.54.15                     | 12.4         |
| Swift     | gpu            | A100 (4)                  | 550.54.15                     | 12.4         |
| Vermilion | gpu            | A100 (1)                  | 460.106.00                    | 11.2         |
+

Kestrel's H100 GPUs run with CUDA 12.4 with a GPU driver version of 550.54.15. Most GPU-enabled applications are compatible with a given major version release of CUDA; for example, if an application requires CUDA/12.4, it will more than likely work with other versions of CUDA >= 12.0. So for this example on Kestrel, we are looking for a Tensorflow image tag that includes a CUDA version as close to 12.4 as possible. On DockerHub, we see from consulting the layers of tensorflow:2.15.0-gpu that this image fits our requirements (note line 14: ENV CUDA_VERSION=12.3.0). At the time of writing, a Tensorflow image with CUDA/12.4 is not yet available from this DockerHub repository.

+

First, allocate a Kestrel GPU compute node:

+
salloc -A <YOUR-ACCOUNT> -t 1:00:00 --gpus=1 -N 1 -n 1 --mem-per-cpu=8G
+
+
+

Note

+

We are only requesting 1 GPU card (--gpus=1) of the 4 available per node, and subsequently 1 task (-n 1). Though we are automatically given access to all of the GPU memory on the node, we request 8G of CPU memory from salloc. This is because our Tensorflow example will require a decent amount of CPU memory as it copies data to and from the GPU device. If such CPU memory is a bottleneck in a real-world example, you may want to consider replacing -n 1 --mem-per-cpu=8G with --exclusive --mem=0 to request all of the node's CPU resources, even if you are only using a single GPU card.

+
+

Once we are allocated a node, we will load the Apptainer module, and then pull tensorflow:2.15.0-gpu from DockerHub to a personal scratch location on Kestrel.

+
module load apptainer
+apptainer pull /scratch/$USER/tensorflow-2.15.0.sif docker://tensorflow/tensorflow:2.15.0-gpu
+
+

Once the image finishes pulling, we can see a new .sif file in /scratch/$USER:

+
ls -lh /scratch/$USER/tensorflow-2.15.0.sif
+-rwxrwxr-x 1 USERNAME USERNAME 3.4G Apr  3 11:49 /scratch/USERNAME/tensorflow-2.15.0.sif
+
+
+

Note

+

We recommend saving .sif files to /scratch or /projects whenever feasible, as these images tend to be large, sometimes approaching tens of GB.

+
+

2. Verify GPU device is found#

+
Recognizing GPU device from Slurm#
+

As a reminder, we only requested 1 GPU card in our salloc command above. On the Slurm side of things, we can verify this device is accessible to our computing environment by examining the contents of the SLURM_GPUS_ON_NODE (the number of allocated GPU cards) and SLURM_JOB_GPUS (the device's ID). By grepping for GPU from our list of environmental variables, we can see that Slurm indeed recognizes a single GPU device with ID 0:

+
env | grep GPU
+SLURM_GPUS_ON_NODE=1
+SLURM_JOB_GPUS=0
+
+
Recognizing GPU device from the container#
+

It is important to note that just because Slurm has allocated this device, it doesn't necessarily mean that the Tensorflow container can recognize it. Let's now verify that a GPU is accessible on the containerized Python side of things. We will invoke /scratch/$USER/tensorflow-2.15.0.sif to see whether Tensorflow itself can use the GPU allocated by Slurm:

+
apptainer exec /scratch/$USER/tensorflow-2.15.0.sif python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
+
+

Oh no! You should see this error as the output from the above command:

+
libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program
+
+

What happened here - didn't we pull a Tensorflow image that contains CUDA/12.3? We did, but whenever you run GPU-enabled Apptainer containers, it is critical to supply the --nv flag after exec, otherwise the GPU device(s) will not be found. You can read more about what --nv does here.

+

Let's try finding this device from Python again, this time after supplying --nv to the container runtime:

+
apptainer exec --nv /scratch/$USER/tensorflow-2.15.0.sif python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
+
+

That's better! We can now see that GPU device 0 as allocated by Slurm is accessible to Tensorflow:

+
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
+
+

3. Run example Tensorflow training script#

+

Now we are ready to run the Tensorflow container. We will run the script below, which is based on Tensorflow's advanced quickstart example. This script tests a model that is trained on the mnist example dataset.

+
+Python script: Simple GPU Tensorflow train and test +
import time
+import tensorflow as tf
+from tensorflow.keras.layers import Dense, Flatten, Conv2D
+from tensorflow.keras import Model
+
+# source: https://www.tensorflow.org/tutorials/quickstart/advanced
+
+### load mnist dataset
+mnist = tf.keras.datasets.mnist
+
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+x_train, x_test = x_train / 255.0, x_test / 255.0
+
+# Add a channels dimension
+x_train = x_train[..., tf.newaxis].astype("float32")
+x_test = x_test[..., tf.newaxis].astype("float32")
+
+### Use tf.data to batch and shuffle the dataset
+train_ds = tf.data.Dataset.from_tensor_slices(
+    (x_train, y_train)).shuffle(10000).batch(32)
+
+test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
+
+class MyModel(Model):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = Conv2D(32, 3, activation='relu')
+        self.flatten = Flatten()
+        self.d1 = Dense(128, activation='relu')
+        self.d2 = Dense(10)
+
+    def call(self, x):
+        x = self.conv1(x)
+        x = self.flatten(x)
+        x = self.d1(x)
+        return self.d2(x)
+
+# Create an instance of the model
+model = MyModel()
+
+### optimizer/loss function
+loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+optimizer = tf.keras.optimizers.Adam()
+
+### Select metrics to measure the loss and the accuracy of the model. These metrics accumulate the values over epochs and then print the overall result.
+train_loss = tf.keras.metrics.Mean(name='train_loss')
+train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
+
+test_loss = tf.keras.metrics.Mean(name='test_loss')
+test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
+
+### train the model
+@tf.function
+def train_step(images, labels):
+    with tf.GradientTape() as tape:
+        # training=True is only needed if there are layers with different
+        # behavior during training versus inference (e.g. Dropout).
+        predictions = model(images, training=True)
+        loss = loss_object(labels, predictions)
+    gradients = tape.gradient(loss, model.trainable_variables)
+    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+
+    train_loss(loss)
+    train_accuracy(labels, predictions)
+
+# test the model
+@tf.function
+def test_step(images, labels):
+    # training=False is only needed if there are layers with different
+    # behavior during training versus inference (e.g. Dropout).
+    predictions = model(images, training=False)
+    t_loss = loss_object(labels, predictions)
+
+    test_loss(t_loss)
+    test_accuracy(labels, predictions)
+
+
+t0 = time.time()
+EPOCHS = 10
+for epoch in range(EPOCHS):
+    # Reset the metrics at the start of the next epoch
+    train_loss.reset_states()
+    train_accuracy.reset_states()
+    test_loss.reset_states()
+    test_accuracy.reset_states()
+
+    for images, labels in train_ds:
+        train_step(images, labels)
+
+    for test_images, test_labels in test_ds:
+        test_step(test_images, test_labels)
+
+    print(
+        f'Epoch {epoch + 1}, '
+        f'Loss: {train_loss.result()}, '
+        f'Accuracy: {train_accuracy.result() * 100}, '
+        f'Test Loss: {test_loss.result()}, '
+        f'Test Accuracy: {test_accuracy.result() * 100}'
+    )
+t1 = time.time()
+print(f'A total of {EPOCHS} epochs took {t1-t0} seconds')
+
+
+

Save this script as tensor_test.py into your current working directory and run the following command:

+
apptainer exec --nv /scratch/$USER/tensorflow-2.15.0.sif python tensor_test.py
+
+

Assuming you made the same salloc request above, it should take ~26 seconds to run through 10 training/testing epochs on a single GPU. This particular container is sophisticated enough to automatically switch between CPU and GPU computation depending on the availability of a GPU device. If you'd like to compare the time it takes for this script to run purely on a CPU, simply omit the --nv flag from your call to apptainer above and run the command on the same node. You should observe that the runtime jumps to ~252 seconds, meaning that the GPU computation is almost 10 times faster than the CPU!

+

Best practices and recommendations#

+

This section describes general recommendations and best practices for Apptainer users across NREL's HPC systems.

+

Change Apptainer cache location to /scratch/$USER#

+

By default, Apptainer will cache image layers to your $HOME folder when you pull or build .sif images, which is not ideal as users have a limited storage quota in /home. As you continue to use Apptainer, this cache folder can become quite large and can easily fill your $HOME. Fortunately, the location of this cache folder can be controlled through the APPTAINER_CACHEDIR environmental variable. To avoid overfilling your $HOME with unnecessary cached data, it is recommended to add an APPTAINER_CACHEDIR location to your ~/.bashrc file. You can accomplish this with the following command, which will direct these layers to save to a given system's scratch space:

+

echo "export APPTAINER_CACHEDIR=/scratch/$USER/.apptainer" >> ~/.bashrc

+

Note that you will either need to log out and back into the system, or run source ~/.bashrc for the above change to take effect.

+

Save .def files to home folder and images to /scratch or /projects#

+

An Apptainer definition file (.def) is a relatively small text file that contains much (if not all) of the build context for a given image. Since your $HOME folders on NREL's HPC systems are regularly backed up, it is strongly recommended to save this file to your home directory in case it accidentally gets deleted or otherwise lost. Since .sif images themselves are 1. typically large and 2. can be rebuilt from the .def files, we recommend saving them to a folder outside of your $HOME, for similar reasons described in the previous section. If you intend to work with an image briefly or intermittently, it may make sense to save the .sif to your /scratch folder, from which files can be purged if they haven't been accessed for 28 days. If you plan to use an image frequently over time or share it with other users in your allocation, saving it in a /projects location you have access to may be better.
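A minimal sketch of this layout (the directory and file names are illustrative):

+mkdir -p ~/container-defs /scratch/$USER/apptainer-images
+cp my_image.def ~/container-defs/                     # small recipe, kept in backed-up $HOME
+cp my_image.sif /scratch/$USER/apptainer-images/      # large image, kept in /scratch (or /projects)
+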

+

Bind Mounting Directories#

+

By default, most containers only mount your $HOME folder, current working directory, and a handful of other common folders. If a host directory isn't in this list and isn't explicitly provided during runtime, you may get a "File not found" error. For example, if you are running a container from /scratch/$USER and want to write a result file to a /projects location, you will need to provide the mount path with the -B </path/on/host>:</path/in/container> option:

+
apptainer exec -B /projects:/projects --nv IMAGE.sif COMMAND > /projects/my-project/result.txt
+
+

Provide the --nv flag to Apptainer Runtime (GPU)#

+

Once you allocate at least one GPU card in your job, you then need to make Apptainer recognize the GPU resources you wish to use. To accomplish this, you can supply the --nv flag to an apptainer shell ... or apptainer exec ... command. Using a generic gpu_accelerated_tensorflow.sif image as an example:

+
apptainer exec --nv gpu_accelerated_tensorflow.sif python tensorflow.py
+
+

Providing CUDA to Host Environment (GPU)#

+

In the Tensorflow example above, the container included all of the necessary software to run on a GPU, including CUDA. However, depending on the specific software container you are trying to run, its image may or may not include a working version of CUDA. If you encounter CUDA- or GPU-driver errors, try loading version 12.4 of the CUDA module before running the container:

+
module load cuda/12.4
+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Containers/index.html b/Documentation/Development/Containers/index.html new file mode 100644 index 000000000..6f06613ee --- /dev/null +++ b/Documentation/Development/Containers/index.html @@ -0,0 +1,5076 @@ + + + + + + + + + + + + + + + + + + + + + + + Containers Intro - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Introduction to Software Containerization#

+

What are software images/containers?#

+

Software images provide a method of packaging your code so that its container can be run anywhere you have a container runtime environment. This enables you to create an image on your local laptop and then run it on an HPC system or other computing resource. Software containerization provides an alternative, more robust method of isolating and packaging your code compared to solutions such as Conda virtual environments.

+

A note on terminology: A software container is considered an instance of an image, meaning the former gets created during the runtime of the latter. In other words, a software image is what you build and distribute, whereas the container is what gets executed from a given image.

+

Docker vs. Apptainer#

+

The most common container runtime environment (outside of HPC) is Docker. Due to the fact that it requires root-level permissions to build its associated images and run containers, Docker is not suited for HPC environments and is therefore not available on NREL's systems currently. Apptainer is an alternative containerization tool that can be used in HPC environments because running it does not require root. However, you can use Docker to build images locally and convert them to the Apptainer format for use with HPC (described in more detail here).

+

Compatibility#

+

Apptainer is able to run most Docker images, but Docker is unable to run Apptainer images. A key consideration when deciding to containerize an application is which container engine to build with. A suggested best practice is to build images with Docker whenever possible, as this provides more flexibility. However, if this is not feasible, you may have to build with Apptainer or maintain separate images for each container engine.

+

Advantages to software containerization#

+
    +
  • Portability: Containers can be run on HPC, locally, and on cloud infrastructure used at NREL.
  • Reproducibility: Containers are one option to ensure reproducible research by packaging all necessary software to reproduce an analysis. Containers are also easily versioned using a hash.
  • Modularity: Images are composed of cacheable "layers" of other images or build commands, facilitating the image building process.
  • Workflow integration: Workflow management systems such as Airflow, Nextflow, Luigi, and others provide built-in integration with container engines.
+

Accessing HPC hardware from software containers#

+

Both Apptainer and Docker provide the ability to use hardware-based features on the HPC systems, such as GPUs. A common usage of containers is packaging GPU-enabled tools such as TensorFlow. Apptainer natively provides access to the GPU and driver on the host. Please visit our documentation on accessing GPUs from Apptainer images for more information. In principle, MPI installations can also be accessed from correctly configured containers, but care is needed to ensure compatibility between the libraries on the host and in the container.
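For example, once a GPU node has been allocated, a GPU-enabled image can typically confirm that the device is visible with a command along these lines (the image name is illustrative):

+module load apptainer
+apptainer exec --nv my_gpu_image.sif nvidia-smi
+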

+

Building software images#

+

Regardless of the runtime platform, images are built from a special configuration file. A Dockerfile is such a configuration for Docker, while Apptainer uses a "Definition File" (with a .def extension). These files specify the installation routines necessary to create the desired application, as well as any additional software packages to install and configure in this environment that may be required. You can think of these files as "recipes" for installing a given application you wish to containerize.

+

Building Docker or Apptainer images requires root/admin privileges and cannot be done directly by users of HPC systems. Docker is available on most platforms, and users with admin privileges on a local machine (such as your laptop) can build Docker images locally. The Docker image file can then be pushed to a registry and pulled on the HPC system using Apptainer as described here, or a tool such as Docker2Singularity may be used to convert the image to the Apptainer format. Alternatively, users with admin privileges on a Linux system can run Apptainer locally to build images. Another option is to use Sylab's remote building Container Service, which provides free accounts with a limited amount of build time for Apptainer-formatted images.

+

Example Docker build workflow for HPC users#

+

Because of the permission limitations described above, it is recommended that HPC users start with building a Docker image locally, e.g., on your laptop. If you are a researcher at NREL and plan to regularly containerize applications, you can request Docker to be installed at the admin-level on your work computer from the IT Service Portal. This section will describe a simple workflow for building a Docker image locally, exporting it as a .tar file, uploading it to Kestrel, and converting it to an Apptainer image for execution on HPC.

+

1. Local Docker build#

+

The following Dockerfile illustrates the build steps to create a small image. In this example, we simply install python3 into an image based on the Ubuntu operating system (version 22.04):

+
# Docker example: save as `Dockerfile` in your working directory
+
+FROM ubuntu:22.04
+
+RUN apt-get update -y && apt-get install python3 -y
+
+

Images are normally built (or "bootstrapped") from a base image indicated by FROM. This base image is composed of one or more layers that will be pulled from the appropriate container registry during buildtime. In this example, version 22.04 of the Ubuntu operating system is specified as the base image. Docker pulls from Ubuntu's DockerHub container registry by default. The ability to use a different base image provides a way to use packages which may work more easily on a specific operating system distribution. For example, the Linux distribution on Kestrel is Red Hat, so building the above image would allow the user to install packages from Ubuntu repositories.

+

The RUN portion of the above Dockerfile indicates the command to run during the image's buildtime. In this example, it installs the Python 3 package. Additional commands such as COPY, ENV, and others enable the customization of your image to suit your compute environment requirements.

+

To build an image from the above Dockerfile (we will call it "simple_python3"), copy its contents to a file named Dockerfile in your current working directory and run the following:

+
docker build . -t simple_python3 --platform=linux/amd64
+
+

It is important to note that without the --platform option, docker build will create an image that matches your local machine's CPU chip architecture by default. If you have a machine running on x86-64/amd64, the container's architecture will be compatible with NREL's HPC systems. If your computer does not use chips like these (such as if you have a Mac computer that runs on "Apple Silicon", which uses arm64), your image's architecture will not match what is found on NREL's HPC systems, causing performance degradation of its containers (at best) or fatal errors (at worst) during runtime on Kestrel, Swift, or Vermilion. Regardless of your local machine, as a best practice, you should explicitly specify your image's desired platform during buildtime with --platform=linux/amd64 to ensure compatibility on NREL's HPC systems.

+

2. Export Docker image to .tar#

+

Coming soon: a centralized software image registry/repository for NREL users, which will simplify the following steps. In the meantime, please follow steps 2 and 3 as written.

+

Once the Docker image is built, you can export it to a .tar archive with the following command:

+
docker image save simple_python3 -o simple_python3.tar
+
+

Depending on the specific application you are building, exported images can be relatively large (up to tens of GB). For this reason, you may wish to gzip/compress the .tar to a .tar.gz, which will save network bandwidth and ultimately reduce total transfer time:

+
tar czf simple_python3.tar.gz simple_python3.tar
+
+

3. Upload exported image in .tar.gz format to HPC system#

+

Now that the exported Docker image is compressed to .tar.gz format, you will need to transfer it to one of NREL's HPC systems. Considering the scratch space of Kestrel as an example destination, we will use rsync as the transfer method. Be sure to replace USERNAME with your unique HPC username:

+
rsync -aP --no-g simple_python3.tar.gz USERNAME@kestrel.hpc.nrel.gov:/scratch/USERNAME/
+
+

For more information on alternatives to rsync (such as FileZilla or Globus), please refer to our documentation regarding file transfers.

+

4. Convert .tar to Apptainer image#

+

Once rsync finishes, you should find the following file (roughly 72MB in size) in your personal scratch folder on Kestrel (i.e., /scratch/$USER):

+
[USERNAME@kl1 USERNAME]$ ls -lh /scratch/$USER/simple_python3.tar.gz
+-rw-r--r-- 1 USERNAME USERNAME 72M Mar 20 15:39 /scratch/USERNAME/simple_python3.tar.gz
+
+

The next step is to convert this "Docker archive" to an Apptainer-compatible image. Especially for larger images, this can be a memory-intensive process, so we will first request a job from Slurm, e.g.:

+
salloc -A <account> -p <partition> -t <time> ...
+
+

You can now convert the Docker image archive to an Apptainer .sif image on Kestrel with the following build command. Be sure to first unzip the .tar.gz archive, and prefix the resulting .tar with docker-archive://:

+
cd /scratch/$USER
+module load apptainer/1.1.9
+tar xzf simple_python3.tar.gz simple_python3.tar
+apptainer build simple_python3.sif docker-archive://simple_python3.tar
+
+

Once this finishes, you can invoke the container with apptainer exec simple_python3.sif <command>. Anything that follows the name of the image will be executed from the container, even if the same command is found on the host system. To illustrate, if we examine the location of the python3 binary within the simple_python3.sif image and the host system (Kestrel), we see they are both called from the location /usr/bin/python3:

+
# host's Python3
+[USERNAME@COMPUTE_NODE USERNAME]$ which python3
+/usr/bin/python3
+
+# container's Python3
+[USERNAME@COMPUTE_NODE USERNAME]$ apptainer exec simple_python3.sif which python3
+/usr/bin/python3
+
+

However, the apt-get install python3 command in the Dockerfile should have installed the most up-to-date Python3 library from Ubuntu's package manager, which is 3.10.12 at the time this is written. By contrast, the Python3 library installed on the host is older (version 3.6.8). In this way, we can confirm that the python3 executed with apptainer exec ... is indeed originating from simple_python3.sif:

+
# host Python3
+[USERNAME@COMPUTE_NODE USERNAME]$ python3 --version
+Python 3.6.8
+
+# container Python3
+[USERNAME@COMPUTE_NODE USERNAME]$ apptainer exec simple_python3.sif python3 --version
+Python 3.10.12
+
+

For more specific information on and best practices for using Apptainer on NREL's HPC systems, please refer to its dedicated documentation page.

+

5. A more involved Dockerfile example (CUDA 12.4)#

+

For an example of an image you can build to provide everything needed for CUDA v.12.4, please refer to this Dockerfile.

+

Using Apptainer as a build alternative to Docker#

+

Given Docker's popularity, support, and its widespread compatibility with other container runtimes, it is recommended to start your containerization journey with the steps outlined in the previous section. However, there could be rare cases in which you need to build an image directly with Apptainer. Instead of "Dockerfiles", Apptainer uses "Definition Files" for image building, which have a similar yet distinct format. Please refer to the respective link for more information. We also provide an Apptainer image build example in our documentation, which can be remotely built via the Singularity Container Service from Sylabs, the developer of Apptainer.
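For comparison with the simple_python3 Dockerfile shown earlier, a roughly equivalent Apptainer definition file is sketched below (an illustration only, not a tested recipe):

+# simple_python3.def -- approximate Apptainer equivalent of the earlier Dockerfile
+Bootstrap: docker
+From: ubuntu:22.04
+
+%post
+    apt-get update -y && apt-get install -y python3
+

Such a recipe would then be built on a machine where you have root access (or via the remote build service) with sudo apptainer build simple_python3.sif simple_python3.def.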

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Containers/registries/index.html b/Documentation/Development/Containers/registries/index.html new file mode 100644 index 000000000..3111b008f --- /dev/null +++ b/Documentation/Development/Containers/registries/index.html @@ -0,0 +1,5313 @@ + + + + + + + + + + + + + + + + + + + + + + + Container registries at NREL - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Container registries at NREL#

+

Introduction#

+

Container registries enable users to store container images. An overview of the steps to use each of the main container registries available to NREL users is provided below. Registries can enable reproducibility by storing tagged versions of containers, and also facilitate transferring images easily between different computational resources.

+

Create Docker images#

+

Docker is not supported on NREL's HPC systems, including Kestrel. Instead, Apptainer is the container engine provided as a module. Apptainer is able to pull Docker images and convert them to Apptainer-formatted images. We generally recommend building Docker images to ensure portability between compute resources and using Apptainer to convert the image when running on an HPC system.

+

Accessibility#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Registry  | Kestrel Access | AWS Access | Docker Support | Apptainer Support |
|-----------|----------------|------------|----------------|-------------------|
| Harbor    | No**           | No         | Yes            | Yes               |
| AWS ECR   | Yes            | Yes        | Yes            | No*               |
| DockerHub | Yes            | Yes        | Yes            | No*               |
*for DockerHub and AWS ECR it may be possible to push images using ORAS, but this was not found to be a streamlined process in testing.
**Harbor was originally set up for Kestrel's predecessor, Eagle. A replacement is being identified.
+

AWS ECR#

+

AWS ECR can be utilized by projects with a cloud allocation to host containers. ECR is primarily used with Docker containers, although hosting Apptainer images should also be possible.
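A typical Docker-based push to ECR looks roughly like the following; the region, account ID, and repository name are placeholders that depend on your cloud allocation:

+aws ecr get-login-password --region us-west-2 | \
+    docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-west-2.amazonaws.com
+docker tag my-image:latest 123456789012.dkr.ecr.us-west-2.amazonaws.com/my-repo:latest
+docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/my-repo:latest
+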

+

Harbor#

+

NREL's Harbor is a registry hosted by ITS that supports both Docker and Apptainer containers. Harbor was originally set up for Kestrel's predecessor, Eagle, which also used Apptainer's predecessor, Singularity. **NREL ITS is currently evaluating a replacement for internally hosted Harbor (likely moving to Enterprise DockerHub). The following information is archived until such a replacement is identified for Kestrel.

+

Docker#

+

Login#

+

Log in from your local machine to push a container to the registry.

docker login harbor.nrel.gov
+

+

Prepare image for push#

+
docker tag SOURCE_IMAGE[:TAG] harbor.nrel.gov/REPO/IMAGE[:TAG]
+
+
docker push harbor.nrel.gov/REPO/IMAGE[:TAG]
+
+

Pull Docker image on Eagle#

+

Pull and convert container to Singularity on Eagle.

+

Note: using --nohttps is not ideal, but without it the NREL certificates would need to be added to avoid a certificate error.

apptainer pull --nohttps --docker-login docker://harbor.nrel.gov/REPO/IMAGE[:TAG]
+

+

The container should now be downloaded and usable as usual

+

Singularity#

+

Login information#

+

Under your User Profile in Harbor obtain and export the following information +

export SINGULARITY_DOCKER_USERNAME=<harbor username>
+export SINGULARITY_DOCKER_PASSWORD=<harbor CLI secret>
+

+

Push a Singularity image#

+
singularity push <image>.sif oras://harbor.nrel.gov/<PROJECT>/<IMAGE>:<TAG>
+
+

Pull a Singularity image#

+
singularity pull oras://harbor.nrel.gov/<PROJECT>/<IMAGE>:<TAG>
+
+

DockerHub#

+

An enterprise version of DockerHub is being evaluated and is currently unavailable. However, NREL HPC users are free to pull Docker images with Apptainer directly from the public version of DockerHub. For example, this pulls the official Ubuntu v.22.04 image from DockerHub and converts it to the Apptainer-formatted ubuntu-22.04.sif image:

+
apptainer pull ubuntu-22.04.sif docker://ubuntu:22.04
+
+
+

Note

+

DockerHub maintains a series of "official" images that follow the syntax apptainer pull <name of SIF> docker://<image name>:<image version> when pulling with Apptainer. For all other images that are not listed in the link, you should instead use the syntax apptainer pull <name of SIF> docker://<image repo name>/<image name>:<image version>.
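For instance, pulling the non-official TensorFlow image used elsewhere in this documentation follows the repo/name:tag form:

+apptainer pull tensorflow-2.15.0.sif docker://tensorflow/tensorflow:2.15.0-gpu
+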

+
+

DockerHub Enterprise Credentials#

+

To get the needed credentials for NREL Dockerhub, select your username in the top right -> Account -> Security -> Create a new access token.

+

The dialog box will describe how to use the security token with docker login to enable pulling and pushing containers.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Containers/singularity/index.html b/Documentation/Development/Containers/singularity/index.html new file mode 100644 index 000000000..9953f9dec --- /dev/null +++ b/Documentation/Development/Containers/singularity/index.html @@ -0,0 +1,4861 @@ + + + + + + + + + + + + + + + + + + + + + + + Singularity - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Singularity

+ +

Singularity is a platform designed specifically for running containers on HPC systems.

+
+

Note

+

Singularity has been deprecated in favor of a new container application called Apptainer, and NREL clusters now exclusively use Apptainer. This page has subsequently been deprecated. For more information about Apptainer and using it on NREL HPC systems, see Apptainer.

+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Debug_Tools/ddt/index.html b/Documentation/Development/Debug_Tools/ddt/index.html new file mode 100644 index 000000000..58e784be3 --- /dev/null +++ b/Documentation/Development/Debug_Tools/ddt/index.html @@ -0,0 +1,4865 @@ + + + + + + + + + + + + + + + + + + + + + + + DDT - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

DDT (Linaro Debugger)#

+

DDT is Linaro's (formerly Arm's) parallel GUI-based debugger

+

ddt is a GUI-based parallel debugger that supports MPI, OpenMP, and CUDA. It can be used with C, C++, Fortran, and Python. It shares much of its infrastructure with Linaro's MAP and other profiling tools. See the Linaro-Forge page for additional information.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Debug_Tools/gdb/index.html b/Documentation/Development/Debug_Tools/gdb/index.html new file mode 100644 index 000000000..44ca11dbc --- /dev/null +++ b/Documentation/Development/Debug_Tools/gdb/index.html @@ -0,0 +1,4993 @@ + + + + + + + + + + + + + + + + + + + + + + + GDB - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

GDB (GNU Debugger)#

+

Documentation: GDB

+

GDB is GNU's command line interface debugging tool.

+

Getting started#

+

GDB is available on NREL machines and supports a number of languages, including C, C++, and Fortran.

+

When using GDB, make sure the program you are attempting to debug has been compiled with the -g debug flag and with the -O0 optimization flag to achieve the best results.

+

Run GDB with the following command: gdb --args my_executable arg1 arg2 arg3. This will launch gdb running my_executable and pass the arguments arg1, arg2, and arg3 to my_executable.
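A short, hypothetical session (the program, its arguments, and the breakpoint are placeholders) might look like:

+$ gcc -g -O0 myprog.c -o myprog      # build with debug symbols and no optimization
+$ gdb --args ./myprog input.txt
+(gdb) break main                     # stop at the start of main
+(gdb) run                            # run until the breakpoint is hit
+(gdb) backtrace                      # show the call stack
+(gdb) quit
+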

+

For links to in-depth tutorials and walkthroughs of GDB features, please see Resources.

+

Availability#

+ + + + + + + + + + + + + + + + + +
| Kestrel  | Eagle      | Swift    | Vermilion          |
|----------|------------|----------|--------------------|
| gdb/12.1 | gdb/7.6.1* | gdb/8.2* | gdb/12.1, gdb/8.2* |
+

* Located in /usr/bin. No module load is needed.

+

Resources#

+ + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Jupyter/index.html b/Documentation/Development/Jupyter/index.html new file mode 100644 index 000000000..f05a9d44c --- /dev/null +++ b/Documentation/Development/Jupyter/index.html @@ -0,0 +1,5449 @@ + + + + + + + + + + + + + + + + + + + + + + + Introduction to Jupyter - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Introduction to Jupyter#

+

What is Jupyter?#

+

A web app for interactive Python in a browser

+

Jupyter offers a number of benefits for researchers in many fields, including:

+
    +
  • Live coding: Make changes and see the effects in real-time.
  • Instant visualization: Charts and graphics render quickly in a browser window.
  • Sharable: Notebooks can be copied and sent to others, or multiple users can edit a single shared notebook.
  • Reproducible: Create a shareable environment with pinned Python and scientific library versions.
  • Customizable: Many configuration options, extensions, and libraries are available.
  • Not just for Python: Supports many other languages (including R, Julia, and many others).
+

Example Notebook Code#

+

With the appropriate libraries installed into the Jupyter environment, the following code can be placed in one cell in a notebook, or split across multiple cells, and executed to produce quick graphs:

+
import chart_studio.plotly as py
+import plotly.figure_factory as ff
+import pandas as pd
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+x = np.linspace(0, 5, 10)
+y = x ** 2
+n = np.array([0,1,2,3,4,5])
+xx = np.linspace(-0.75, 1., 100)
+
+fig, axes = plt.subplots(1, 4, figsize=(12,3))
+
+axes[0].scatter(xx, xx + 1.25*np.random.randn(len(xx)))
+#axes[0].scatter(xx, xx + 0.25*np.random.randn(len(xx)))
+axes[0].set_title("scatter")
+
+axes[1].step(n, n**2.0, lw=2)
+axes[1].set_title("step")
+
+axes[2].bar(n, n**2, align="center", width=0.5, alpha=0.5)
+axes[2].set_title("bar")
+
+axes[3].fill_between(x, x**2.5, x**3, color="green", alpha=0.5);
+axes[3].set_title("fill_between");
+
+

(Figure: a row of four matplotlib subplots titled "scatter", "step", "bar", and "fill_between", produced by the code above.)

+

Jupyter Terminology#

+

Jupyterhub#

+

This is the multi-user "backend" server. The "Hub" allows users to login, then launches the single-user Jupyter server for them. Hubs are usually installed and managed by system administrators, not Jupyter users.

+

A Jupyterhub server (kestrel-jhub) is available on Kestrel for use with your HPC data. More on KJHub later in this document.

+

Jupyter/Jupyter Server/Notebook server#

+

The single-user server/web interface. Use to create, save, or load .ipynb notebook files. This is what users generally interact with.

+

Jupyter Notebook#

+

A Notebook is an individual .ipynb file. It contains your Python code and visualizations, and is sharable/downloadable.

+

Jupyter Lab#

+

A redesigned web interface for your Jupyter Notebook Server - "Notebooks 2.0". Preferred by some, and promoted as the next evolution of Notebooks. Lab has many new and different extensions, but many are not compatible between Notebook and Lab. Lab is still under development, so it lacks some features of "classic" notebooks.

+

Kernel#

+

Kernels define the Python environments used by your notebooks. They derive from the ipykernel project, and you may see Jupyter kernels referred to as "ipykernels". Custom kernels require the "ipykernel" package installed in your Jupyter conda environment.

+

More on kernels later.

+

JupyterHub Service on Kestrel (KJHub)#

+

The NREL HPC team runs a JupyterHub service for HPC users to quickly access notebooks and data stored on Kestrel, Kestrel-JHub (KJHub.)

+

KJHub is available from the NREL VPN (onsite or offsite) for internal NREL users.

+

This service is not directly accessible externally for non-NREL HPC users. However, it may be reached by using the HPC VPN, or by using a FastX Remote Desktop session via the DAV nodes.

+

The JupyterHub service is accessible via web browser at https://kestrel-jhub.hpc.nrel.gov

+

JupyterHub Advantages:#

+
    +
  • Fast and easy access to notebooks with no setup.
  • Use regular Kestrel credentials to log in.
  • Great for simple tasks, including light to moderate data processing, code debugging/testing, and/or visualization using basic scientific and visualization libraries.
+

JupyterHub Disadvantages:#

+
    +
  • Limited resources: KJHub is a single node with 128 CPU cores and 512GB RAM.
  • Managed usage: Users are limited to roughly 8 cores/100GB RAM; exceeding this triggers automatic throttling that greatly slows down processing.
  • Competition: Your notebook competes with other users for CPU and RAM on the KJHub node.
  • Slow updates: Only a limited list of basic scientific libraries is available in the default notebook kernel/environment.
+

Simple Instructions to access JupyterHub:#

Connect to the NREL VPN, browse to https://kestrel-jhub.hpc.nrel.gov, and log in with your regular Kestrel username and password.

KJHub opens a standard JupyterLab interface by default. Change the url ending from "/lab" to "/tree" in your web browser to switch to the classic Notebooks interface.

+

Using a Compute Node to Run Your Own Jupyter Notebooks#

+

Kestrel supports running your own Jupyter Notebook server on a compute node. This is highly recommended over KJHub for advanced Jupyter use and heavy computational processing.

+

Advantages:#

+
    +
  • Custom conda environments to load preferred libraries.
  • Full node usage: Exclusive access to the resources of the node your job is reserved on, including up to 104 CPU cores and up to 240GB RAM on Kestrel CPU nodes and up to 2TB RAM on Kestrel bigmem nodes. (See the system specifications page for more information on the types of nodes available on Kestrel.)
  • No competing with other users for CPU cores and RAM, and no Arbiter2 process throttling.
  • Less than a whole node may be requested via the shared node queue, to save AUs.
+

Disadvantages:#

+
    +
  • Must compete with other users for a node via the job queue.
  • Costs your allocation AUs.
+

Launching Your Own Jupyter Server on an HPC System#

+

Before you get started, we recommend installing your own Jupyter inside of a conda environment. The default conda/anaconda3 module contains basic Jupyter Notebook packages, but you will likely want your own Python libraries, notebook extensions, and other features. Basic directions are included later in this document.

+

Internal (NREL) HPC users on the NREL VPN, or external users of the HPC VPN, may use the instructions below.

+

External (non-NREL) HPC users may follow the same instructions, but please use kestrel.nrel.gov in place of kestrel.hpc.nrel.gov.

+

Using a Compute Node to run Jupyter Notebooks#

+

Connect to a login node and request an interactive job using the salloc command.

+

The examples below will start a 2-hour job. Edit the <account> to the name of your allocation, and adjust the time accordingly. Since these are interactive jobs, they will get some priority, especially if they're shorter, so only book as much time as you will be actively working on the notebook.

+

On Kestrel:#

+

Connect to the login node and launch an interactive job:

+

[user@laptop:~]$ ssh kestrel.hpc.nrel.gov

+

[user@kl1:~]$ salloc -A <account> -t 02:00:00

+

Starting Jupyter Inside the Job#

+

Once the job starts and you are allocated a compute node, load the appropriate modules, activate your Jupyter environment, and launch the Jupyter server.

+

[user@x1000c0s0b0n1:~]$ module load anaconda3

+

[user@x1000c0s0b0n1:~]$ source activate myjupenv

+

[user@x1000c0s0b0n1:~]$ jupyter-notebook --no-browser --ip=$(hostname -s)

+

Take note of the node name that your job is assigned. (x1000c0s0b0n1 in the above example.)

+

Also note the url that Jupyter displays when starting up, e.g. http://127.0.0.1:8888/?token=<alphabet soup>.

+

The <alphabet soup> is a long string of letters and numbers. This is a unique authorization token for your Jupyter session. You will need it, along with the full URL, for a later step.

+

On Your Own Computer:#

+

Next, open an SSH tunnel through a login node to the compute node. Log in when prompted using your regular HPC credentials, and put this terminal to the side or minimize it, but leave it open until you are done working with Jupyter for this session.

+

[user@laptop:~]$ ssh -N -L 8888:<nodename>:8888 username@kestrel.hpc.nrel.gov

+

Open a Web Browser#

+

Copy the full url and token from Jupyter startup into your web browser. For example:

+

http://127.0.0.1:8888/?token=<alphabet soup>

+

Using a Compute Node - The Easy Way#

+

Scripted assistance with launching a Jupyter session on Kestrel is available.

+

Internal NREL Users Only: pyeagle#

+

The pyeagle package is available for internal users to handle launching and monitoring a jupyter server on a compute node. This package is maintained by an NREL HPC user group and was originally written for use with Eagle, but now supports Kestrel.

+

Auto-launching on Kestrel with an sbatch Script#

+

There are scripts written for launching a Jupyter session inside of a slurm job.

+

Full directions and scripts included in the Jupyter repo.

+
+Standard Jupyter session launch with full CPU request +

Download sbatch_jupyter.sh and auto_launch_jupyter.sh

+

Edit sbatch_jupyter.sh to change: +

--time=<time_request>
+--account=<project_handle>
+...
+...
+source activate /home/$USER/.conda-envs/<MY_ENVIRONMENT> # Replace <MY_ENVIRONMENT> with the name of your conda environment
+

+

Edit auto_launch_jupyter.sh to include your sbatch_jupyter script: +

RES=$(sbatch sbatch_jupyter.sh)
+

+

Run auto_launch_jupyter.sh and follow the directions that come up on your terminal window.

+
+
+Standard Jupyter session launch in the shared partition with partial CPU request +

Download shared_sbatch_jupyter.sh and auto_launch_jupyter.sh

+

Edit shared_sbatch_jupyter.sh to change: +

--time=<time_request>
+--account=<project_handle>
+...
+--cpus-per-task=<CPUs_request>
+--mem-per-cpu=<CPU_memory_request>                 # Default is 1G per core
+...
+...
+source activate /home/$USER/.conda-envs/<MY_ENVIRONMENT>  # Replace <MY_ENVIRONMENT> with the name of your conda environment
+

+

Edit auto_launch_jupyter.sh to include your sbatch_jupyter script: +

RES=$(sbatch shared_sbatch_jupyter.sh)
+

+

Run auto_launch_jupyter.sh and follow the directions that come up on your terminal window.

+
+
+Standard Jupyter session launch with GPU request +

Download gpu_sbatch_jupyter.sh and auto_launch_jupyter.sh

+

Edit gpu_sbatch_jupyter.sh to change: +

--time=<time_request>
+--account=<project_handle>
+...
+--cpus-per-task=<CPU_request>
+--gres=gpu:<GPU_request>
+
+export CUDA_VISIBLE_DEVICES=0  # if GPUs request is 1, then set =0
+                               # if GPUs request is 2, then set =0,1
+                               # if GPUs request is 3, then set =0,1,2
+                               # if GPUs request if 4, then set =0,1,2,3
+...
+...
+source activate /home/$USER/.conda-envs/<MY_ENVIRONMENT>  # Replace <MY_ENVIRONMENT> with the name of your conda environment
+

+

Edit auto_launch_jupyter.sh to include your sbatch_jupyter script: +

RES=$(sbatch gpu_sbatch_jupyter.sh)
+

+

Run auto_launch_jupyter.sh and follow the directions that come up on your terminal window.

+
+

Reasons to Not Run Jupyter Directly on a Login Node#

+

Data processing and visualization should be done via either KJHub or a compute node.

+

Login nodes are highly shared and limited resources. There will be competition for CPU, RAM, and network I/O for storage, and Arbiter2 software will automatically throttle moderate to heavy usage on login nodes, greatly slowing down your processing.

+

Custom Conda Environments and Jupyter Kernels#

+

On Kestrel, the module 'anaconda3' is available to run the conda command and manage your environments.

+

As an alternative, the module 'mamba' is available. Mamba is a conda-compatible environment manager with very similar usage. Most conda commands in this documentation may be used with mamba instead, and the two may generally be considered interchangeable.
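For example, the environment created in the next section could equivalently be built with mamba (a sketch):

+module load mamba
+mamba create -n myjupyter -c conda-forge jupyter ipykernel
+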

+

Creating a Conda Environment#

+

To add your own packages to conda on Kestrel:

+

Create an environment and install the base jupyter packages. Then activate the environment and install other libraries that you want to use, e.g. scipy, numpy, and so on.

+

conda create -n myjupyter -c conda-forge jupyter ipykernel

+

source activate myjupyter

+

conda install -c conda-forge scipy numpy matplotlib

+

Add Custom iPykernel#

+

A kernel is what allows your customized conda environment to be used inside a Jupyter notebook. Use ipykernel to build your kernel. Inside your custom conda environment, run:

+

python -m ipykernel install --user --name=myjupyter

+

If you already have a Jupyter server running, restart it to load the new kernel.

+

The new kernel will appear in the drop-down as an option to open a new notebook.

+

You can have multiple kernels, allowing you to load different conda environments for your different projects into Notebooks.

+

Jupyter Kernel Management#

+

Use the kernelspec list command inside your Jupyter conda environment to see what ipykernels you have installed:

+

jupyter kernelspec list

+

To remove an old kernel, use the kernelspec remove command:

+

jupyter kernelspec remove myoldjupyter

+

Magic Commands#

+

Magic commands are "meta commands" that add extra functionality to Jupyter.

+

Magic commands begin with % or %%.

+

Example Magic Commands#

+
* %lsmagic - list all magic commands
+* %run _file.py_ - run an external python script
+* %%time - placed at top of cell, prints execution time
+* %who - list all defined variables in notebook
+
+
%lsmagic
+
+
Available line magics:
+%alias  %alias_magic  %autoawait  %autocall  %automagic  %autosave  %bookmark  %cat  %cd  %clear  %colors  %conda  %config  %connect_info  %cp  %debug  %dhist  %dirs  %doctest_mode  %ed  %edit  %env  %gui  %hist  %history  %killbgscripts  %ldir  %less  %lf  %lk  %ll  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %lx  %macro  %magic  %man  %matplotlib  %mkdir  %more  %mv  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %pip  %popd  %pprint  %precision  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %rep  %rerun  %reset  %reset_selective  %rm  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode
+
+Available cell magics:
+%%!  %%HTML  %%SVG  %%bash  %%capture  %%debug  %%file  %%html  %%javascript  %%js  %%latex  %%markdown  %%perl  %%prun  %%pypy  %%python  %%python2  %%python3  %%ruby  %%script  %%sh  %%svg  %%sx  %%system  %%time  %%timeit  %%writefile
+
+Automagic is ON, % prefix IS NOT needed for line magics.
+
+

Shell Commands#

+

You can also run shell commands inside a cell. For example:

+

!conda list - see the packages installed in the environment you're using

+
!pwd
+!ls
+
+
/home/username/jup
+auto_launch_jupyter.sh    Jupyter Presentation.ipynb  slurm-6445885.out
+geojsondemo.ipynb         old                         sshot1.png
+Interesting Graphs.ipynb  sbatch_jupyter.sh           sshot2.png
+jup-logo.png              slurm
+
+

Interesting/Useful Notebooks, Extensions, and Learning Resources#

+

Awesome Jupyter

+

Awesome Jupyterlab

+

Plotting with matplotlib

+

Python for Data Science

+

Numerical Computing in Python

+

The Sound of Hydrogen

+

Plotting Pitfalls

+

GeoJSON Extension

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Fortran/f90_advanced/index.html b/Documentation/Development/Languages/Fortran/f90_advanced/index.html new file mode 100644 index 000000000..81c157aca --- /dev/null +++ b/Documentation/Development/Languages/Fortran/f90_advanced/index.html @@ -0,0 +1,9702 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Fortran 90 for Fortran 77 programmers - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Advanced Fortran 90#

+

This document is derived from an HTML page written at the San Diego Supercomputer Center many years ago. Its purpose is to introduce Fortran 90 concepts to Fortran 77 programmers. It does this by presenting an example program and introducing concepts as the various routines of the program are presented. The original web page has been used over the years and has been translated into several languages.

+

Note: See our Fortran Overview page for basic getting started instructions and compiler/toolchain information.

+
+
+ + +

Format for our presentation#

+
    +
  • We will "develop" an application
      +
    • Incorporate f90 features
    • +
    • Show source code
    • +
    • Explain what and why as we do it
    • +
    +
  • +
  • Application is a genetic algorithm
      +
    • Easy to understand and program
    • +
    • Offers rich opportunities for enhancement
    • +
    +
  • +
  • We also provide a summary of F90 syntax, keywords, operators, constants, and functions
  • +
+
+
+

What was in mind of the language writers? What were they thinking?#

+
    +
  • Enable portable codes
      +
    • Same precision
    • +
    • Include many common extensions
    • +
    +
  • +
  • More reliable programs
  • +
  • Getting away from underlying hardware
  • +
  • Move toward parallel programming
  • +
  • Run old programs
  • +
  • Ease of programming
      +
    • Writing
    • +
    • Maintaining
    • +
    • Understanding
    • +
    • Reading
    • +
    +
  • +
  • Recover C and C++ users
  • +
+
+
+

Why Fortran?#

+

Famous Quote: "I don't know what the technical characteristics of the standard language for scientific and engineering computation in the year 2000 will be... but I know it will be called Fortran." John Backus.

+

Note: He claimed that he never said this.

+
    +
  • Language of choice for Scientific programming
  • +
  • Large installed user base.
  • +
  • Fortran 90 has most of the features of C . . . and then some
  • +
  • The compilers produce better programs
  • +
+
+
+

Justification of topics#

+
    +
  • Enhance performance
  • +
  • Enhance portability
  • +
  • Enhance reliability
  • +
  • Enhance maintainability
  • +
+
+
+

Classification of topics#

+
    +
  • New useful features
  • +
  • Old tricks
  • +
  • Power features
  • +
  • Overview of F90
  • +
+

What is a Genetic Algorithm#

+
    +
  • A "suboptimization" system
      +
    • Find good, but maybe not optimal, solutions to difficult problems
    • +
    • Often used on NP-Hard or combinatorial optimization problems
    • +
    +
  • +
  • Requirements
      +
    • Solution(s) to the problem represented as a string
    • +
    • A fitness function
        +
      • Takes as input the solution string
      • +
      • Output the desirability of the solution
      • +
      +
    • +
    • A method of combining solution strings to generate new solutions
    • +
    +
  • +
  • Find solutions to problems by Darwinian evolution
      +
    • Potential solutions are thought of as living entities in a population
    • +
    • The strings are the genetic codes for the individuals
    • +
    • Fittest individuals are allowed to survive to reproduce
    • +
    +
  • +
+
+
+

Simple algorithm for a GA#

+
    +
  • Generate an initial population, a collection of strings
  • +
  • do for some time
      +
    • evaluate each individual (string) of the population using the fitness function
    • +
    • sort the population with fittest coming to the top
    • +
    • allow the fittest individuals to "sexually" reproduce, replacing the old population
    • +
    • allow for mutation
    • +
    +
  • +
  • end do (a minimal sketch of this loop follows below)
  • +
+
+
+
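
A minimal, self-contained sketch of this loop is shown below. It is illustrative only: the "fitness" used here (the sum of the string, smaller is better) and the crude breed/mutate step are hypothetical placeholders, not the map-coloring versions developed later in this document.

program ga_sketch
+    implicit none
+    integer, parameter :: num_genes=10, gene_size=8, num_colors=4, nsteps=50
+    integer :: genes(num_genes,gene_size), best(1), i, step
+    real    :: fit(num_genes), r(gene_size), r2(gene_size)
+    do i=1,num_genes                    ! generate an initial population
+        call random_number(r)
+        genes(i,:)=int(r*num_colors)
+    enddo
+    do step=1,nsteps
+        do i=1,num_genes
+            fit(i)=sum(genes(i,:))      ! evaluate each individual (toy fitness)
+        enddo
+        best=minloc(fit)                ! "sort": find the fittest individual
+        genes(1,:)=genes(best(1),:)     ! keep the fittest individual
+        do i=2,num_genes                ! rebuild the rest of the population
+            call random_number(r)
+            call random_number(r2)
+            genes(i,:)=genes(1,:)       ! as copies of the fittest...
+            where(r < 0.2) genes(i,:)=int(r2*num_colors)  ! ...with mutation
+        enddo
+    enddo
+    write(*,*)'best fitness found:',minval(fit)
+end program ga_sketch
+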

Our example problem#

+
    +
  • Instance: Given a map of N states or countries and a fixed number of colors
  • +
  • Find a coloring of the map, if it exists, such that no two states that share a border have the same color
  • +
  • Notes: In general, for a fixed number of colors and an arbitrary map, the only known way to determine whether a valid coloring exists is a brute-force search, with the number of combinations = (NUMBER_OF_COLORS)**(NSTATES)
      +
    • The strings of our population are integer vectors that represent the coloring
    • +
    • Our fitness function returns the number of border violations
    • +
    • The GA searches for a coloring with few, hopefully 0, violations
    • +
    • This problem is related to several important NP-hard problems in computer science
        +
      • Processor scheduling
      • +
      • Communication and grid allocation for parallel computing
      • +
      • Routing
      • +
      +
    • +
    +
  • +
+
+
+

Start of real Fortran 90 discussion

+
+
+

Comparing a FORTRAN 77 routine to a Fortran 90 routine#

+
    +
  • The routine is one of the random number generators from: Numerical Recipes, The Art of Scientific Computing. Press, Teukolsky, Vetterling and Flannery. Cambridge University Press 1986.
  • +
  • Changes
      +
    • correct bugs
    • +
    • increase functionality
    • +
    • aid portability
    • +
    +
  • +
+

Original#

+
    function ran1(idum)
+        real ran1
+        integer idum
+        real r(97)
+        parameter ( m1=259200,ia1=7141,ic1=54773)
+        parameter ( m2=134456,ia2=8121,ic2=28411)
+        parameter ( m3=243000,ia3=4561,ic3=51349)
+        integer j
+        integer iff,ix1,ix2,ix3
+        data iff /0/
+        if (idum.lt.0.or.iff.eq.0)then
+            rm1=1.0/m1
+            rm2=1.0/m2
+            iff=1
+            ix1=mod(ic1-idum,m1)
+            ix1=mod(ia1*ix1+ic1,m1)
+            ix2=mod(ix1,m2)
+            ix1=mod(ia1*ix1+ic1,m1)
+            ix3=mod(ix1,m3)
+            do 11 j=1,97
+                ix1=mod(ia1*ix1+ic1,m1)
+                ix2=mod(ia2*ix2+ic2,m2)
+                r(j)=(real(ix1)+real(ix2)*rm2)*rm1
+ 11           continue
+            idum=1
+        endif
+        ix1=mod(ia1*ix1+ic1,m1)
+        ix2=mod(ia2*ix2+ic2,m2)
+        ix3=mod(ia3*ix3+ic3,m3)
+        j=1+(97*ix3)/m3
+        if(j.gt.97.or.j.lt.1)then
+            write(*,*)' error in ran1 j=',j
+            stop
+        endif
+        ran1=r(j)
+        r(j)=(real(ix1)+real(ix2)*rm2)*rm1
+        return
+     end 
+
+

Fortran 90#

+
module ran_mod
+contains
+     function ran1(idum)
+        use numz
+        implicit none  !note after use statement
+        real (b8) ran1
+        integer , intent(inout), optional ::  idum
+        real (b8) r(97),rm1,rm2
+        integer , parameter :: m1=259200,ia1=7141,ic1=54773
+        integer , parameter :: m2=134456,ia2=8121,ic2=28411
+        integer , parameter :: m3=243000,ia3=4561,ic3=51349
+        integer j
+        integer iff,ix1,ix2,ix3
+        data iff /0/
+        save ! corrects a bug in the original routine
+        if(present(idum))then
+          if (idum.lt.0.or.iff.eq.0)then
+            rm1=1.0_b8/m1
+            rm2=1.0_b8/m2
+            iff=1
+            ix1=mod(ic1-idum,m1)
+            ix1=mod(ia1*ix1+ic1,m1)
+            ix2=mod(ix1,m2)
+            ix1=mod(ia1*ix1+ic1,m1)
+            ix3=mod(ix1,m3)
+            do j=1,97
+                ix1=mod(ia1*ix1+ic1,m1)
+                ix2=mod(ia2*ix2+ic2,m2)
+                r(j)=(real(ix1,b8)+real(ix2,b8)*rm2)*rm1
+            enddo
+            idum=1
+          endif
+        endif
+        ix1=mod(ia1*ix1+ic1,m1)
+        ix2=mod(ia2*ix2+ic2,m2)
+        ix3=mod(ia3*ix3+ic3,m3)
+        j=1+(97*ix3)/m3
+        if(j.gt.97.or.j.lt.1)then
+            write(*,*)' error in ran1 j=',j
+            stop
+        endif
+        ran1=r(j)
+        r(j)=(real(ix1,b8)+real(ix2,b8)*rm2)*rm1
+        return
+     end function ran1
+
+

Comments#

+
    +
  1. Modules are a way of encapsulating functions and data. More below.
  2. The use numz line is similar to an include file. In this case it defines our real data type.
  3. real (b8) is a new way to specify precision for data types in a portable way.
  4. integer , intent(inout), optional :: idum says that idum is an optional input parameter.
  5. integer , parameter :: is just a different syntax.
  6. The save statement is needed for program correctness.
  7. present(idum) is a function to determine if ran1 was called with the optional parameter.
+
+
+

Obsolescent features#

+

The following are still available in Fortran 90, but the standard marks them "obsolescent": they may be removed from a future revision of the language and should be avoided in new code. A short before-and-after example follows the list below.

+
    +
  • Arithmetic IF-statement
  • +
  • Control variables in a DO-loop which are floating point or double-precision floating-point
  • +
  • Terminating several DO-loops on the same statement
  • +
  • Terminating the DO-loop in some other way than with CONTINUE or END DO
  • +
  • Alternate return
  • +
  • Jump to END IF from an outer block
  • +
  • PAUSE
  • +
  • ASSIGN and assigned GOTO and assigned FORMAT, that is, the whole "statement number variable" concept.
  • +
  • Hollerith editing in FORMAT.
  • +
+
+
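
To illustrate (this example is not from the original page), here is a fragment that uses the obsolescent arithmetic IF, followed by an equivalent modern block IF:

program old_vs_new
+    real :: x, y
+    x = -2.5
+! obsolescent arithmetic IF: branch on the sign of x
+    if (x) 10, 20, 30
+10  y = -1.0
+    go to 40
+20  y = 0.0
+    go to 40
+30  y = 1.0
+40  continue
+    write(*,*) 'arithmetic IF gives y =', y
+! the equivalent Fortran 90 block IF
+    if (x < 0.0) then
+        y = -1.0
+    else if (x == 0.0) then
+        y = 0.0
+    else
+        y = 1.0
+    end if
+    write(*,*) 'block IF gives      y =', y
+end program old_vs_new
+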
+ +

Summary#

+
    +
  • ! now indicates the start of a comment
  • +
  • & indicates the next line is a continuation
  • +
  • Lines can be longer than 72 characters
  • +
  • Statements can start in any column
  • +
  • Use ; to put multiple statements on one line
  • +
  • New forms for the do loop
  • +
  • Many functions are generic
  • +
  • 32 character names
  • +
  • Many new array assignment techniques
  • +
+

Features#

+
    +
  • Flexibility can aid in program readability
  • +
  • Readability decreases errors
  • +
  • Got ya!
      +
    • Can no longer use C to start a comment
    • +
    • A character in column 6 no longer indicates a continuation
    • +
    • Tab is not a valid character (may produce a warning)
    • +
    • Characters past 72 now count
    • +
    +
  • +
+
program darwin
+     real a(10), b(10), c(10), d(10), e(10), x, y
+     integer odd(5),even(5)
+! this line is continued by using "&"
+     write(*,*)"starting ",&
+                "darwin" ! this line is continued from above
+! multiple statements per line -- rarely a good idea
+     x=1; y=2; write(*,*)x,y
+     do i=1,10    ! statement label is not required for do
+        e(i)=i
+     enddo
+     odd= (/ 1,3,5,7,9 /)  ! array assignment
+     even=(/ 2,4,6,8,10 /) ! array assignment
+     a=1          ! array assignment, every element of a = 1
+     b=2
+     c=a+b+e      ! element by element assignment
+     c(odd)=c(even)-1  ! can use arrays of indices on both sides
+     d=sin(c)     ! element by element application of intrinsics
+     write(*,*)d
+     write(*,*)abs(d)  ! many intrinsic functions are generic
+ a_do_loop : do i=1,10
+               write(*,*)i,c(i),d(i)
+             enddo a_do_loop
+     do
+        if(c(10) .lt. 0.0 ) exit
+        c(10)=c(10)-1
+     enddo
+     write(*,*)c(10)
+     do while (c(9) .gt. 0)
+        c(9)=c(9)-1
+     enddo
+     write(*,*)c(9)
+end program
+
+
+
+

New data declaration method#

+
    +
  • +

    Motivation

    +
      +
    • Variables can now have attributes, such as Parameter, Save, and Dimension
    • +
    • Attributes are assigned in the variable declaration statement
    • +
    +
  • +
  • +

    One variable can have several attributes

    +
  • +
  • Requires Fortran 90 to have a new statement form
  • +
+

integer,parameter :: in2 = 14
+    real, parameter :: pi = 3.141592653589793239
+    real, save, dimension(10) :: cpu_times,wall_times
+!****    the old way of doing the same    ****!
+!****    real cpu_times(10),wall_times(10) ****!
+!****    save cpu_times, wall_times        ****!
+
+- Other attributes: allocatable, public, private, target, pointer, intent, optional

+
+
+

Kind facility#

+
    +
  • Motivation
      +
    • Assume we have a program that we want to run on two different machines
    • +
    • We want the same representation of reals on both machines (same number of significant digits)
    • +
    • Problem: different machines have different representations for reals
    • +
    +
  • +
+

Digits of precision for some (old) machines and data types#

Machine        Real   Double Precision
IBM (SP)          6   15
Cray (T90)       15   33
Cray (T3E)       15   15
+

* or *#

+
    +
  • We may want to run with at least 6 digits today and at least 14 digits tomorrow
  • +
  • Use the Selected_Real_Kind(P) function to create a data type with P digits of precision
  • +
+
program darwin
+! e has at least 4 significant digits
+  real(selected_real_kind(4))e
+! b8 will be used to define reals with 14 digits
+  integer, parameter:: b8 = selected_real_kind(14)
+  real(b8), parameter :: pi = 3.141592653589793239_b8 ! note usage of _b8
+! with  a constant
+! to force precision
+ e= 2.71828182845904523536
+  write(*,*)"starting ",&  ! this line is continued by using "&"
+            "darwin"       ! this line is continued from above
+  write(*,*)"pi has ",precision(pi)," digits precision ",pi
+  write(*,*)"e has   ",precision(e)," digits precision ",e
+end program
+
+

Example output#

+
  sp001  % darwin
+ starting darwin
+ pi has  15  digits precision  3.14159265358979312
+ e has    6  digits precision  2.718281746
+sp001 %
+
+
    +
  • Can change the precision of all variables declared using "b8" simply by changing the definition of "b8"
  • +
  • Use the Selected_Real_Kind(P,R) function to create a data type with P digits of precision and an exponent range of R
  • +
+
+
+

Modules#

+
    +
  • +

    Motivation:

    +
      +
    • Common block usage is prone to error
    • +
    • Provide most of capability of common blocks but safer
    • +
    • Provide capabilities beyond common blocks
    • +
    +
  • +
  • +

    Modules can contain:

    +
      +
    • Data definitions
    • +
    • Data to be shared much like using a labeled common
    • +
    • Functions and subroutines
    • +
    • Interfaces (more on this later)
    • +
    +
  • +
  • +

    You "include" a module with a "use" statement

    +
  • +
+
module numz
+  integer,parameter:: b8 = selected_real_kind(14)
+  real(b8),parameter :: pi = 3.141592653589793239_b8
+  integer gene_size
+end module
+ program darwin
+    use numz
+    implicit none    ! now part of the standard, put it after the use statements
+   write(*,*)"pi has ",precision(pi)," digits precision ",pi
+   call set_size()
+   write(*,*)"gene_size=",gene_size
+ end program
+subroutine set_size
+  use numz
+  gene_size=10
+end subroutine
+
+

An example run#

+
  pi has  15  digits precision  3.14159265358979312
+  gene_size=10
+
+
+
+

Module functions and subroutines#

+
    +
  • +

    Motivation:

    +
      +
    • Encapsulate related functions and subroutines
    • +
    • Can "USE" these functions in a program or subroutine
    • +
    • Can be provided as a library
    • +
    • Only routines that contain the use statement can see the routines
    • +
    +
  • +
  • +

    Example is a random number package: +

    module ran_mod
    +! module contains three functions
    +! ran1 returns a uniform random number between 0-1
    +! spread returns random number between min - max
    +! normal returns a normal distribution
    +contains
    +    function ran1()  !returns random number between 0 - 1
    +        use numz
    +        implicit none
    +        real(b8) ran1,x
    +        call random_number(x) ! built in fortran 90 random number function
    +        ran1=x
    +    end function ran1
    +    function spread(min,max)  !returns random # between min/max
    +        use numz
    +        implicit none
    +        real(b8) spread
    +        real(b8) min,max
    +        spread=(max - min) * ran1() + min
    +    end function spread
    +    function normal(mean,sigma) !returns a normal distribution
    +        use numz
    +        implicit none
    +        real(b8) normal,tmp
    +        real(b8) mean,sigma
    +        integer flag
    +        real(b8) fac,gsave,rsq,r1,r2
    +        save flag,gsave
    +        data flag /0/
    +        if (flag.eq.0) then
    +        rsq=2.0_b8
    +            do while(rsq.ge.1.0_b8.or.rsq.eq.0.0_b8) ! new form for do
    +                r1=2.0_b8*ran1()-1.0_b8
    +                r2=2.0_b8*ran1()-1.0_b8
    +                rsq=r1*r1+r2*r2
    +            enddo
    +            fac=sqrt(-2.0_b8*log(rsq)/rsq)
    +            gsave=r1*fac
    +            tmp=r2*fac
    +            flag=1
    +        else
    +            tmp=gsave
    +            flag=0
    +        endif
    +        normal=tmp*sigma+mean
    +        return
    +    end function normal
    +end module ran_mod
    +

    +
  • +
+
+
+

Exercise 1: Write a program that returns 10 uniform random numbers.

+
+
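
One possible solution sketch, assuming the numz and ran_mod modules shown above are compiled along with it:

program ten_randoms
+! prints 10 uniform random numbers using ran1 from the ran_mod module above
+    use numz
+    use ran_mod
+    implicit none
+    integer j
+    do j=1,10
+        write(*,*)ran1()
+    enddo
+end program ten_randoms
+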
+

Allocatable arrays (the basics)#

+
    +
  • +

    Motivation:

    +
      +
    • At compile time we may not know the size an array needs to be
    • +
    • We may want to change problem size without recompiling
    • +
    +
  • +
  • +

    Allocatable arrays allow us to set the size at run time

    +
  • +
  • We set the size of the array using the allocate statement
  • +
  • We may want to change the lower bound for an array
  • +
  • A simple example:
  • +
+
module numz
+  integer, parameter:: b8 = selected_real_kind(14)
+  integer gene_size,num_genes
+  integer,allocatable :: a_gene(:),many_genes(:,:)
+end module
+program darwin
+    use numz
+    implicit none
+    integer ierr
+    call set_size()
+    allocate(a_gene(gene_size),stat=ierr) !stat= allows for an error code return
+    if(ierr /= 0)write(*,*)"allocation error"  ! /= is .ne.
+    allocate(many_genes(gene_size,num_genes),stat=ierr)  !2d array
+    if(ierr /= 0)write(*,*)"allocation error"
+    write(*,*)lbound(a_gene),ubound(a_gene) ! get lower and upper bound
+                                            ! for the array
+    write(*,*)size(many_genes),size(many_genes,1) !get total size and size
+                                                  !along 1st dimension
+    deallocate(many_genes) ! free the space for the array and matrix
+    deallocate(a_gene)
+    allocate(a_gene(0:gene_size)) ! now allocate starting at 0 instead of 1
+    write(*,*)allocated(many_genes),allocated(a_gene) ! shows if allocated
+    write(*,*)lbound(a_gene),ubound(a_gene)
+end program
+  subroutine set_size
+    use numz
+    write(*,*)'enter gene size:'
+    read(*,*)gene_size
+    write(*,*)'enter number of genes:'
+    read(*,*)num_genes
+end subroutine set_size
+
+

Example run#

+
    enter gene size:
+10
+ enter number of genes:
+20
+           1          10
+         200          10
+ F T
+           0          10
+
+

Passing arrays to subroutines#

+
    +
  • There are several ways to specify arrays for subroutines
      +
    • Explicit shape
        +
      • integer, dimension(8,8)::an_explicit_shape_array
      • +
      +
    • +
    • Assumed size
        +
      • integer, dimension(i,*)::an_assumed_size_array
      • +
      +
    • +
    • Assumed Shape
        +
      • integer, dimension(:,:)::an_assumed_shape_array
      • +
      +
    • +
    +
  • +
+

Example#

+
subroutine arrays(an_explicit_shape_array,&
+                  i                      ,& !note we pass all bounds except the last
+                  an_assumed_size_array  ,&
+                  an_assumed_shape_array)
+! Explicit shape
+    integer, dimension(8,8)::an_explicit_shape_array
+! Assumed size
+    integer, dimension(i,*)::an_assumed_size_array
+! Assumed Shape
+    integer, dimension(:,:)::an_assumed_shape_array
+    write(*,*)sum(an_explicit_shape_array)
+    write(*,*)lbound(an_assumed_size_array) ! why does sum not work here?
+    write(*,*)sum(an_assumed_shape_array)
+end subroutine
+
+
+
+

Interface for passing arrays#

+
    +
  • !!!!Warning!!!! When passing assumed shape arrays as arguments you must provide an interface
  • +
  • Similar to C prototypes but much more versatile
  • +
  • The interface is a copy of the invocation line and the argument definitions
  • +
  • Modules are a good place for interfaces
  • +
  • If a procedure is part of a "contains" section in a module, an interface is not required
  • +
  • !!!!Warning!!!! The compiler may not tell you that you need an interface +
    module numz
    +    integer, parameter:: b8 = selected_real_kind(14)
    +    integer,allocatable :: a_gene(:),many_genes(:,:)
    +end module
    +module face
    +    interface fitness
    +        function fitness(vector)
    +        use numz
    +        implicit none
    +        real(b8) fitness
    +        integer, dimension(:) ::  vector
    +        end function fitness
    +    end interface
    +end module
    +program darwin
    +    use numz
    +    use face
    +    implicit none
    +    integer i
    +    integer vect(10) ! just a regular array
    +    allocate(a_gene(10));allocate(many_genes(3,10))
    +    a_gene=1  !sets every element of a_gene to 1
    +    write(*,*)fitness(a_gene)
    +    vect=8
    +    write(*,*)fitness(vect) ! also works with regular arrays
    +    many_genes=3  !sets every element to 3
    +    many_genes(1,:)=a_gene  !sets row 1 (gene 1) to a_gene
    +    many_genes(2,:)=2*many_genes(1,:)
    +    do i=1,3
    +        write(*,*)fitness(many_genes(i,:))
    +    enddo
    +    write(*,*)fitness(many_genes(:,1))  !go along other dimension
    +!!!!write(*,*)fitness(many_genes)!!!!does not work
    +end program
    +function fitness(vector)
    +    use numz
    +    implicit none
    +    real(b8) fitness
    +    integer, dimension(:)::  vector ! must match interface
    +    fitness=sum(vector)
    +end function
    +
  • +
+
+
+

Exercise 2: Run this program using the "does not work" line. Why does it fail? Can you use intrinsic functions to make it work?

+
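
A hint (not part of the original page): the commented-out call fails because many_genes has rank two while the interface declares a rank-one dummy argument. An intrinsic such as reshape or pack can flatten it first:

! hint: flatten the rank-2 array with an intrinsic so it matches
+! the rank-1 dummy argument declared in the interface
+    write(*,*)fitness(reshape(many_genes,(/size(many_genes)/)))
+    write(*,*)fitness(pack(many_genes,.true.))   ! an alternative
+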

Exercise 3: Prove that f90 does not "pass by address".

+
+
+

Optional arguments and intent#

+
    +
  • Motivation:
      +
    • We may have a function or subroutine to which we do not always want to pass all arguments
    • +
    • Initialization
    • +
    +
  • +
  • Two examples
      +
    • Seeding the intrinsic random number generator requires keyword arguments
    • +
    • To define an optional argument in our own function we use the optional attribute
    • +
    +
  • +
+
integer :: my_seed
+
+

becomes#

+
integer, optional :: my_seed
+
+

Used like this:

+
! ran1 returns a uniform random number between 0-1
+! the seed is optional and used to reset the generator
+contains
+   function ran1(my_seed)
+      use numz
+      implicit none
+      real(b8) ran1,r
+      integer, optional ,intent(in) :: my_seed  ! optional argument not changed in the routine
+      integer,allocatable :: seed(:)
+      integer the_size,j
+      if(present(my_seed))then            ! use the seed if present
+          call random_seed(size=the_size) ! how big is the intrinsic seed?
+          allocate(seed(the_size))        ! allocate space for seed
+          do j=1,the_size                 ! create the seed
+             seed(j)=abs(my_seed)+(j-1)   ! abs is generic
+          enddo
+          call random_seed(put=seed)      ! assign the seed
+          deallocate(seed)                ! deallocate space
+      endif
+      call random_number(r)
+      ran1=r
+  end function ran1
+end module
+program darwin
+    use numz
+    use ran_mod          ! interface required if we have
+                         ! optional or intent arguments
+    real(b8) x,y
+    x=ran1(my_seed=12345) ! we can specify the name of the argument
+    y=ran1()
+    write(*,*)x,y
+    x=ran1(12345)         ! with only one optional argument we don't need to give its name
+    y=ran1()
+    write(*,*)x,y
+end program
+
+
    +
  • Intent is a hint to the compiler to enable optimization
      +
    • intent(in)
        +
      • We will not change this value in our subroutine
      • +
      +
    • +
    • intent(out)
        +
      • We will define this value in our routine
      • +
      +
    • +
    • intent(inout)
        +
      • The normal situation
      • +
      +
    • +
    +
  • +
+
+
+

Derived data types#

+
    +
  • +

    Motivation:

    +
      +
    • Derived data types can be used to group different types of data together (integers, reals, character, complex)
    • +
    • Can not be done in F77 although people have "faked" it
    • +
    +
  • +
  • +

    Example

    +
      +
    • In our GA we define a collection of genes as a 2d array
    • +
    • We call the fitness function for every member of the collection
    • +
    • We want to sort the collection of genes based on result of fitness function
    • +
    • Define a data type that holds the fitness value and an index into the 2d array
    • +
    • Create an array of this data type, 1 for each member of the collection
    • +
    • Call the fitness function with the result being placed into the new data type along with a pointer into the array
    • +
    +
  • +
  • Again modules are a good place for data type definitions
  • +
+
module galapagos
+    use numz
+    type thefit !the name of the type
+      sequence  ! sequence forces the data elements
+                ! to be next to each other in memory
+                ! where might this be useful?
+      real(b8) val   ! our result from the fitness function
+      integer index  ! the index into our collection of genes
+    end type thefit
+end module
+
+
+
+

Using defined types#

+
    +
  • Use the % to reference various components of the derived data type +
    program darwin
    +    use numz
    +    use galapagos ! the module that contains the type definition
    +    use face      ! contains various interfaces
    + implicit none
    +! define an allocatable array of the data type
    +! than contains an index and a real value
    +    type (thefit),allocatable ,target  :: results(:)
    +! create a single instance of the data type
    +    type (thefit) best
    +    integer,allocatable :: genes(:,:) ! our genes for the genetic algorithm
    +    integer j
    +    integer num_genes,gene_size
    +    num_genes=10
    +    gene_size=10
    +    allocate(results(num_genes))         ! allocate the data type
    +                                         ! to hold fitness and index
    +    allocate(genes(num_genes,gene_size)) ! allocate our collection of genes
    +    call init_genes(genes)               ! starting data
    +    write(*,'("input")' ) ! we can put format in write statement
    +    do j=1,num_genes
    +       results(j)%index =j
    +       results(j)%val =fitness(genes(j,:)) ! just a dummy routine for now
    +       write(*,"(f10.8,i4)")results(j)%val,results(j)%index
    +    enddo
    +end program
    +
  • +
+
+
+

User defined operators#

+
    +
  • +

    Motivation

    +
      +
    • With derived data types we may want (need) to define operations
    • +
    • (Assignment is predefined)
    • +
    +
  • +
  • +

    Example:

    +
      +
    • .lt., .gt., and == are not defined for our data types: we want to find the minimum of our fitness values, so we need the < operator; in our sort routine we want to do <, >, and ==; in C++ terms, the operators are overloaded
    • +
    • We are free to define new operators
    • +
    +
  • +
  • +

    Two step process to define operators

    +
      +
    • Define a special interface
    • +
    • Define the function that performs the operation +
      module sort_mod
      +!defining the interfaces
      +  interface operator (.lt.)  ! overloads standard .lt.
      +    module procedure theless ! the function that does it
      +  end interface
      +  interface operator (.gt.)   ! overloads standard .gt.
      +    module procedure thegreat ! the function that does it
      +  end interface
      +  interface operator (.ge.)  ! overloads standard .ge.
      +    module procedure thetest ! the function that does it
      +  end interface
      +  interface operator (.converged.)  ! new operator
      +    module procedure index_test     ! the function that does it
      +  end interface
      +  contains      ! our module will contain
      +              ! the required functions
      +    function theless(a,b) ! overloads .lt. for the type (thefit)
      +    use galapagos
      +    implicit none
      +    type(thefit), intent (in) :: a,b
      +    logical theless           ! what we return
      +    if(a%val .lt. b%val)then     ! this is where we do the test
      +        theless=.true.
      +    else
      +        theless=.false.
      +    endif
      +    return
      +  end function theless
      +  function thegreat(a,b) ! overloads .gt. for the type (thefit)
      +    use galapagos
      +    implicit none
      +    type(thefit), intent (in) :: a,b
      +    logical thegreat
      +    if(a%val .gt. b%val)then
      +        thegreat=.true.
      +    else
      +        thegreat=.false.
      +    endif
      +    return
      +  end function thegreat
      +  function thetest(a,b)   ! overloads .ge. for the type (thefit)
      +    use galapagos
      +    implicit none
      +    type(thefit), intent (in) :: a,b
      +    logical thetest
      +    if(a%val >= b%val)then
      +        thetest=.true.
      +    else
      +        thetest=.false.
      +    endif
      +    return
      +end function thetest
      +  function index_test(a,b) ! defines a new operation for the type (thefit)
      +    use galapagos
      +    implicit none
      +    type(thefit), intent (in) :: a,b
      +    logical index_test
      +    if(a%index .gt. b%index)then   ! check the index value for a difference
      +        index_test=.true.
      +    else
      +        index_test=.false.
      +    endif
      +    return
      +end function index_test
      +
    • +
    +
  • +
+
+
+

Recursive functions introduction#

+
    +
  • +

    Notes

    +
      +
    • Recursive function is one that calls itself
    • +
    • Anything that can be done with a do loop can be done using a recursive function
    • +
    +
  • +
  • +

    Motivation

    +
      +
    • Sometimes it is easier to think recursively
    • +
    • Divide and conquer algorithms are recursive by nature: fast FFTs, searching, sorting
    • +
    +
  • +
+

Algorithm of searching for minimum of an array#

+
    function findmin(array)
+        is size of array 1?
+           min in the array is first element
+        else
+           find minimum in left half of array using findmin function
+           find minimum in right half of array using findmin function
+           global minimum is min of left and right half
+    end function
+
+
+
+

Fortran 90 recursive functions#

+
    +
  • Recursive functions should have an interface
  • +
  • The result and recursive keywords are required as part of the function definition
  • +
  • The example is a function that finds the minimum value of an array
  • +
+
recursive function realmin(ain) result (themin)
+! recursive and result are required for recursive functions
+    use numz
+    implicit none
+    real(b8) themin,t1,t2
+    integer n,right
+    real(b8) ,dimension(:) :: ain
+    n=size(ain)
+    if(n == 1)then
+       themin=ain(1) ! if the size is 1 return value
+    return
+    else
+      right=n/2
+      t1=realmin(ain(1:right))   ! find min in left half
+      t2=realmin(ain(right+1:n)) ! find min in right half
+      themin=min(t1,t2)          ! find min of the two sides
+     endif
+end function
+
+
    +
  • Example 2 is the same except the input data is our derived data type
  • +
+
!this routine works with the data structure thefit not reals
+recursive function typemin(ain) result (themin)
+    use numz
+ use sort_mod
+ use galapagos
+ implicit none
+ real(b8) themin,t1,t2
+ integer n,right
+    type (thefit) ,dimension(:) :: ain ! this line is different
+ n=size(ain)
+ if(n == 1)then
+     themin=ain(1)%val  ! this line is different
+  return
+ else
+  right=n/2
+  t1=typemin(ain(1:right))
+  t2=typemin(ain(right+1:n))
+  themin=min(t1,t2)
+ endif
+end function
+
+
+
+

Pointers#

+
    +
  • +

    Motivation

    +
      +
    • Can increase performance
    • +
    • Can improve readability
    • +
    • Required for some derived data types (linked lists and trees)
    • +
    • Useful for allocating "arrays" within subroutines
    • +
    • Useful for referencing sections of arrays
    • +
    +
  • +
  • +

    Notes

    +
      +
    • Pointers can be thought of as an alias to another variable
    • +
    • In some cases can be used in place of an array
    • +
    • To assign a pointer use => instead of just =
    • +
    • Unlike C and C++, pointer arithmetic is not allowed
    • +
    +
  • +
  • +

    First pointer example

    +
      +
    • Similar to the last findmin routine
    • +
    • Return a pointer to the minimum
    • +
    +
  • +
+
recursive function pntmin(ain) result (themin) ! return a pointer
+ use numz
+ use galapagos
+ use sort_mod ! contains the .lt. operator for thefit type
+ implicit none
+ type (thefit),pointer:: themin,t1,t2
+ integer n,right
+    type (thefit) ,dimension(:),target :: ain
+ n=size(ain)
+ if(n == 1)then
+     themin=>ain(1) !this is how we do pointer assignment
+  return
+ else
+  right=n/2
+  t1=>pntmin(ain(1:right))
+  t2=>pntmin(ain(right+1:n))
+  if(t1 .lt. t2)then; themin=>t1; else; themin=>t2; endif
+ endif
+end function
+
+
+
+

Exercise 4: Carefully write a recursive N! program.

+
+
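
A possible sketch for Exercise 4 (not from the original page); the function guards its base case so the recursion always terminates, and making it an internal procedure gives the caller an explicit interface automatically:

program fact_test
+    implicit none
+    write(*,*) '5! =', factorial(5)
+contains
+    recursive function factorial(n) result(nfact)
+        integer, intent(in) :: n
+        integer :: nfact
+        if (n <= 1) then          ! base case; also covers n = 0
+            nfact = 1
+        else
+            nfact = n * factorial(n-1)
+        endif
+    end function factorial
+end program fact_test
+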
+

Function and subroutine overloading#

+
    +
  • +

    Motivation

    +
      +
    • Allows us to call functions or subroutines with the same name but with different argument types
    • +
    • Increases readability
    • +
    +
  • +
  • +

    Notes:

    +
      +
    • Similar in concept to operator overloading
    • +
    • Requires an interface
    • +
    • Syntax for subroutines is same as for functions
    • +
    • Many intrinsic functions have this capability: abs (reals, complex, integer); sin, cos, tan, exp (reals, complex); array functions (reals, complex, integer)
    • +
    • Example: recall we had two functions that did the same thing but with different argument types
    • +
    +
  • +
+

         recursive function realmin(ain) result (themin)
+         real(b8) ,dimension(:) :: ain
+         recursive function typemin(ain) result (themin)
+         type (thefit) ,dimension(:) :: ain
+
+- We can define a generic interface for these two functions and call them using the same name

+
! note we have two functions within the same interface
+! this is how we indicate function overloading
+! both functions are called "findmin" in the main program
+interface findmin
+! the first is called with an array of reals as input
+        recursive function realmin(ain) result (themin)
+          use numz
+       real(b8) themin
+          real(b8) ,dimension(:) :: ain
+        end function
+! the second is called with an array of data structures as input
+     recursive function typemin(ain) result (themin)
+          use numz
+    use galapagos
+       real(b8) themin
+          type (thefit) ,dimension(:) :: ain
+     end function
+    end interface
+
+

Example usage#

+
program darwin
+    use numz
+    use ran_mod
+    use galapagos ! the module that contains the type definition
+    use face      ! contains various interfaces
+    use sort_mod  ! more about this later; it
+                  ! contains our sorting routine
+                  ! and a few other tricks
+    implicit none
+! create an allocatable array of the data type
+! than contains an index and a real value
+    type (thefit),allocatable ,target :: results(:)
+! create a single instance of the data type
+    type (thefit) best
+! pointers to our type
+    type (thefit) ,pointer :: worst,tmp
+    integer,allocatable :: genes(:,:) ! our genes for the ga
+    integer j
+    integer num_genes,gene_size
+    real(b8) x
+    real(b8),allocatable :: z(:)
+    real(b8),pointer :: xyz(:) ! we'll talk about this next
+    num_genes=10
+    gene_size=10
+    allocate(results(num_genes))         ! allocate the data type to
+    allocate(genes(num_genes,gene_size)) ! hold our collection of genes
+    call init_genes(genes)               ! starting data
+    write(*,'("input")')
+    do j=1,num_genes
+       results(j)%index=j
+       results(j)%val=fitness(genes(j,:)) ! just a dummy routine
+       write(*,"(f10.8,i4)")results(j)%val,results(j)%index
+    enddo
+    allocate(z(size(results)))
+    z=results(:)%val ! copy our results to a real array
+! use a recursive subroutine operating on the real array
+    write(*,*)"the lowest fitness: ",findmin(z)
+! use a recursive subroutine operating on the data structure
+    write(*,*)"the lowest fitness: ",findmin(results)
+end program
+
+
+
+

Fortran Minval and Minloc routines#

+
    +
  • Fortran has routines for finding minimum and maximum values in arrays, and their locations
      +
    • minval
    • +
    • maxval
    • +
    • minloc (returns an array)
    • +
    • maxloc (returns an array)
    • +
    +
  • +
+
! we show two other methods of getting the minimum fitness
+! use the built in f90 routines  on a real array
+    write(*,*)"the lowest fitness: ",minval(z),minloc(z)
+
+
+
+

Pointer assignment#

+
    +
  • This is how we use the pointer function defined above
  • +
  • worst is a pointer to our data type
  • +
  • note the use of => +
    ! use a recursive subroutine operating on the data
    +! structure and returning a pointer to the result
    +    worst=>pntmin(results) ! note pointer assignment
    +! what will this line write?
    + write(*,*)"the lowest fitness: ",worst
    +
  • +
+
+
+

More pointer usage, association and nullify#

+
    +
  • +

    Motivation

    +
      +
    • Need to find if pointers point to anything
    • +
    • Need to find if two pointers point to the same thing
    • +
    • Need to deallocate and nullify when they are no longer used
    • +
    +
  • +
  • +

    Usage

    +
      +
    • We can use associated() to tell if a pointer has been set
    • +
    • We can use associated() to compare pointers
    • +
    • We use nullify to zero a pointer
    • +
    +
  • +
+
! This code will print "true" when we find a match,
+! that is the pointers point to the same object
+    do j=1,num_genes
+     tmp=>results(j)
+        write(*,"(f10.8,i4,l3)")results(j)%val,   &
+                                results(j)%index, &
+           associated(tmp,worst)
+    enddo
+    nullify(tmp)
+
+
    +
  • Notes:
      +
    • If a pointer is nullified the object to which it points is not deallocated.
    • +
    • In general, pointers as well as allocatable arrays become undefined on leaving a subroutine
    • +
    • This can cause a memory leak
    • +
    +
  • +
+
+
+

Pointer usage to reference an array without copying#

+
    +
  • Motivation
      +
    • Our sort routine calls a recursive sorting routine
    • +
    • It is messy and inefficient to pass the array to the recursive routine
    • +
    +
  • +
  • Solution
      +
    • We define a "global" pointer in a module
    • +
    • We point the pointer to our input array
    • +
    +
  • +
+
module Merge_mod_types
+    use galapagos
+    type(thefit),allocatable :: work(:) ! a "global" work array
+    type(thefit), pointer:: a_pntr(:)   ! this will be the pointer to our input array
+end module Merge_mod_types
+  subroutine Sort(ain, n)
+    use Merge_mod_types
+    implicit none
+    integer n
+    type(thefit), target:: ain(n)
+    allocate(work(n))
+    nullify(a_pntr)
+    a_pntr=>ain  ! we assign the pointer to our array
+                 ! in RecMergeSort we reference it just like an array
+    call RecMergeSort(1,n) ! very similar to the findmin functions
+    deallocate(work)
+    return
+end subroutine Sort
+
+
    +
  • In our main program sort is called like this: +
    ! our sort routine is also recursive but
    +! also shows a new usage for pointers
    +    call sort(results,num_genes)
    +    do j=1,num_genes
    +       write(*,"(f10.8,i4)")results(j)%val,   &
    +                            results(j)%index
    +    enddo
    +
  • +
+
+
+

Data assignment with structures#

+
! we can copy a whole structure
+! with a single assignment
+    best=results(1)
+    write(*,*)"best result ",best
+
+
+
+

Using the user defined operator#

+
! using the user defined operator to see if best is worst
+! recall that the operator .converged. checks to see if %index matches
+    worst=>pntmin(results)
+    write(*,*)"worst result ",worst
+    write(*,*)"converged=",(best .converged. worst)
+
+
+
+

Passing arrays with arbitrary lower bounds#

+
    +
  • +

    Motivation

    +
      +
    • +

      Default lower bound within a subroutine is 1

      +
    • +
    • +

      May want to use a different lower bound

      +
    • +
    +
  • +
+
    if(allocated(z))deallocate(z)
+    allocate(z(-10:10)) ! a 21 element array
+    do j=-10,10
+       z(j)=j
+    enddo ! pass z and its lower bound
+! in this routine we give the array a specific lower
+! bound and show how to use a pointer to reference
+! different parts of an array using different indices
+  call boink1(z,lbound(z,1)) ! why not just lbound(z) instead of lbound(z,1)?
+                             ! lbound(z) returns a rank 1 array
+     subroutine boink1(a,n)
+     use numz
+     implicit none
+     integer,intent(in) :: n
+     real(b8),dimension(n:):: a ! this is how we set lower bounds in a subroutine
+     write(*,*)lbound(a),ubound(a)
+   end subroutine
+
+

Warning: because we are using an assumed shape array we need an interface#

+

Using pointers to access sections of arrays#

+
    +
  • Motivation
      +
    • Can increase efficiency
    • +
    • Can increase readability
    • +
    +
  • +
+
call boink2(z,lbound(z,1))
+
+subroutine boink2(a,n)
+use numz
+implicit none
+integer,intent(in) :: n
+real(b8),dimension(n:),target:: a
+real(b8),dimension(:),pointer::b
+b=>a(n:) ! b(1) "points" to a(-10)
+write(*,*)"a(-10) =",a(-10),"b(1) =",b(1)
+b=>a(0:) ! b(1) "points" to a(0)
+write(*,*)"a(-6) =",a(-6),"b(-5) =",b(-5)
+end subroutine
+
+
+
+

Allocating an array inside a subroutine and passing it back#

+
    +
  • Motivation
      +
    • The size of the array is calculated inside the subroutine
    • +
    +
  • +
+
module numz
+    integer, parameter:: b8 = selected_real_kind(14)
+end module
+program bla
+   use numz
+   real(b8), dimension(:) ,pointer :: xyz
+   interface boink
+     subroutine boink(a)
+     use numz
+     implicit none
+     real(b8), dimension(:), pointer :: a
+     end subroutine
+   end interface
+   nullify(xyz) ! nullify sets a pointer to null
+   write(*,'(l5)')associated(xyz) ! is a pointer null, should be
+   call boink(xyz)
+   write(*,'(l5)',advance="no")associated(xyz)
+   if(associated(xyz))write(*,'(i5)')size(xyz)
+end program
+subroutine boink(a)
+    use numz
+    implicit none
+    real(b8),dimension(:),pointer:: a
+    if(associated(a))deallocate(a)
+    allocate(a(10))
+end subroutine
+
+

An example run#

+
     F
+     T
+10
+
+
+
+

Our fitness function#

+

Given a fixed number of colors, M, and a description of a map of a collection of N states.

+

Find a coloring of the map such that no two states that share a border have the same coloring.

+

Example input is a sorted list of 22 western states#

+
22
+ar ok tx la mo xx
+az ca nm ut nv xx
+ca az nv or xx
+co nm ut wy ne ks xx
+ia mo ne sd mn xx
+id wa or nv ut wy mt xx
+ks ne co ok mo xx
+la tx ar xx
+mn ia sd nd xx
+mo ar ok ks ne ia xx
+mt wy id nd xx
+nd mt sd wy xx
+ne sd wy co ks mo ia xx
+nm az co ok tx mn xx
+nv ca or id ut az xx
+ok ks nm tx ar mo xx
+or ca wa id xx
+sd nd wy ne ia mn xx
+tx ok nm la ar xx
+ut nv az co wy id xx
+wa id or mt xx
+wy co mt id ut nd sd ne xx
+
+

Our fitness function takes a potential coloring, that is, an integer vector of length N, and returns the number of borders whose two states have the same coloring

+
    +
  • How do we represent the map in memory?
      +
    • One way would be to use an array but it would be very sparse
    • +
    • Linked lists are often a better way
    • +
    +
  • +
+
+
+

Linked lists#

+
    +
  • +

    Motivation

    +
      +
    • We have a collection of states and, for each state, a list of adjoining states. (Do not count a border twice.)
    • +
    • Problem is that you do not know the length of the list until runtime.
    • +
    • +

      The list of adjoining states will have a different length for different states

      +
    • +
    • +

      Solution: linked lists are a good way to handle such situations

      +
    • +
    • Linked lists use a derived data type with at least two components
        +
      • Data
      • +
      • Pointer to next element
      • +
      +
    • +
    +
  • +
+
module list_stuff
+    type llist
+        integer index              ! data
+        type(llist),pointer::next  ! pointer to the next element
+    end type llist
+end module
+
+
+
+

Linked list usage#

+

One way to fill a linked list is to use a recursive function:

recursive subroutine insert (item, root)
+    use list_stuff
+    implicit none
+    type(llist), pointer :: root
+    integer item
+    if (.not. associated(root)) then
+        allocate(root)
+        nullify(root%next)
+        root%index = item
+    else
+        call insert(item,root%next)
+    endif
+end subroutine
+

Our map representation#

+
  • An array of the derived data type states
      • State is the name of a state
      • A linked list containing its borders

    type states
+        character(len=2)name
+        type(llist),pointer:: list
+    end type states
+
+
  • Notes: we have an array of linked lists; this data structure is often used to represent sparse arrays; we could have a linked list of linked lists; the state name is not really required. A sketch of how the fitness function can walk this structure follows below.
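
As a hedged sketch of that fitness function (the full version is in the complete source linked later), assume the map array of type states has been placed in a module, hypothetically called color_map, so the function can see it. The function walks each state's list and counts borders whose two states share a color:

function fitness(vector)
+! sketch only: assumes a hypothetical module color_map that holds
+! "type (states), allocatable :: map(:)" filled in by the main program
+    use numz
+    use list_stuff
+    use color_map
+    implicit none
+    real(b8) fitness
+    integer, dimension(:) :: vector     ! a candidate coloring
+    type(llist), pointer :: current
+    integer i
+    fitness=0.0_b8
+    do i=1,size(vector)                 ! for every state...
+        current=>map(i)%list            ! ...walk its list of borders
+        do while(associated(current))
+            if(vector(i) == vector(current%index)) &
+                fitness=fitness+1.0_b8  ! same color on both sides of a border
+            current=>current%next
+        enddo
+    enddo
+end function
+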

+
+
+

Date and time functions#

+
    +
  • +

    Motivation

    +
      +
    • +

      May want to know the date and time of your program

      +
    • +
    • +

      Two functions

      +
    • +
    +
  • +
+
! all arguments are optional
+call date_and_time(date=c_date, &  ! character(len=8) ccyymmdd
+                   time=c_time, &  ! character(len=10) hhmmss.sss
+                   zone=c_zone, &  ! character(len=10) +/-hhmm (time zone)
+                   values=ivalues) ! integer ivalues(8) all of the above
+           call system_clock(count=ic,           & ! count of system clock (clicks)
+                  count_rate=icr,     & ! clicks / second
+                  count_max=max_c)      ! max value for count
+
+
+
+

Non advancing and character IO#

+
    +
  • +

    Motivation

    +
      +
    • +

      We read the states using the two character identification

      +
    • +
    • +

      One line per state, and we do not know how many border states are listed on each line

      +
    • +
    +
  • +
  • +

    Note: Our list of states is presorted +

    character(len=2) a ! we have a character variable of length 2
    +read(12,*)nstates ! read the number of states
    +allocate(map(nstates)) ! and allocate our map
    +do i=1,nstates
    +    read(12,"(a2)",advance="no")map(i)%name ! read the name
    +    !write(*,*)"state:",map(i)%name
    +    nullify(map(i)%list) ! "zero out" our list
    +    do
    +        read(12,"(1x,a2)",advance="no")a ! read list of states
    +        ! without going to the
    +        ! next line
    +        if(lge(a,"xx") .and. lle(a,"xx"))then ! if state == xx
    +        backspace(12) ! go to the next line
    +        read(12,"(1x,a2)",end=1)a ! go to the next line
    +        exit
    +        endif
    +        1 continue
    +        if(llt(a,map(i)%name))then ! we only add a state to
    +        ! our list if its name
    +        ! is before ours thus we
    +        ! only count borders 1 time
    +        ! what we want put into our linked list is an index
    +        ! into our map where we find the bordering state
    +        ! thus we do the search here
    +        ! any ideas on a better way of doing this search?
    +        found=-1
    +        do j=1,i-1
    +            if(lge(a,map(j)%name) .and. lle(a,map(j)%name))then
    +            !write(*,*)a
    +            found=j
    +            exit
    +            endif
    +        enddo
    +        if(found == -1)then
    +            write(*,*)"error"
    +            stop
    +        endif
    +        ! found the index of the bordering state; insert it into our list
    +        ! note we do the insert into the linked list for a particular state
    +        call insert(found,map(i)%list)
    +        endif
    +    enddo
    +enddo
    +

    +
  • +
+
+
+

Internal IO#

+
    +
  • +

    Motivation

    +
      +
    • +

      May need to create strings on the fly

      +
    • +
    • +

      May need to convert from strings to reals and integers

      +
    • +
    • +

      Similar to sprintf and sscanf

      +
    • +
    +
  • +
  • +

    How it works

    +
      +
    • +

      Create a string

      +
    • +
    • +

      Do a normal write except write to the string instead of file number

      +
    • +
    +
  • +
  • +

    Example 1: creating a date and time stamped file name

    +
  • +
+
character (len=12)tmpstr
+
+write(tmpstr,"(a12)")(c_date(5:8)//c_time(1:4)//".dat") ! // does string concatenation
+write(*,*)"name of file= ",tmpstr
+open(14,file=tmpstr)
+name of file= 03271114.dat
+
+
    +
  • Example 2: Creating a format statement at run time (array of integers and a real)
  • +
+

! test_vect is an array that we do not know its length until run time
+nstates=9 ! the size of the array
+write(fstr,'("(",i4,"i1,1x,f10.5)")')nstates
+write(*,*)"format= ",fstr
+write(*,fstr)test_vect,fstr
+format= ( 9i1,1x,f10.5)
+
+Any other ideas for writing an array when you do not know its length?

+
    +
  • Example 3: Reading from a string +
    integer hr,minut,sec
    +read(c_time,"(3i2)")hr,minut,sec
    +
  • +
+
+
+

Inquire function#

+
    +
  • Motivation
      +
    • Need to get information about I/O
    • +
    +
  • +
  • +

    Inquire statement has two forms

    +
      +
    • Information about files (23 different requests can be done)
    • +
    • Information about space required for binary output of a value
    • +
    +
  • +
  • +

    Example: find the size of your real relative to the "standard" real

    +
      +
    • Useful for inter language programming
    • +
    • Useful for determining data types in MPI (MPI_REAL or MPI_DOUBLE_PRECISION)
    • +
    +
  • +
+
inquire(iolength=len_real)1.0
+inquire(iolength=len_b8)1.0_b8
+write(*,*)"len_b8 ",len_b8
+write(*,*)"len_real",len_real
+iratio=len_b8/len_real
+select case (iratio)
+    case (1)
+      my_mpi_type=mpi_real
+    case(2)
+      my_mpi_type=mpi_double_precision
+    case default
+      write(*,*)"type undefined"
+      my_mpi_type=0
+end select
+
+

An example run#

+
len_b8 2
+len_real 1
+
+
+
+

Namelist#

+
    +
  • Now part of the standard
  • +
  • Motivation
      +
    • A convenient method of doing I/O
    • +
    • Good for cases where you have similar runs but change one or two variables
    • +
    • Good for formatted output
    • +
    +
  • +
  • +

    Notes:

    +
      +
    • A little flaky
    • +
    • No options for overloading format
    • +
    +
  • +
  • +

    Example: +

    integer ncolor
    +logical force
    +namelist /the_input/ncolor,force
    +ncolor=4
    +force=.true.
    +read(13,the_input)
    +write(*,the_input)
    +
    +On input: +
    & THE_INPUT NCOLOR=4,FORCE = F /
    +
    +Output is +
    &THE_INPUT
    +NCOLOR = 4,
    +FORCE = F
    +/
    +

    +
  • +
+
+
+

Vector valued functions#

+
    +
  • Motivation
      +
    • May want a function that returns a vector
    • +
    +
  • +
  • +

    Notes

    +
      +
    • Again requires an interface
    • +
    • Use explicit or assumed size array
    • +
    • Do not return a pointer to a vector unless you really want a pointer
    • +
    +
  • +
  • +

    Example:

    +
      +
    • Take an integer input vector which represents an integer in some base and add 1
    • +
    • Could be used in our program to find a "brute force" solution
    • +
    +
  • +
+
  function add1(vector,max) result (rtn)
+  integer, dimension(:),intent(in) ::  vector
+  integer,dimension(size(vector)) :: rtn
+  integer max
+  integer len
+  logical carry
+  len=size(vector)
+  rtn=vector
+  i=0
+  carry=.true.
+  do while(carry)         ! just continue until we do not do a carry
+      i=i+1
+   rtn(i)=rtn(i)+1
+   if(rtn(i) .gt. max)then
+       if(i == len)then   ! roll over: set everything back to 0
+        rtn=0
+        carry=.false.     ! stop here so we do not run past the end of rtn
+    else
+        rtn(i)=0
+       endif
+   else
+       carry=.false.
+   endif
+  enddo
+end function
+
+

Usage#

+
test_vect=0
+        do
+           test_vect=add1(test_vect,3)
+           result=fitness(test_vect)
+           if(result .lt. 1.0_b8)then
+               write(*,*)test_vect
+               stop
+           endif
+        enddo
+
+
+
+

Complete source for recent discussions#

+ +
+
+

Exercise 5: Modify the program to use the random number generator given earlier.

+
+
+

Some array specific intrinsic functions#

+
    +
  • ALL True if all values are true (LOGICAL)
  • +
  • ANY True if any value is true (LOGICAL)
  • +
  • COUNT Number of true elements in an array (LOGICAL)
  • +
  • DOT_PRODUCT Dot product of two rank one arrays
  • +
  • MATMUL Matrix multiplication
  • +
  • MAXLOC Location of a maximum value in an array
  • +
  • MAXVAL Maximum value in an array
  • +
  • MINLOC Location of a minimum value in an array
  • +
  • MINVAL Minimum value in an array
  • +
  • PACK Pack an array into an array of rank one
  • +
  • PRODUCT Product of array elements
  • +
  • RESHAPE Reshape an array
  • +
  • SPREAD Replicates array by adding a dimension
  • +
  • SUM Sum of array elements
  • +
  • TRANSPOSE Transpose an array of rank two
  • +
  • +

  • UNPACK Unpack an array of rank one into an array under a mask

    +
  • +
  • +

    Examples

    +
  • +
+
program matrix
+    real w(10),x(10),mat(10,10)
+    call random_number(w)
+    call random_number(mat)
+    x=matmul(w,mat)   ! regular matrix multiply  USE IT
+    write(*,'("dot(x,x)=",f10.5)'),dot_product(x,x)
+end program
+program allit
+     character(len=10):: f1="(3l1)"
+     character(len=10):: f2="(3i2)"
+     integer b(2,3),c(2,3),one_d(6)
+     logical l(2,3)
+     one_d=(/ 1,3,5 , 2,4,6 /)
+     b=transpose(reshape((/ 1,3,5 , 2,4,6 /),shape=(/3,2/)))
+     C=transpose(reshape((/ 0,3,5 , 7,4,8 /),shape=(/3,2/)))
+     l=(b.ne.c)
+     write(*,f2)((b(i,j),j=1,3),i=1,2)
+     write(*,*)
+     write(*,f2)((c(i,j),j=1,3),i=1,2)
+     write(*,*)
+     write(*,f1)((l(i,j),j=1,3),i=1,2)
+     write(*,*)
+     write(*,f1)all ( b .ne. C ) !is .false.
+     write(*,f1)all ( b .ne. C, DIM=1) !is [.true., .false., .false.]
+     write(*,f1)all ( b .ne. C, DIM=2) !is [.false., .false.]
+end
+
+
    +
  • The output is:
  • +
+
 1 3 5
+ 2 4 6
+ 0 3 5
+ 7 4 8
+ TFF
+ TFT
+ F
+ TFF
+ FF
+
+
+
+
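+A few more of the intrinsics listed above in action; the array contents in
+this sketch are arbitrary:
+
+  program reduce
+      integer :: a(6)=(/ 4,-2,7,0,7,3 /)
+      write(*,*)"sum    ",sum(a)          ! 19
+      write(*,*)"maxval ",maxval(a)       ! 7
+      write(*,*)"maxloc ",maxloc(a)       ! 3, index of the first maximum
+      write(*,*)"count  ",count(a > 0)    ! 4 positive elements
+      write(*,*)"pack   ",pack(a,a > 0)   ! 4 7 7 3
+  end program
+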

The rest of our GA#

+ +
+
+

Compiler Information#

+

gfortran#

+
    +
  • .f, .for, .ftn, .f77
      +
    • fixed-format Fortran source; compile
    • +
    +
  • +
  • .f90, .f95
      +
    • free-format Fortran source; compile
    • +
    +
  • +
  • -fbacktrace
      +
    • Add debug information for runtime traceback
    • +
    +
  • +
  • -ffree-form -ffixed-form
      +
    • source form
    • +
    +
  • +
  • -O0, -O1, -O2, -O3
      +
    • optimization level
    • +
    +
  • +
  • .fpp, .FPP, .F, .FOR, .FTN, .F90, .F95, .F03 or .F08
      +
    • Fortran source file with preprocessor directives
    • +
    +
  • +
  • -fopenmp
      +
    • turn on OpenMP
    • +
    +
  • +
+

Intel#

+
    +
  • .f, .for, .ftn
      +
    • fixed-format Fortran source; compile
    • +
    +
  • +
  • .f90, .f95
      +
    • free-format Fortran source; compile
    • +
    +
  • +
  • -O0, -O1, -O2, -O3, -O4
      +
    • optimization level
    • +
    +
  • +
  • .fpp, .F, .FOR, .FTN, .FPP, .F90
      +
    • Fortran source file with preprocessor directives
    • +
    +
  • +
  • -g
      +
    • compile for debug * -traceback -notraceback (default)
    • +
    • Add debug information for runtime traceback
    • +
    +
  • +
  • -nofree, -free
      +
    • Source is fixed or free format
    • +
    +
  • +
  • -fopenmp
      +
    • turn on OpenMP
    • +
    +
  • +
+

Portland Group (x86)#

+
    +
  • .f, .for, .ftn
      +
    • fixed-format Fortran source; compile
    • +
    +
  • +
  • .f90, .f95, .f03
      +
    • free-format Fortran source; compile
    • +
    +
  • +
  • .cuf
      +
    • free-format CUDA Fortran source; compile
    • +
    +
  • +
  • +

    .CUF

    +
      +
    • free-format CUDA Fortran source; preprocess, compile
    • +
    +
  • +
  • +

    -O0, -O1, -O2, -O3, -O4

    +
      +
    • optimization level
    • +
    +
  • +
  • +

    -g

    +
      +
    • compile for debug * -traceback (default) -notraceback
    • +
    • Add debug information for runtime traceback
    • +
    +
  • +
  • -Mfixed, -Mfree
      +
    • Source is fixed or free format
    • +
    +
  • +
  • -mp
      +
    • turn on OpenMP
    • +
    +
  • +
+

IBM xlf#

+
    +
  • xlf, xlf_r, f77, fort77
      +
    • Compile FORTRAN 77 source files. _r = thread safe
    • +
    +
  • +
  • xlf90, xlf90_r, f90
      +
    • Compile Fortran 90 source files. _r = thread safe
    • +
    +
  • +
  • xlf95, xlf95_r, f95
      +
    • Compile Fortran 95 source files. _r = thread safe
    • +
    +
  • +
  • xlf2003, xlf2003_r,f2003 * Compile Fortran 2003 source files. _r = thread safe
  • +
  • xlf2008, xlf2008_r, f2008 * Compile Fortran 2008 source files.
  • +
  • .f, .f77, .f90, .f95, .f03, .f08
      +
    • Fortran source file
    • +
    +
  • +
  • .F, .F77, .F90, .F95, .F03, .F08
      +
    • Fortran source file with preprocessor directives
    • +
    +
  • +
  • -qtbtable=full
      +
    • Add debug information for runtime traceback
    • +
    +
  • +
  • -qsmp=omp
      +
    • turn on OpenMP
    • +
    +
  • +
  • -O0, -O1, -O2, -O3, -O4, -O5
      +
    • optimization level
    • +
    +
  • +
  • -g, -g0, -g1, ... -g9
      +
    • compile for debug
    • +
    +
  • +
+
+
+

Summary#

+
    +
  • +

    Fortran 90 has features to:

    +
      +
    • Enhance performance
    • +
    • Enhance portability
    • +
    • Enhance reliability
    • +
    • Enhance maintainability
    • +
    +
  • +
  • +

    Fortran 90 has new language elements

    +
      +
    • Source form
    • +
    • Derived data types
    • +
    • Dynamic memory allocation functions
    • +
    • Kind facility for portability and easy modification
    • +
    • Many new intrinsic functions
    • +
    • Array assignments
    • +
    +
  • +
  • +

    Examples

    +
      +
    • Help show how things work
    • +
    • Reference for future use
    • +
    +
  • +
+

Introduction to Fortran Language#

+
  Brought to you by ANSI committee X3J3 and ISO-IEC/JTC1/SC22/WG5 (Fortran)
+  This is neither complete nor precisely accurate, but hopefully, after
+  a small investment of time it is easy to read and very useful.
+
+  This is the free form version of Fortran, no statement numbers,
+  no C in column 1, start in column 1 (not column 7),
+  typically indent 2, 3, or 4 spaces per each structure.
+  The typical extension is  .f90  .
+
+  Continue a statement on the next line by ending the previous line with
+  an ampersand  & .  Start the continuation with  &  for strings.
+
+  The rest of any line is a comment starting with an exclamation mark  ! .
+
+  Put more than one statement per line by separating statements with a
+  semicolon  ; . Null statements are OK, so lines can end with semicolons.
+
+  Separate words with space or any form of "white space" or punctuation.
+
+
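+  A tiny sketch of the free form rules above: comments, a continued line,
+  two statements on one line, and a line ending in a semicolon.
+
+  program free_form_demo            ! everything after ! is a comment
+    implicit none
+    integer :: i; real :: total     ! two statements on one line
+    total = 1.0 + 2.0 + 3.0 + &
+            4.0                     ! continuation of the previous line
+    do i = 1, 3; total = total + i; end do;
+    print *, "total =", total
+  end program free_form_demo
+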

Meta language used in this compact summary#

+
  <xxx> means fill in something appropriate for xxx and do not type
+        the  "<"  or  ">" .
+
+  ...  ellipsis means the usual, fill in something, one or more lines
+
+  [stuff] means supply nothing or at most one copy of "stuff"
+          [stuff1 [stuff2]] means if "stuff1" is included, supply nothing
+          or at most one copy of stuff2.
+
+  "old" means it is in the language, like almost every feature of past
+  Fortran standards, but should not be used to write new programs.
+
+

Structure of files that can be compiled#

+
  program <name>                  usually file name is  <name>.f90
+    use <module_name>             bring in any needed modules
+    implicit none                 good for error detection
+    <declarations>
+    <executable statements>       order is important, no more declarations
+  end program <name>
+
+
+  block data <name>               old
+    <declarations>                common, dimension, equivalence now obsolete
+  end block data <name>
+
+
+  module <name>                   bring back in with   use <name>
+    implicit none                 good for error detection
+    <declarations>                can have private and public and interface
+  end module <name>
+
+  subroutine <name>               use:  call <name>   to execute
+    implicit none                 good for error detection
+    <declarations>
+    <executable statements>
+  end subroutine <name>
+
+
+  subroutine <name>(par1, par2, ...) 
+                                  use:  call <name>(arg1, arg2,... ) to execute
+    implicit none                 optional, good for error detection
+    <declarations>                par1, par2, ... are defined in declarations 
+                                  and can be specified in, inout, pointer, etc.
+    <executable statements>
+    return                        optional, end causes automatic return
+    entry <name> (par...)         old, optional other entries
+  end subroutine <name>
+
+
+  function <name>(par1, par2, ...) result(<rslt>)
+                                  use: <name>(arg1, arg2, ... argn) as variable
+    implicit none                 optional, good for error detection
+    <declarations>                rslt, par1, ... are defined in declarations
+    <executable statements>
+    <rslt> = <expression>         required somewhere in execution
+    [return]                      optional, end causes automatic return
+  end function <name>
+
+                                  old
+  <type> function <name>(...)     use: <name>(arg1, arg2, ... argn) as variable
+    <declarations>
+    <executable statements>
+    <name> = <expression>         required somewhere in execution
+    [return]                      optional, end causes automatic return
+  end function <name>
+
+

Executable Statements and Constructs#

+
  <statement> will mean exactly one statement in this section
+
+  a construct is multiple lines
+
+  <label> : <statement>      any statement can have a label (a name)
+
+  <variable> = <expression>  assignment statement
+
+  <pointer>  => <variable>   the pointer is now an alias for the variable
+  <pointer1> => <pointer2>   pointer1 now points to the same place as pointer2
+
+  stop                       can be in any executable statement group,
+  stop <integer>             terminates execution of the program,
+  stop <string>              can have optional integer or string
+
+  return                     exit from subroutine or function
+
+  do <variable>=<from>,<to> [,<increment>]   optional:  <label> : do ...
+     <statements>
+
+     exit                                   \_optional   or exit <label>
+     if (<boolean expression>) exit         /
+                                            exit the loop
+     cycle                                  \_optional   or cycle <label>
+     if (<boolean expression>) cycle        /
+                                            continue with next loop iteration
+  end do                                    optional:    end do <name>
+
+
+  do while (<boolean expression>)
+     ...                                   optional exit and cycle allowed
+  end do
+
+
+  do
+     ...                                   exit required to end the loop
+                                           optional  cycle  can be used
+  end do
+
+
+
+  if ( <boolean expression> ) <statement>  execute the statement if the
+                                           boolean expression is true
+
+  if ( <boolean expression1> ) then
+    ...                                    execute if expression1 is true
+  else if ( <boolean expression2> ) then
+    ...                                    execute if expression2 is true
+  else if ( <boolean expression3> ) then
+    ...                                    execute if expression3 is true
+  else
+    ...                                    execute if none above are true
+  end if
+
+
+  select case (<expression>)            optional <name> : select case ...
+     case (<value>)
+        <statements>                    execute if expression == value
+     case (<value1>:<value2>)           
+        <statements>                    execute if value1 <= expression <= value2
+     ...
+     case default
+        <statements>                    execute if no values above match
+  end select                            optional  end select <name>
+
+
+  real, dimension(10,12) :: A, R     a sample declaration for use with "where"
+    ...
+  where (A /= 0.0)                   conditional assignment, only assignment allowed
+     R = 1.0/A
+  elsewhere
+     R = 1.0                         elements of R set to 1.0 where A == 0.0
+  end where
+
+    go to <statement number>          old
+
+    go to (<statement number list>), <expression>   old
+
+    for I/O statements, see:  section 10.0  Input/Output Statements
+
+    many old forms of statements are not listed
+
+
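+  A concrete sketch of the loop constructs above, using a named do loop
+  with both cycle and exit (the name and bounds are arbitrary):
+
+  program loop_demo
+    implicit none
+    integer :: i, total
+    total = 0
+    sum_odd : do i = 1, 100
+       if (mod(i,2) == 0) cycle sum_odd    ! skip even numbers
+       total = total + i
+       if (total > 50) exit sum_odd        ! leave the loop early
+    end do sum_odd
+    print *, "stopped at i =", i, " total =", total
+  end program loop_demo
+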

Declarations#

+
  There are five (5) basic types: integer, real, complex, character and logical.
+  There may be any number of user derived types.  A modern (not old) declaration
+  starts with a type, has attributes, then ::, then variable(s) names
+
+  integer i, pivot, query                             old
+
+  integer, intent (inout) :: arg1
+
+  integer (selected_int_kind (5)) :: i1, i2
+
+  integer, parameter :: m = 7
+
+  integer, dimension(0:4, -5:5, 10:100) :: A3D
+
+  double precision x                                 old
+
+  real  (selected_real_kind(15,300)) :: x
+
+  complex :: z
+
+  logical, parameter :: what_if = .true.
+
+  character, parameter :: me = "Jon Squire"
+
+  type <name>       a new user type, derived type
+    declarations
+  end type <name>
+
+  type (<name>) :: stuff    declaring stuff to be of derived type <name>
+
+  real, dimension(:,:), allocatable, target :: A
+
+  real, dimension(:,:), pointer :: P
+
+  Attributes may be:
+
+    allocatable  no memory used here, allocate later
+    dimension    vector or multi dimensional array
+    external     will be defined outside this compilation
+    intent       argument may be  in, inout or out
+    intrinsic    declaring function to be an intrinsic
+    optional     argument is optional
+    parameter    declaring a constant, can not be changed later
+    pointer      declaring a pointer
+    private      in a module, a private declaration
+    public       in a module, a public declaration
+    save         keep value from one call to the next, static
+    target       can be pointed to by a pointer
+    Note:        not all combinations of attributes are legal
+
+
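+  A short sketch tying several of the attributes above together (the size n
+  and the names are arbitrary):
+
+  program attr_demo
+    implicit none
+    integer, parameter :: n = 4                    ! a constant
+    real, dimension(:,:), allocatable, target :: A ! no memory used yet
+    real, dimension(:,:), pointer :: P
+    allocate(A(n,n))                               ! memory obtained here
+    A = 0.0
+    P => A                                         ! P is now an alias for A
+    P(1,1) = 3.5
+    print *, A(1,1)                                ! prints 3.5
+    deallocate(A)
+  end program attr_demo
+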

Key words (other than I/O)#

+
  note: "statement" means key word that starts a statement, one line
+                    unless there is a continuation "&"
+        "construct" means multiple lines, usually ending with "end ..."
+        "attribute" means it is used in a statement to further define
+        "old"       means it should not be used in new code
+
+  allocatable          attribute, no space allocated here, later allocate
+  allocate             statement, allocate memory space now for variable
+  assign               statement, old, assigned go to
+  assignment           attribute, means subroutine is assignment (=)
+  block data           construct, old, compilation unit, replaced by module
+  call                 statement, call a subroutine
+  case                 statement, used in  select case structure
+  character            statement, basic type, intrinsic data type
+  common               statement, old, allowed overlaying of storage
+  complex              statement, basic type, intrinsic data type
+  contains             statement, internal subroutines and functions follow
+  continue             statement, old, a place to put a statement number
+  cycle                statement, continue the next iteration of a do loop
+  data                 statement, old, initialized variables and arrays
+  deallocate           statement, free up storage used by specified variable
+  default              statement, in a select case structure, all others
+  do                   construct, start a do loop
+  double precision     statement, old, replaced by selected_real_kind(15,300)
+  else                 construct, part of if   else if   else   end if
+  else if              construct, part of if   else if   else   end if
+  elsewhere            construct, part of where  elsewhere  end where
+  end block data       construct, old, ends block data
+  end do               construct, ends do
+  end function         construct, ends function
+  end if               construct, ends if
+  end interface        construct, ends interface
+  end module           construct, ends module
+  end program          construct, ends program
+  end select           construct, ends select case
+  end subroutine       construct, ends subroutine
+  end type             construct, ends type
+  end where            construct, ends where
+  entry                statement, old, another entry point in a procedure
+  equivalence          statement, old, overlaid storage
+  exit                 statement, continue execution outside of a do loop
+  external             attribute, old statement, means defines else where
+  function             construct, starts the definition of a function
+  go to                statement, old, requires fixed form statement number
+  if                   statement and construct, if(...) statement
+  implicit             statement, "none" is preferred to help find errors
+  in                   a keyword for intent, the argument is read only
+  inout                a keyword for intent, the argument is read/write
+  integer              statement, basic type, intrinsic data type
+  intent               attribute, intent(in) or intent(out) or intent(inout)
+  interface            construct, begins an interface definition
+  intrinsic            statement, says that following names are intrinsic
+  kind                 attribute, sets the kind of the following variables
+  len                  attribute, sets the length of a character string
+  logical              statement, basic type, intrinsic data type
+  module               construct, beginning of a module definition
+  namelist             statement, defines a namelist of input/output
+  nullify              statement, nullify(some_pointer) now points nowhere
+  only                 attribute, restrict what comes from a module
+  operator             attribute, indicates function is an operator, like +
+  optional             attribute, a parameter or argument is optional
+  out                  a keyword for intent, the argument will be written
+  parameter            attribute, old statement, makes variable read only
+  pause                old, replaced by stop
+  pointer              attribute, defined the variable as a pointer alias
+  private              statement and attribute, in a module, visible inside
+  program              construct, start of a main program
+  public               statement and attribute, in a module, visible outside
+  real                 statement, basic type, intrinsic data type
+  recursive            attribute, allows functions and derived type recursion
+  result               attribute, allows naming of function result  result(Y)
+  return               statement, returns from, exits, subroutine or function
+  save                 attribute, old statement, keep value between calls
+  select case          construct, start of a case construct
+  stop                 statement, terminate execution of the main procedure
+  subroutine           construct, start of a subroutine definition
+  target               attribute, allows a variable to take a pointer alias
+  then                 part of if construct
+  type                 construct, start of user defined type
+  type ( )             statement, declaration of a variable for a users type
+  use                  statement, brings in a module
+  where                construct, conditional assignment
+  while                construct, a while form of a do loop
+
+ +
  backspace            statement, back up one record
+  close                statement, close a file
+  endfile              statement, mark the end of a file
+  format               statement, old, defines a format
+  inquire              statement, get the status of a unit
+  open                 statement, open or create a file
+  print                statement, performs output to screen
+  read                 statement, performs input
+  rewind               statement, move read or write position to beginning
+  write                statement, performs output
+
+

Operators#

+
  **    exponentiation
+  *     multiplication
+  /     division
+  +     addition
+  -     subtraction
+  //    concatenation
+  ==    .eq.  equality
+  /=    .ne.  not equal
+  <     .lt.  less than
+  >     .gt.  greater than
+  <=    .le.  less than or equal
+  >=    .ge.  greater than or equal
+  .not.       complement, negation
+  .and.       logical and
+  .or.        logical or
+  .eqv.       logical equivalence
+  .neqv.      logical not equivalence, exclusive or
+
+  .eq.  ==    equality, old
+  .ne.  /=    not equal. old
+  .lt.  <     less than, old
+  .gt.  >     greater than, old
+  .le.  <=    less than or equal, old
+  .ge.  >=    greater than or equal, old
+
+
+  Other punctuation:
+
+   /  ...  /  used in data, common, namelist and other statements
+   (/ ... /)  array constructor, data is separated by commas
+   6*1.0      in some contexts, 6 copies of 1.0
+   (i:j:k)    in some contexts, a list  i, i+k, i+2k, i+3k, ... i+nk<=j
+   (:j)       j and all below
+   (i:)       i and all above
+   (:)        undefined or all in range
+
+
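+  A sketch of the array constructor and section notation listed above:
+
+  program sections
+    implicit none
+    integer :: v(8) = (/ 1,2,3,4,5,6,7,8 /)   ! array constructor
+    print *, v(2:6:2)     ! elements 2, 4, 6
+    print *, v(:3)        ! elements 1, 2, 3
+    print *, v(7:)        ! elements 7, 8
+    print *, v(:)         ! the whole array
+  end program sections
+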

Constants#

+
  Logical constants:
+
+    .true.      True
+    .false.     False
+
+  Integer constants:
+
+     0    1     -1     123456789
+
+  Real constants:
+
+     0.0   1.0   -1.0    123.456   7.1E+10   -52.715E-30
+
+  Complex constants:
+
+     (0.0, 0.0)    (-123.456E+30, 987.654E-29)
+
+  Character constants:
+
+      "ABC"   "a"  "123'abc$%#@!"    " a quote "" "
+      'ABC'   'a'  '123"abc$%#@!'    ' an apostrophe '' '
+
+  Derived type values:
+
+      type name
+        character (len=30) :: last
+        character (len=30) :: first
+        character (len=30) :: middle
+      end type name
+
+      type address
+        character (len=40) :: street
+        character (len=40) :: more
+        character (len=20) :: city
+        character (len=2)  :: state
+        integer (selected_int_kind(5)) :: zip_code
+        integer (selected_int_kind(4)) :: route_code
+      end type address
+
+      type person
+        type (name) lfm
+        type (address) snail_mail
+      end type person
+
+      type (person) :: a_person = person( name("Squire","Jon","S."), &
+          address("106 Regency Circle", "", "Linthicum", "MD", 21090, 1936))
+
+      a_person%snail_mail%route_code == 1936
+
+

Input/Output Statements#

+
    open (<unit number>)
+    open (unit=<unit number>, file=<file name>, iostat=<variable>)
+    open (unit=<unit number>, ... many more, see below )
+
+    close (<unit number>)
+    close (unit=<unit number>, iostat=<variable>,
+           err=<statement number>, status="KEEP")
+
+    read (<unit number>) <input list>
+    read (unit=<unit number>, fmt=<format>, iostat=<variable>,
+          end=<statement number>, err=<statement number>) <input list>
+    read (unit=<unit number>, rec=<record number>) <input list>
+
+    write (<unit number>) <output list>
+    write (unit=<unit number>, fmt=<format>, iostat=<variable>,
+           err=<statement number>) <output list>
+    write (unit=<unit number>, rec=<record number>) <output list>
+
+    print *, <output list>
+
+    print "(<your format here, use apostrophe, not quote>)", <output list>
+
+    rewind <unit number>
+    rewind (<unit number>, err=<statement number>)
+
+    backspace <unit number>
+    backspace (<unit number>, iostat=<variable>)
+
+    endfile <unit number>
+    endfile (<unit number>, err=<statement number>, iostat=<variable>)
+
+    inquire ( <unit number>, exists = <variable>)
+    inquire ( file=<"name">, opened = <variable1>, access = <variable2> )
+    inquire ( iolength = <variable> ) x, y, A   ! gives "recl" for "open"
+
+    namelist /<name>/ <variable list>      defines a name list
+    read(*,nml=<name>)                     reads some/all variables in namelist
+    write(*,nml=<name>)                    writes all variables in namelist
+    &<name> <variable>=<value> ... <variable=value> /  data for namelist read
+
+  Input / Output specifiers
+
+    access   one of  "sequential"  "direct"  "undefined"
+    action   one of  "read"  "write"  "readwrite"
+    advance  one of  "yes"  "no"  
+    blank    one of  "null"  "zero"
+    delim    one of  "apostrophe"  "quote"  "none"
+    end      =       <integer statement number>  old
+    eor      =       <integer statement number>  old
+    err      =       <integer statement number>  old
+    exist    =       <logical variable>
+    file     =       <"file name">
+    fmt      =       <"(format)"> or <character variable> format
+    form     one of  "formatted"  "unformatted"  "undefined"
+    iolength =       <integer variable, size of unformatted record>
+    iostat   =       <integer variable> 0==good, negative==eof, positive==bad
+    name     =       <character variable for file name>
+    named    =       <logical variable>
+    nml      =       <namelist name>
+    nextrec  =       <integer variable>    one greater than written
+    number   =       <integer variable unit number>
+    opened   =       <logical variable>
+    pad      one of  "yes"  "no"
+    position one of  "asis"  "rewind"  "append"
+    rec      =       <integer record number>
+    recl     =       <integer unformatted record size>
+    size     =       <integer variable>  number of characters read before eor
+    status   one of  "old"  "new"  "unknown"  "replace"  "scratch"  "keep"
+    unit     =       <integer unit number>
+
+  Individual questions
+    direct      =    <character variable>  "yes"  "no"  "unknown"
+    formatted   =    <character variable>  "yes"  "no"  "unknown"
+    read        =    <character variable>  "yes"  "no"  "unknown"
+    readwrite   =    <character variable>  "yes"  "no"  "unknown"
+    sequential  =    <character variable>  "yes"  "no"  "unknown"
+    unformatted =    <character variable>  "yes"  "no"  "unknown"
+    write       =    <character variable>  "yes"  "no"  "unknown"
+
+
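+  A small end-to-end sketch of the statements above (the unit number and
+  file name are arbitrary): open a file, write to it, rewind, read it back,
+  and test iostat for end of file.
+
+  program io_demo
+    implicit none
+    integer :: ios, i, val
+    open(unit=11, file="numbers.txt", status="replace", &
+         action="readwrite", iostat=ios)
+    if (ios /= 0) stop "open failed"
+    do i = 1, 3
+       write(11,"(i5)") 10*i
+    end do
+    rewind(11)
+    do
+       read(11,"(i5)",iostat=ios) val
+       if (ios /= 0) exit              ! negative iostat means end of file
+       print *, "read ", val
+    end do
+    close(11, status="keep")
+  end program io_demo
+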

Formats#

+
    format                    an explicit format can replace * in any
+                              I/O statement. Include the format in
+                              apostrophes or quotes and keep the parenthesis.
+
+    examples:
+         print "(3I5,/(2X,3F7.2/))", <output list>
+         write(6, '(a,E15.6E3/a,G15.2)' ) <output list>
+         read(unit=11, fmt="(i4, 4(f3.0,TR1))" ) <input list>
+
+    A format includes the opening and closing parenthesis.
+    A format consists of format items and format control items separated by comma.
+    A format may contain grouping parenthesis with an optional repeat count.
+
+  Format Items, data edit descriptors:
+
+    key:  w  is the total width of the field   (filled with *** if overflow)
+          m  is the least number of digits in the (sub)field (optional)
+          d  is the number of decimal digits in the field
+          e  is the number of decimal digits in the exponent subfield
+          c  is the repeat count for the format item
+          n  is number of columns
+
+    cAw     data of type character (w is optional)
+    cBw.m   data of type integer with binary base
+    cDw.d   data of type real -- same as E,  old double precision
+    cEw.d   or Ew.dEe  data of type real
+    cENw.d  or ENw.dEe  data of type real  -- exponent a multiple of 3
+    cESw.d  or ESw.dEe  data of type real  -- first digit non zero
+    cFw.d   data of type real  -- no exponent printed
+    cGw.d   or Gw.dEe  data of type real  -- auto format to F or E
+    nH      n characters follow the H,  no list item
+    cIw.m   data of type integer
+    cLw     data of type logical  --  .true.  or  .false.
+    cOw.m   data of type integer with octal base
+    cZw.m   data of type integer with hexadecimal base
+    "<string>"  literal characters to output, no list item
+    '<string>'  literal characters to output, no list item
+
+  Format Control Items, control edit descriptors:
+
+    BN      ignore non leading blanks in numeric fields
+    BZ      treat nonleading blanks in numeric fields as zeros
+    nP      apply scale factor to real format items   old
+    S       printing of optional plus signs is processor dependent
+    SP      print optional plus signs
+    SS      do not print optional plus signs
+    Tn      tab to specified column
+    TLn     tab left n columns
+    TRn     tab right n columns
+    nX      tab right n columns
+    /       end of record (implied / at end of all format statements)
+    :       stop format processing if no more list items
+
+  <input list> can be:
+    a variable
+    an array name
+    an implied do   ((A(i,j),j=1,n) ,i=1,m)    parenthesis and commas as shown
+
+    note: when there are more items in the input list than format items, the
+          repeat rules for formats applies.
+
+  <output list> can be:
+    a constant
+    a variable
+    an expression
+    an array name
+    an implied do   ((A(i,j),j=1,n) ,i=1,m)    parenthesis and commas as shown
+
+    note: when there are more items in the output list than format items, the
+          repeat rules for formats applies.
+
+  Repeat Rules for Formats:
+
+    Each format item is used with a list item.  They are used in order.
+    When there are more list items than format items, then the following
+    rule applies:  There is an implied end of record, /, at the closing
+    parenthesis of the format, this is processed.  Scan the format backwards
+    to the first left parenthesis.  Use the repeat count, if any, in front
+    of this parenthesis, continue to process format items and list items.
+
+    Note: an infinite loop is possible
+          print "(3I5/(1X/))", I, J, K, L    may never stop
+
+
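+  A short sketch of the repeat rule: with seven list items and a format that
+  describes only three, the format is rescanned from its last open
+  parenthesis and a new record is started each time.
+
+  program fmt_repeat
+    implicit none
+    integer :: k(7) = (/ 1,2,3,4,5,6,7 /)
+    print "(3i5)", k     ! three records:  1 2 3  /  4 5 6  /  7
+  end program fmt_repeat
+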

Intrinsic Functions#

+
  Intrinsic Functions are presented in alphabetical order and then grouped
+  by topic.  The function name appears first. The argument(s) and result
+  give an indication of the type(s) of argument(s) and results.
+  [,dim=] indicates an optional argument  "dim".
+  "mask" must be logical and usually conformable.
+  "character" and "string" are used interchangeably.
+  A brief description or additional information may appear.
+
+

Intrinsic Functions (alphabetical):#

+
    abs(integer_real_complex) result(integer_real_complex)
+    achar(integer) result(character)  integer to character
+    acos(real) result(real)  arccosine  |real| <= 1.0   0<=result<=Pi
+    adjustl(character)  result(character) left adjust, blanks go to back
+    adjustr(character)  result(character) right adjust, blanks to front
+    aimag(complex) result(real)  imaginary part
+    aint(real [,kind=]) result(real)  truncate to integer toward zero
+    all(mask [,dim]) result(logical)  true if all elements of mask are true
+    allocated(array) result(logical)  true if array is allocated in memory
+    anint(real [,kind=]) result(real)  round to nearest integer
+    any(mask [,dim=]) result(logical)  true if any elements of mask are true
+    asin(real) result(real)  arcsine  |real| <= 1.0   -Pi/2<=result<=Pi/2
+    associated(pointer [,target=]) result(logical)  true if pointing
+    atan(real) result(real)  arctangent  -Pi/2<=result<=Pi/2
+    atan2(y=real,x=real) result(real)  arctangent  -Pi<=result<=Pi
+    bit_size(integer) result(integer)  size in bits in model of argument
+    btest(i=integer,pos=integer) result(logical)  true if pos has a 1, pos=0..
+    ceiling(real) result(integer)  truncate to integer toward infinity
+    char(integer [,kind=]) result(character)  integer to character [of kind]
+    cmplx(x=real [,y=real] [kind=]) result(complex)  x+iy
+    conjg(complex) result(complex)  reverse the sign of the imaginary part
+    cos(real_complex) result(real_complex)  cosine
+    cosh(real) result(real)  hyperbolic cosine
+    count(mask [,dim=]) result(integer)  count of true entries in mask
+    cshift(array,shift [,dim=]) circular shift elements of array, + is right
+    date_and_time([date=] [,time=] [,zone=] [,values=])  y,m,d,utc,h,m,s,milli
+    dble(integer_real_complex) result(real_kind_double)  convert to double
+    digits(integer_real) result(integer)  number of bits to represent model
+    dim(x=integer_real,y=integer_real) result(integer_real) proper subtraction
+    dot_product(vector_a,vector_b) result(integer_real_complex) inner product
+    dprod(x=real,y=real) result(x_times_y_double)  double precision product
+    eoshift(array,shift [,boundary=] [,dim=])  end-off shift using boundary
+    epsilon(real) result(real)  smallest positive number added to 1.0 /= 1.0
+    exp(real_complex) result(real_complex)  e raised to a power
+    exponent(real) result(integer)  the model exponent of the argument
+    floor(real) result(integer)  truncate to integer towards negative infinity
+    fraction(real) result(real)  the model fractional part of the argument
+    huge(integer_real) result(integer_real)  the largest model number
+    iachar(character) result(integer)  position of character in ASCII sequence
+    iand(integer,integer) result(integer)  bit by bit logical and
+    ibclr(integer,pos) result(integer)  argument with pos bit cleared to zero
+    ibits(integer,pos,len) result(integer)  extract len bits starting at pos
+    ibset(integer,pos) result(integer)  argument with pos bit set to one
+    ichar(character) result(integer)  pos in collating sequence of character
+    ieor(integer,integer) result(integer)  bit by bit logical exclusive or
+    index(string,substring [,back=])  result(integer)  pos of substring
+    int(integer_real_complex) result(integer)  convert to integer
+    ior(integer,integer) result(integer)  bit by bit logical or
+    ishft(integer,shift) result(integer)  shift bits in argument by shift
+    ishftc(integer, shift) result(integer)  shift circular bits in argument
+    kind(any_intrinsic_type) result(integer)  value of the kind
+    lbound(array,dim) result(integer)  smallest subscript of dim in array
+    len(character) result(integer)  number of characters that can be in argument
+    len_trim(character) result(integer)  length without trailing blanks
+    lge(string_a,string_b) result(logical)  string_a >= string_b
+    lgt(string_a,string_b) result(logical)  string_a > string_b
+    lle(string_a,string_b) result(logical)  string_a <= string_b
+    llt(string_a,string_b) result(logical)  string_a < string_b
+    log(real_complex) result(real_complex)  natural logarithm
+    log10(real) result(real)  logarithm base 10
+    logical(logical [,kind=])  convert to logical
+    matmul(matrix,matrix) result(vector_matrix)  on integer_real_complex_logical
+    max(a1,a2,a3,...) result(integer_real)  maximum of list of values
+    maxexponent(real) result(integer)  maximum exponent of model type
+    maxloc(array [,mask=]) result(integer_vector)  indices in array of maximum
+    maxval(array [,dim=] [,mask=])  result(array_element)  maximum value
+    merge(true_source,false_source,mask) result(source_type)  choose by mask
+    min(a1,a2,a3,...) result(integer-real)  minimum of list of values
+    minexponent(real) result(integer)  minimum(negative) exponent of model type
+    minloc(array [,mask=]) result(integer_vector)  indices in array of minimum
+    minval(array [,dim=] [,mask=])  result(array_element)  minimum value
+    mod(a=integer_real,p) result(integer_real)  a modulo p
+    modulo(a=integer_real,p) result(integer_real)  a modulo p
+    mvbits(from,frompos,len,to,topos) result(integer)  move bits
+    nearest(real,direction) result(real)  nearest value toward direction
+    nint(real [,kind=]) result(integer)  round to nearest integer value
+    not(integer) result(integer)  bit by bit logical complement
+    pack(array,mask [,vector=]) result(vector)  vector of elements from array
+    present(argument) result(logical)  true if optional argument is supplied
+    product(array [,dim=] [,mask=]) result(integer_real_complex)  product
+    radix(integer_real) result(integer)  radix of integer or real model, 2
+    random_number(harvest=real_out)  subroutine, uniform random number 0 to 1
+    random_seed([size=] [,put=] [,get=])  subroutine to set random number seed
+    range(integer_real_complex) result(integer_real)  decimal exponent of model
+    real(integer_real_complex [,kind=]) result(real)  convert to real
+    repeat(string,ncopies) result(string)  concatenate n copies of string
+    reshape(source,shape,pad,order) result(array)  reshape source to array
+    rrspacing(real) result(real)  reciprocal of relative spacing of model
+    scale(real,integer) result(real)  multiply by  2**integer
+    scan(string,set [,back]) result(integer)  position of first of set in string
+    selected_int_kind(integer) result(integer)  kind number to represent digits
+    selected_real_kind(integer,integer) result(integer)  kind of digits, exp
+    set_exponent(real,integer) result(real)  put integer as exponent of real
+    shape(array) result(integer_vector)  vector of dimension sizes
+    sign(integer_real,integer_real) result(integer_real) sign of second on first
+    sin(real_complex) result(real_complex)  sine of angle in radians
+    sinh(real) result(real)  hyperbolic sine of argument
+    size(array [,dim=]) result(integer)  number of elements in dimension
+    spacing(real) result(real)  spacing of model numbers near argument
+    spread(source,dim,ncopies) result(array)  expand dimension of source by 1
+    sqrt(real_complex) result(real_complex)  square root of argument
+    sum(array [,dim=] [,mask=]) result(integer_real_complex)  sum of elements
+    system_clock([count=] [,count_rate=] [,count_max=])  subroutine, all out
+    tan(real) result(real)  tangent of angle in radians
+    tanh(real) result(real)  hyperbolic tangent of angle in radians
+    tiny(real) result(real)  smallest positive model representation
+    transfer(source,mold [,size]) result(mold_type)  same bits, new type
+    transpose(matrix) result(matrix)  the transpose of a matrix
+    trim(string) result(string)  trailing blanks are removed
+    ubound(array,dim) result(integer)  largest subscript of dim in array
+    unpack(vector,mask,field) result(v_type,mask_shape)  field when not mask
+    verify(string,set [,back]) result(integer)  pos in string not in set
+
+

Intrinsic Functions (grouped by topic):#

+
Intrinsic Functions (Numeric)#
+
    abs(integer_real_complex) result(integer_real_complex)
+    acos(real) result(real)  arccosine  |real| <= 1.0   0<=result<=Pi
+    aimag(complex) result(real)  imaginary part
+    aint(real [,kind=]) result(real)  truncate to integer toward zero
+    anint(real [,kind=]) result(real)  round to nearest integer
+    asin(real) result(real)  arcsine  |real| <= 1.0   -Pi/2<=result<=Pi/2
+    atan(real) result(real)  arctangent  -Pi/2<=result<=Pi/2
+    atan2(y=real,x=real) result(real)  arctangent  -Pi<=result<=Pi
+    ceiling(real) result(integer)  truncate to integer toward infinity
+    cmplx(x=real [,y=real] [kind=]) result(complex)  x+iy
+    conjg(complex) result(complex)  reverse the sign of the imaginary part
+    cos(real_complex) result(real_complex)  cosine
+    cosh(real) result(real)  hyperbolic cosine
+    dble(integer_real_complex) result(real_kind_double)  convert to double
+    digits(integer_real) result(integer)  number of bits to represent model
+    dim(x=integer_real,y=integer_real) result(integer_real) proper subtraction
+    dot_product(vector_a,vector_b) result(integer_real_complex) inner product
+    dprod(x=real,y=real) result(x_times_y_double)  double precision product
+    epsilon(real) result(real)  smallest positive number added to 1.0 /= 1.0
+    exp(real_complex) result(real_complex)  e raised to a power
+    exponent(real) result(integer)  the model exponent of the argument
+    floor(real) result(integer)  truncate to integer towards negative infinity
+    fraction(real) result(real)  the model fractional part of the argument
+    huge(integer_real) result(integer_real)  the largest model number
+    int(integer_real_complex) result(integer)  convert to integer
+    log(real_complex) result(real_complex)  natural logarithm
+    log10(real) result(real)  logarithm base 10
+    matmul(matrix,matrix) result(vector_matrix)  on integer_real_complex_logical
+    max(a1,a2,a3,...) result(integer_real)  maximum of list of values
+    maxexponent(real) result(integer)  maximum exponent of model type
+    maxloc(array [,mask=]) result(integer_vector)  indices in array of maximum
+    maxval(array [,dim=] [,mask=])  result(array_element)  maximum value
+    min(a1,a2,a3,...) result(integer-real)  minimum of list of values
+    minexponent(real) result(integer)  minimum(negative) exponent of model type
+    minloc(array [,mask=]) result(integer_vector)  indices in array of minimum
+    minval(array [,dim=] [,mask=])  result(array_element)  minimum value
+    mod(a=integer_real,p) result(integer_real)  a modulo p
+    modulo(a=integer_real,p) result(integer_real)  a modulo p
+    nearest(real,direction) result(real)  nearest value toward direction
+    nint(real [,kind=]) result(integer)  round to nearest integer value
+    product(array [,dim=] [,mask=]) result(integer_real_complex)  product
+    radix(integer_real) result(integer)  radix of integer or real model, 2
+    random_number(harvest=real_out)  subroutine, uniform random number 0 to 1
+    random_seed([size=] [,put=] [,get=])  subroutine to set random number seed
+    range(integer_real_complex) result(integer_real)  decimal exponent of model
+    real(integer_real_complex [,kind=]) result(real)  convert to real
+    rrspacing(real) result(real)  reciprocal of relative spacing of model
+    scale(real,integer) result(real)  multiply by  2**integer
+    set_exponent(real,integer) result(real)  put integer as exponent of real
+    sign(integer_real,integer_real) result(integer_real) sign of second on first
+    sin(real_complex) result(real_complex)  sine of angle in radians
+    sinh(real) result(real)  hyperbolic sine of argument
+    spacing(real) result(real)  spacing of model numbers near argument
+    sqrt(real_complex) result(real_complex)  square root of argument
+    sum(array [,dim=] [,mask=]) result(integer_real_complex)  sum of elements
+    tan(real) result(real)  tangent of angle in radians
+    tanh(real) result(real)  hyperbolic tangent of angle in radians
+    tiny(real) result(real)  smallest positive model representation
+    transpose(matrix) result(matrix)  the transpose of a matrix
+
+
Intrinsic Functions (Logical and bit)#
+
    all(mask [,dim]) result(logical)  true if all elements of mask are true
+    any(mask [,dim=]) result(logical)  true if any elements of mask are true
+    bit_size(integer) result(integer)  size in bits in model of argument
+    btest(i=integer,pos=integer) result(logical)  true if pos has a 1, pos=0..
+    count(mask [,dim=]) result(integer)  count of true entries in mask
+    iand(integer,integer) result(integer)  bit by bit logical and
+    ibclr(integer,pos) result(integer)  argument with pos bit cleared to zero
+    ibits(integer,pos,len) result(integer)  extract len bits starting at pos
+    ibset(integer,pos) result(integer)  argument with pos bit set to one
+    ieor(integer,integer) result(integer)  bit by bit logical exclusive or
+    ior(integer,integer) result(integer)  bit by bit logical or
+    ishft(integer,shift) result(integer)  shift bits in argument by shift
+    ishftc(integer, shift) result(integer)  shift circular bits in argument
+    logical(logical [,kind=])  convert to logical
+    matmul(matrix,matrix) result(vector_matrix)  on integer_real_complex_logical
+    merge(true_source,false_source,mask) result(source_type)  choose by mask
+    mvbits(from,frompos,len,to,topos) result(integer)  move bits
+    not(integer) result(integer)  bit by bit logical complement
+    transfer(source,mold [,size]) result(mold_type)  same bits, new type
+
+
Intrinsic Functions (Character or string)#
+
    achar(integer) result(character)  integer to character
+    adjustl(character)  result(character) left adjust, blanks go to back
+    adjustr(character)  result(character) right adjust, blanks to front
+    char(integer [,kind=]) result(character)  integer to character [of kind]
+    iachar(character) result(integer)  position of character in ASCII sequence
+    ichar(character) result(integer)  pos in collating sequence of character
+    index(string,substring [,back=])  result(integer)  pos of substring
+    len(character) result(integer)  number of characters that can be in argument
+    len_trim(character) result(integer)  length without trailing blanks
+    lge(string_a,string_b) result(logical)  string_a >= string_b
+    lgt(string_a,string_b) result(logical)  string_a > string_b
+    lle(string_a,string_b) result(logical)  string_a <= string_b
+    llt(string_a,string_b) result(logical)  string_a < string_b
+    repeat(string,ncopies) result(string)  concatenate n copies of string
+    scan(string,set [,back]) result(integer)  position of first of set in string
+    trim(string) result(string)  trailing blanks are removed
+    verify(string,set [,back]) result(integer)  pos in string not in set
+
+

Fortran 95#

+
    +
  • New Features
      +
    • The statement FORALL as an alternative to the DO-statement
    • +
    • Partial nesting of FORALL and WHERE statements
    • +
    • Masked ELSEWHERE
    • +
    • Pure procedures
    • +
    • Elemental procedures
    • +
    • Pure procedures in specification expressions
    • +
    • Revised MINLOC and MAXLOC
    • +
    • Extensions to CEILING and FLOOR with the KIND keyword argument
    • +
    • Pointer initialization
    • +
    • Default initialization of derived type objects
    • +
    • Increased compatibility with IEEE arithmetic
    • +
    • A CPU_TIME intrinsic subroutine
    • +
    • A function NULL to nullify a pointer
    • +
    • Automatic deallocation of allocatable arrays at exit of scoping unit
    • +
    • Comments in NAMELIST at input
    • +
    • Minimal field at input
    • +
    • Complete version of END INTERFACE
    • +
    +
  • +
  • Deleted Features
      +
    • real and double precision DO loop index variables
    • +
    • branching to END IF from an outer block
    • +
    • PAUSE statements
    • +
    • ASSIGN statements and assigned GO TO statements and the use of an assigned integer as a FORMAT specification
    • +
    • Hollerith editing in FORMAT
    • +
    • See http://www.nsc.liu.se/~boein/f77to90/f95.html#17.5
    • +
    +
  • +
+
+
+
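+A brief sketch of two of the Fortran 95 additions listed above, FORALL and
+an elemental procedure (the names here are illustrative only):
+
+  program f95_demo
+    implicit none
+    integer :: i
+    real :: a(5), b(5)
+    forall (i = 1:5) a(i) = real(i)**2   ! FORALL as an alternative to DO
+    b = twice(a)                         ! elemental function applied to a whole array
+    print *, b
+  contains
+    elemental function twice(x) result(y)
+      real, intent(in) :: x
+      real :: y
+      y = 2.0*x
+    end function twice
+  end program f95_demo
+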

References#

+ + + +
+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Fortran/index.html b/Documentation/Development/Languages/Fortran/index.html new file mode 100644 index 000000000..fe0b56957 --- /dev/null +++ b/Documentation/Development/Languages/Fortran/index.html @@ -0,0 +1,5106 @@ + + + + + + + + + + + + + + + + + + + + + + + Fortran - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Fortran#

+

Despite its age, Fortran is still a common language in scientific computing on account of its speed and ease of use in writing numerical computing-centric code.

+

Getting Started#

+

This section walks through how to compile and run a basic Fortran code, and then a basic Fortran MPI code, adapted from here. See Compilers and Toolchains for compiler and programming environment information on NREL HPC systems. For an extensive guide to Fortran 90, see our page on Advanced Fortran. See External Resources for general Fortran language tutorials and Fortran-MPI tutorials.

+

Hello World#

+

Create a file named hello.f90, and save the following text to the file:

+
PROGRAM hello
+
+write(*,*) "Hello World"
+
+END PROGRAM hello
+
+

Now, we must choose the compiler with which to compile our program. We can choose between the GNU, Intel, Nvidia, and Cray compilers, depending on which system we're on (see Compilers and Toolchains).

+

To see available versions of a chosen compiler, use module avail. For this example, we'll use gfortran, which is part of GNU's gcc package:

+
module avail gcc 
+   gcc/10.3.0          gcc/11.2.0          gcc/12.1.0(default)
+
+

We'll use gcc/12.1.0:

+
module load gcc/12.1.0
+
+

Now, we can compile the program with the following command:

+

gfortran hello.f90 -o hello

+

This creates an executable named hello. Execute it by typing the following into your terminal:

+

./hello

+

It should return the following output:

+

Hello World

+

Hello World in MPI Parallel#

+

The purpose of Fortran today is to run large scale computations fast. For the "large scale" part, we use MPI. Now that we have a working Hello World program, let's modify it to run on multiple MPI tasks.

+

On Kestrel, there are multiple implementations of MPI available. We can choose between OpenMPI, Intel MPI, MPICH, and Cray MPICH. These MPI implementations are associated with an underlying Fortran compiler. For example, if we type:

+

module avail openmpi

+

we find that both openmpi/4.1.4-gcc and openmpi/4.1.4-intel are available.

+

Let's choose the openmpi/gcc combination:

+

module load openmpi/4.1.4-gcc

+

Now, create a new file named hello_mpi.f90 and save the following contents to the file:

+
PROGRAM hello_mpi
+include 'mpif.h'
+
+integer :: ierr, my_rank, number_of_ranks
+
+call MPI_INIT(ierr)
+call MPI_COMM_SIZE(MPI_COMM_WORLD, number_of_ranks, ierr)
+call MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
+
+write(*,*) "Hello World from MPI task: ", my_rank, "out of ", number_of_ranks
+
+call MPI_FINALIZE(ierr)
+
+END PROGRAM hello_mpi
+
+

To compile this program, type:

+

mpif90 hello_mpi.f90 -o hello_mpi

+

To run this code on the login node, type:

+

mpirun -n 4 ./hello_mpi

+

You should receive a similar output to the following (the rank ordering may differ):

+
 Hello World from MPI task:            1 out of            4
+ Hello World from MPI task:            2 out of            4
+ Hello World from MPI task:            3 out of            4
+ Hello World from MPI task:            0 out of            4
+
+

Generally, we don't want to run MPI programs on the login node! Let's submit this as a job to the scheduler. Create a file named job.in and modify the file to contain the following:

+

#!/bin/bash
+
+#SBATCH --time=00:01:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=<your account here>
+
+module load openmpi/4.1.4-gcc
+
+srun -n 4 ./hello_mpi &> hello.out
+
+Be sure to replace <your account here> with your account name.

+

Submit the job:

+

sbatch job.in

+

When the job is done, the file hello.out should contain the same output as you found before (the ordering of ranks may differ).

+

Compilers and Toolchains#

+

Fortran compilers#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Compiler | Compiler Executable | Module Avail  | Systems available on      |
|----------|---------------------|---------------|---------------------------|
| gcc      | gfortran            | gcc           | Kestrel, Swift, Vermilion |
| intel    | ifort               | intel-oneapi  | Kestrel, Swift, Vermilion |
| intel    | ifort               | intel-classic | Kestrel                   |
+

Fortran-MPI Toolchains#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Compiler | MPI      | Compiler Executable | Module Avail        | Systems available on      |
|----------|----------|---------------------|---------------------|---------------------------|
| gcc      | openmpi  | mpifort             | openmpi             | Kestrel, Swift, Vermilion |
| intel    | openmpi  | mpifort             | openmpi/4.1.x-intel | Kestrel                   |
| intel    | intelmpi | ifort               | intel-oneapi-mpi    | Kestrel, Swift, Vermilion |
| gcc      | MPICH    | mpifort             | mpich               | Kestrel, Swift, Vermilion |
| intel    | MPICH    | mpifort             | mpich/4.0.2-intel   | Kestrel only              |
| cray     | MPICH    | ftn                 | cray-mpich          | Kestrel only              |
+

External Resources#

+ + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Julia/index.html b/Documentation/Development/Languages/Julia/index.html new file mode 100644 index 000000000..d3d47d82b --- /dev/null +++ b/Documentation/Development/Languages/Julia/index.html @@ -0,0 +1,4982 @@ + + + + + + + + + + + + + + + + + + + + + + + Julia - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Julia#

+

Julia is a dynamic programming language that offers high performance while being easy to learn and develop code in.

+

This section contains demos (in the form of scripts and notebooks) and how-to guides for doing various things with Julia on NREL HPC environments.

+

Available modules#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SwiftVermilionKestrel (CPU)Kestrel (GPU)
julia/1.6.2-ocsfign
julia/1.7.2-gdp7a25julia/1.7.2julia/1.7.2
julia/1.8.5-generic-linux
julia/1.10.0-gcc
julia/1.10.4
+
+

Julia 1.9.x does not work well on Sapphire Rapids

+

We advise against installing and using Julia 1.9.x on Kestrel as packages can fail to precompile and result in a segmentation fault. This is a known issue with Julia 1.9.x on Sapphire Rapids processors, possibly due to an LLVM issue. Julia 1.10 will be installed as a module once a stable release is available. Until then, please use Julia 1.7 or 1.8.

+
+

Contents#

+
    +
  1. Installing Julia
  2. +
  3. Tour of Julia
  4. +
  5. Parallel Computing in Julia
  6. +
  7. Calling Python, C, and FORTRAN from Julia
  8. +
+

Demo Scripts and Notebooks#

+

The following scripts and notebooks are available on the master branch of NREL/HPC to download and run,

+ +

Requirements and Installation#

+

Running the demos requires the python modules mpi4py and julia. For details on installing these modules, see the 'Environment Setup' section of the README found in the demos/scripts directory.

+

For more information on mpi4py, see the mpi4py documentation

+

For more information on PyJulia, see the PyJulia documentation.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Julia/julia_ccall_pycall/index.html b/Documentation/Development/Languages/Julia/julia_ccall_pycall/index.html new file mode 100644 index 000000000..b93844878 --- /dev/null +++ b/Documentation/Development/Languages/Julia/julia_ccall_pycall/index.html @@ -0,0 +1,5145 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + PyCall and CCall - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Calling Python, C, and FORTRAN from Julia#

+

The following sections describe Julia packages and native function calls that can be used to call Python, C, and FORTRAN libraries.

+

Calling Python#

+

We can use the PyCall.jl package to call Python code from Julia.

+
using PyCall
+
+
# The following makes it so that print statements in python will appear in this notebook
+# This is not necessary when using PyCall in a terminal based Julia instance
+pyimport("sys")."stdout" = PyTextIO(stdout)
+pyimport("sys")."stderr" = PyTextIO(stderr);
+
+

We can execute arbitrary Python code with the special Julia strings py"..." and py"""...""".

+
py"""
+import math
+class Point:
+    def __init__(self, x,y):
+        self.x = x
+        self.y = y
+    def distance(self, p):
+        return math.sqrt((self.x - p.x)**2 + (self.y - p.y)**2)
+"""
+
+p = py"Point(1.0, 2.0)"
+
+
PyObject <__main__.Point object at 0x7fa3d66bd340>
+
+

We can even use Julia's string interpolation to give values to the Python code:

+
x = rand()
+q = py"Point($(x), $rand())"
+
+
PyObject <__main__.Point object at 0x7fa3d66bdb80>
+
+

Attributes are directly accessible through the standard dot syntax:

+
@show p.x
+@show p.distance(q);
+
+
p.x = 1.0
+p.distance(q) = 1.7581695820873517
+
+

But say we have a module in Python that we want to call from Julia. We can do that too (otherwise this wouldn't be much use would it?). The pyimport function returns an object that gives us access to that modules functions:

+
np = pyimport("numpy")
+A = rand(3,3)
+b = rand(3)
+x = np.linalg.solve(A, b)
+@show maximum(abs.(A * x - b));
+
+
maximum(abs.(A * x - b)) = 1.1102230246251565e-16
+
+

In the previous example, A and b are created by Julia while x is created by Python, but we are using them interchangeably. We can do this because PyCall handles most type conversions automatically.

+
for x in [5.0, 2, ["a", "b"], Dict("a"=>rand(), "b"=>rand()), A]
+    @show typeof(x)
+    py"""print(type($x))"""
+end
+
+
typeof(x) = Float64
+<class 'float'>
+typeof(x) = Int64
+<class 'int'>
+typeof(x) = Vector{String}
+<class 'list'>
+typeof(x) = Dict{String, Float64}
+<class 'dict'>
+typeof(x) = Matrix{Float64}
+<class 'numpy.ndarray'>
+
+

Note that the matrix is converted to a numpy array if numpy is installed.

+

The same is true going from Python to Julia.

+
py"""
+objs = [{'a':1,'b':2}, [1, 'a', 3.0], 2.0+3j]
+for k in range(len(objs)):
+    $println($typeof(objs[k]))
+    print(type(objs[k]))
+"""
+
+
Dict{Any, Any}
+<class 'dict'>
+Vector{Any}
+<class 'list'>
+ComplexF64
+<class 'complex'>
+
+

We do need to be a little careful with some of Julia's less common types, especially if we pass them to Python and bring them back:

+
a = Int32(5)
+@show typeof(a)
+@show typeof(py"$a");
+
+
typeof(a) = Int32
+typeof(py"$a") = Int64
+
+

In these cases, we may want to handle the conversion ourselves. One option is getting the raw PyObject back by using the py"..."o syntax and then calling an appropriate convert function:

+
@show typeof(a)
+@show typeof(py"$a"o)
+@show typeof(convert(Int32, py"$a"o));
+
+
typeof(a) = Int32
+typeof(py"$a"o) = PyObject
+typeof(convert(Int32, py"$a"o)) = Int32
+
+

Another way of handling (or preventing) type conversions is to use the pycall function.

+
pycall(np.random.normal, PyObject, size=3)
+
+
PyObject array([ 1.27173788, -0.55905635, -1.81371862])
+
+

Here we specified that the object should be left as a raw PyObject (i.e., no type conversion at all).

+

We can also give it a Julia type to convert to:

+
pycall(np.random.normal, Vector{ComplexF32}, size=3)
+
+
3-element Vector{ComplexF32}:
+ 0.82824904f0 + 0.0f0im
+ -1.8152742f0 + 0.0f0im
+  0.6555549f0 + 0.0f0im
+
+

Here we forced the type conversion to complex numbers with 32-bit precision for the real and imaginary parts.

+

But what if we need to call a Python function that requires a callback? Not a problem. PyCall will automatically convert Julia functions to Python callable objects!

+
si = pyimport("scipy.integrate")
+tk = 0.0:1e-2:10.0
+function my_ode(t::Float64, y::Vector{Float64})::Vector{Float64}
+    dy = zeros(length(y))
+    dy[1] = 5.0*y[1] - 5.0*y[1]*y[2]
+    dy[2] = y[1]*y[2] - y[2]
+    return dy
+end
+soln = si.solve_ivp(my_ode, (0.0, 10.0), [5.0, 1.0], t_eval=tk);
+
+
using Plots
+plot(soln["t"], soln["y"]')
+
+

svg

+
plot(soln["y"][1,:], soln["y"][2,:])
+
+

svg

+

For more details, see the PyCall github repo.

+

Calling C/FORTRAN Libraries#

+

Here we will discuss how to call a C library function from within Julia.

+

Calling a FORTRAN library function is the same except that FORTRAN compilers "mangle" the function names. This means that they are not precisely the same names as in the source code and you need to know what compiler was used to compile the FORTRAN library so you can determine the mangling scheme.

+
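
The exact rules vary by compiler, but as a sketch: gfortran typically lowercases the routine name and appends an underscore, and Fortran passes every argument by reference. A call to a hypothetical Fortran subroutine fill_ones(x, n) compiled into a made-up library libmyfort might therefore look like this (both names are invented for illustration):

+
x = Vector{Float64}(undef, 4)
+n = Ref{Cint}(length(x))            # Fortran passes everything by reference, even scalars
+ccall((:fill_ones_, "libmyfort"),   # note the trailing underscore from gfortran name mangling
+    Cvoid,
+    (Ref{Float64}, Ref{Cint}),
+    x, n)
+
+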

Note that the library we are calling must be compiled as a shared library.

+

As an example we will use the "silly" library that was written just for this.

+

Here are the functions available in the silly library: +

void fill_zeros(double *to_fill, int size);
+void fill_value(double *to_fill, int size, double value);
+void fill_cb(double *to_fill, int size, double (*func)(int));
+

+

To call one of these functions, we will use the builtin Julia function ccall:

+
N = 4
+my_vector = Vector{Float64}(undef, N)
+@show my_vector
+ccall((:fill_zeros,"fake-lib/libsilly"), # function and library
+    Cvoid, # return type
+    (Ref{Float64}, Cint), # argument types
+    my_vector, N # arguments
+)
+@show my_vector
+ccall((:fill_value,"fake-lib/libsilly"),
+    Cvoid,
+    (Ref{Float64}, Cint, Cdouble),
+    my_vector, N, pi
+)
+@show my_vector;
+
+
my_vector = [2.257468188e-314, 0.0, 2.257517705e-314, 2.257468188e-314]
+my_vector = [0.0, 0.0, 0.0, 0.0]
+my_vector = [3.141592653589793, 3.141592653589793, 3.141592653589793, 3.141592653589793]
+
+

What if we want to use a function that requires a callback (so one of its arguments is a function pointer)? We can create a pointer to a Julia function with the @cfunction macro.

+
function my_filler(index::Int)::Float64
+    return index / 10.0
+end
+cfunc = @cfunction(my_filler, Float64, (Int,))
+
+
Ptr{Nothing} @0x000000017ee10ec0
+
+

Now we call the C function with ccall as before. The type of the function pointer is Ptr{Cvoid}.

+
ccall((:fill_cb, "fake-lib/libsilly"),
+    Cvoid,
+    (Ref{Float64}, Cint, Ptr{Cvoid}),
+    my_vector, N, cfunc)
+@show my_vector;
+
+
my_vector = [0.0, 0.1, 0.2, 0.3]
+
+

For more details, see the Calling C and FORTRAN Code section of the Julia documentation. (If the link does not work, just google "julia call c library".)

+

A more complex example is provided by Ipopt.jl. You may also wish to look at the Ipopt library C API. The easiest way to do this is actually to just look at the header file at src/Interfaces/IpStdCInterface.h which is viewable at the Ipopt github repo.

+

Other Interfaces#

+

Interested in calling a function or library written in something other than Python, C, or FORTRAN? Check out the Julia Interop group on GitHub. Interfaces already exist for C++, MATLAB, Mathematica, and R, to name a few.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Julia/julia_install/index.html b/Documentation/Development/Languages/Julia/julia_install/index.html new file mode 100644 index 000000000..b48e442b4 --- /dev/null +++ b/Documentation/Development/Languages/Julia/julia_install/index.html @@ -0,0 +1,5182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Installing Julia - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Installation#

+

Julia modules exist on NREL HPC systems. Access simply with

+
module load julia
+
+

To see all available Julia modules on the system, use the command

+
module spider julia
+
+

However, if you need a version of Julia for which a module does not exist or want your own personal Julia build, there are several options described in the rest of this document. Below is a general guide for what approach to use:

+
    +
  • fast and easy - Anaconda
  • +
  • performance and ease - Spack
  • +
  • performance or need to customize Julia build - do it yourself (i.e. build from source)
  • +
+
+

Warning

+

Julia built on Kestrel CPU nodes will not work on the GPU nodes and vice versa. Always build Julia on the same type of compute node that you intend to run on.

+
+

Anaconda#

+

Older versions of Julia are available from the conda-forge channel:

+
conda create -n julia-env
+source activate julia-env
+conda install -c conda-forge julia
+
+

Spack Build#

+

Prerequisites#

+

A working version of Spack. For detailed instructions on getting Spack set up, see the GitHub repository. Briefly, this can be done with the following:

+
git clone https://github.com/spack/spack.git
+cd spack
+git checkout releases/v0.15 # Change to desired release
+. share/spack/setup-env.sh # Activate spack shell support
+
+

Instructions#

+
+

Info

+

Steps 1 and 2 may be skipped when using the develop branch or any release branch after v0.15.

+
+
    +
  1. In the Spack repository, open the file var/spack/repos/builtin/packages/julia/package.py in your favorite editor.
  2. +
  3. There is an if-else statement under the if statement +
    if spec.target.family == 'x86_64'  or spec.target.family == 'x86':
    +
    + Change the else clause to read +
    else:
    +    target_str = str(spec.target).replace('_','-')
    +    options += [
    +        'MARCH={0}'.format(target_str),
    +        'JULIA_CPU_TARGET={0}'.format(target_str)
    +    ]
    +
  4. +
  5. Now install Julia with Spack +
    spack install julia
    +
  6. +
+

Do It Yourself Build (v 1.2 or later)#

+

Prerequisites#

+

All the required build tools and libraries are available on the clusters either by default or through modules. The needed modules are covered in the instructions.

+

Terms#

+
    +
  • JULIA_HOME is the base directory of Julia source code (initially called julia after git clone)
  • +
+

Instructions#

+

When compiling Julia you can choose to compile against Intel's MKL libraries or OpenBLAS for the Julia linear algebra operations. If you are going to be doing significant matrix-vector operations directly in Julia, then you will want to compile it with MKL. If most of the matrix-vector operations are being done in a subprogram or library (e.g. Ipopt) then it will make no difference what you compile Julia with. In this latter case, it is recommended that you compile with OpenBLAS since that is significantly easier. Instructions for both choices are given below.

+
+

Note

+

When compiling Julia with MKL, Julia uses the single dynamic library option for linking. Any dynamic libraries (e.g. Ipopt or CoinHSL) loaded by Julia also need to be linked to MKL with this approach. Failing to do so will result in unusual behavior, e.g. getting garbage values passed to the MKL function calls.

+
+
    +
  1. Load the following modules:
      +
    • gcc (>= 5.1)
    • +
    • cmake (>= 3.4.3)
    • +
    • mkl (any version -- optional)
    • +
    +
  2. +
  3. Get the Julia source code +git clone https://github.com/JuliaLang/julia.git
  4. +
  5. cd julia
  6. +
  7. Change to the version of Julia you want to build git checkout <julia_version>
  8. +
  9. In Make.user (you will need to create the file if it doesn't exist) in JULIA_HOME put the following:
      +
    • If you want to compile Julia with MKL also add the following
        +
      • USE_INTEL_MKL=1 -- Use Intel versions of BLAS and LAPACK (this is why we loaded mkl module)
      • +
      • USE_BLAS64=0 -- Use the 64-bit library with the 32-bit integer interface. This will necessitate changes in Make.inc. The reasons for this are discussed in step 7.
      • +
      +
    • +
    +
    +

    Tip

    +

    I found it useful to create the file Make.user in another location (e.g., my home directory) and drop a link to it into the Julia build directory, since I used git clean -x -f -d to make sure everything is completely clean (which would otherwise remove Make.user).

    +
    +
  10. +
  11. (Skip to step 8 if compiling Julia without MKL.) There are a couple of problems to overcome when compiling Julia with MKL. The first is that a makefile in the SuiteSparse library package defines a USER variable that leads to problems with xalt/ld (a script that invokes ld). To fix this do the following:
      +
    • In JULIA_HOME fetch and unpack the SuiteSparse libraries +make -C deps/ extract-suitesparse
    • +
    • With your favorite editor, open the file +JULIA_HOME/deps/scratch/SuiteSparse-5.4.0/UMFPACK/Lib/Makefile
    • +
    • In the Makefile, do a global replace on USER, i.e., change all occurrences of the variable USER to something else, like MUSER
    • +
    +
  12. +
  13. The second problem is that when compiling against MKL, Julia either uses the 32-bit MKL libraries or the 64-bit MKL libraries with 64-bit interface. It is common for other libraries (e.g. Ipopt or HSL) to compile against the 64-bit MKL libraries with 32-bit interface. This causes unusual behavior. To make Julia compile against the 64-bit MKL libraries with 32-bit interface, do the following:
      +
    • Open Make.inc in your favorite editor and make the following change
        +
      • find where MKLLIB is set (there will be an if-else statement depending on the value of USE_BLAS64)
      • +
      • change the else clause to read MKLLIB := $(MKLROOT)/lib/intel64
      • +
      +
    • +
    +
  14. +
  15. make -j4 -- -j4 allows make to use 4 processes to build and can speed up compilation (additional speed ups may be possible by increasing the number of processes)
  16. +
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Julia/julia_parallel/index.html b/Documentation/Development/Languages/Julia/julia_parallel/index.html new file mode 100644 index 000000000..d4034c305 --- /dev/null +++ b/Documentation/Development/Languages/Julia/julia_parallel/index.html @@ -0,0 +1,5796 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Julia Parallel Computing - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Parallel Computing in Julia#

+

We will make use of the following basic Monte Carlo integration function throughout this presentation

+
using Statistics
+using BenchmarkTools # for the `@btime` macro
+
+function mc_integrate(f::Function, a::Real=0, b::Real=1, n::Int=100000)
+    ihat = 0.0
+    for k in 1:n
+        x = (b - a)*rand() + a
+        ihat += (f(x) - ihat) / k
+    end
+    return ihat
+end
+
+function intense_computation(t::Real)
+    sleep(t)
+    return rand()
+end;
+
+

Asynchronous Tasks#

+

What are Tasks?#

+

Tasks are execution streams that do not depend on each other and can be done in any order. They can be executed asynchronously but they are not executed in parallel. That is, only one task is running at a given time but the order of execution is not predetermined.

+

Tasks are also known as coroutines.

+

Creating and Running Tasks#

+

Running a task is done in 3 steps:

+
    +
  1. Creation
  2. +
  3. Scheduling
  4. +
  5. Collect Results
  6. +
+

Creating a task can be done directly with the Task object:

+
my_task = Task(()->mc_integrate(sin, -pi, pi))
+
+
Task (runnable) @0x000000011ecc0ab0
+
+

Note the Task constructor takes a function with no arguments.

+

We can always define a zero-argument anonymous function to pass to the Task constructor. The @task macro exists for this purpose:

+
my_task = @task mc_integrate(sin, -pi, pi)
+
+
Task (runnable) @0x0000000136384cd0
+
+

Next we schedule the task to run using the schedule function

+
schedule(my_task)
+
+
Task (done) @0x0000000136384cd0
+
+

Many times we want to create and schedule a task immediately. We can do this with the @async macro:

+
my_task = @async mc_integrate(sin, -pi, pi)
+
+
Task (done) @0x000000011d14edc0
+
+

We can collect the results of the task once it has completed with the fetch function

+
fetch(my_task)
+
+
0.0020294747408654656
+
+

There are a few helpful details to know about fetch:

+
    +
  1. If the task has not finished when fetch is called, the call to fetch will block until the task has completed.
  2. +
  3. If the task raises an exception, fetch will raise a TaskFailedException which wraps the original exception.
  4. +
+
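
Both points can be seen in a small standalone sketch (the TaskFailedException type exists in Julia 1.3 and later):

+
t = @async error("boom")                  # this task will fail
+try
+    fetch(t)                              # blocks until the task finishes, then rethrows
+catch err
+    @show err isa TaskFailedException     # true -- the exception wraps the failed task
+end
+
+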

Remember that tasks are not inherently parallel, just asynchronous execution streams.

+
function run_mci()
+    N = 10
+    result = zeros(N)
+    for k in 1:N
+        result[k] = mc_integrate(sin, -pi, pi)
+    end
+    return mean(result)
+end
+
+function run_mci_task()
+    N = 10
+    task_res = zeros(N)
+    @sync for k in 1:N
+        @async(task_res[k] = mc_integrate(sin, -pi, pi))
+    end
+    return mean(task_res)
+end;
+
+
@btime run_mci()
+@btime run_mci_task();
+
+
  22.094 ms (1 allocation: 160 bytes)
+  24.318 ms (75 allocations: 4.78 KiB)
+
+
+

Note

+

The @sync macro will block at the end of the code block until all enclosed @async statements have completed execution.

+
+

Communicating Between Tasks#

+

Sometimes we need to communicate between tasks. An easy way to accomplish this is to use Julia's Channel type. We can think of a Channel like a pipe or a queue: objects are put in at one end and taken off at the other.

+
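
As a quick standalone illustration (separate from the integration example), items come off a Channel in first-in, first-out order:

+
ch = Channel{Float64}(2)   # buffered channel with capacity 2
+put!(ch, 1.0)
+put!(ch, 2.0)
+@show take!(ch)            # 1.0
+@show take!(ch)            # 2.0
+close(ch)
+
+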

Let's rewrite run_mci_task to use channels by dividing the run_mci workflow into two functions.

+

The first function will perform small Monte-Carlo integrations and put the results on a channel with the put! function. When it has finished the requested number of computations it will close the channel with close and return.

+
function integrator(output::Channel{Float64}, N::Int)
+    for k in 1:N
+        result = mc_integrate(sin, -pi, pi)
+        put!(output, result)
+    end
+    close(output)
+    return
+end;
+
+
+

Note

+

If the channel is full, put! will block until space opens up.

+
+

The second function will take the results off the channel using the take! function and accumulate them into an average. We keep pulling results from the channel as long as there is a result or the channel is open. We can check the former with isready and the latter with isopen.

+
function accumulator(input::Channel{Float64})
+    mean_val = 0.0
+    k = 0
+    while isready(input) || isopen(input)
+        value = take!(input)
+        k += 1
+        mean_val += (value - mean_val) / k
+    end
+    return mean_val
+end;
+
+
+

Note

+

If the channel is empty, the take! function will block until there is an item available.

+
+

Now we create a channel which can hold 10 results, create and schedule both tasks, and finally fetch the result.

+
function run_mci_chan()
+    comm_ch = Channel{Float64}(10)
+    atask = @async accumulator(comm_ch)
+    @async integrator(comm_ch, 10)
+    result = fetch(atask)    
+    return result
+end;
+
+
@btime run_mci_chan();
+
+
  22.097 ms (25 allocations: 1.45 KiB)
+
+

Why Tasks?#

+

If tasks aren't parallel, why are we talking about them in a parallel computing tutorial?

+

Remember that tasks are discrete computation units. They naturally define boundaries between computational tasks. Julia's native parallel capabilities are ways of scheduling tasks on other processors.

+

Multi-Threading#

+

Starting Julia with Multiple Threads#

+

Julia (v1.3 or greater) has multithreading built into the language. By default, Julia starts with a single thread. To start Julia with multiple threads, either

  • set the environment variable JULIA_NUM_THREADS to some value > 1, or
  • start Julia with the --threads or -t option (Julia v1.5 or greater).

+

Once started, we can see how many threads are running with the function Threads.nthreads

+
Threads.nthreads()
+
+
2
+
+

@threads Macro#

+

Many computations take the form of looping over an array where the result of the computation is put into an element in the array and these computations do not interact. In this case, we can make use of the Threads.@threads macro.

+

Let's apply this to our Monte-Carlo integration.

+
function run_mci_mt()
+    N = 10
+    mt_res = zeros(N)
+    Threads.@threads for k in 1:N
+        mt_res[k] = mc_integrate(sin, -pi, pi)
+    end
+    return mean(mt_res)
+end;
+
+
@btime run_mci_mt();
+
+
  11.118 ms (12 allocations: 1.00 KiB)
+
+

@spawn Macro#

+

Some applications require dispatching individual tasks on different threads. We can do this using the Threads.@spawn macro. This is like the @async macro but will schedule the task on an available thread. That is, it creates a Task and schedules it but on an available thread.

+
function run_mci_mt2()
+    N = 10
+    mt_res = Vector{Float64}(undef, N)
+    @sync for k in 1:N
+        @async(mt_res[k] = fetch(Threads.@spawn mc_integrate(sin, -pi, pi)))
+    end
+    return mean(mt_res)
+end;
+
+
@btime run_mci_mt2();
+
+
  11.385 ms (126 allocations: 8.80 KiB)
+
+

There are a couple of oddities about Julia's multi-threading capability to remember:

+
    +
  1. An available thread is any thread that has completed all of its assigned tasks or whose remaining tasks are all blocked.
  2. +
  3. As of Julia 1.6, once a task has been assigned to a thread, it remains on that thread even after blocking operations. This will likely change in future releases of Julia.
  4. +
+

The combination of these two behaviors can lead to load imbalances amongst threads when there are blocking operations within a thread's tasks.

+

Using Channels#

+

Just as before, we can use a Channel to communicate between tasks in a multi-threaded environment. The only difference is that we replace @async with Threads.@spawn.

+
function run_mci_mt3()
+    comm_ch = Channel{Float64}(10)
+    itask = Threads.@spawn integrator(comm_ch, 10)
+    atask = Threads.@spawn accumulator(comm_ch)
+    result = fetch(atask)
+    return result
+end;
+
+
@btime run_mci_mt3();
+
+
  22.183 ms (35 allocations: 1.61 KiB)
+
+
+

Note

+

We can see from the timing results this is not the best way to distribute the work since the integrator function has much more computational work than the accumulator function.

+
+

Distributed Computing with Distributed.jl#

+

Architecture#

+

Communication patterns are one-sided, so users only manage one process. Communication itself takes the form of function or macro calls rather than explicit send and receive calls.

+

Distributed.jl is built on two basic types: remote calls and remote references. A remote call is a directive to execute a particular function on a particular process. A remote reference is a reference to a variable stored on a particular process.

+

There is a strong resemblance to the way Julia handles tasks: Function calls (wrapped in appropriate types) are scheduled on worker processes through remote calls which return remote references. The results of these calls are then retrieved by fetching the values using the remote references.

+

Setting Up#

+

We can launch more Julia processes on the same or other machines with the addprocs function. Here we launch 2 worker processes on the local machine:

+
using Distributed
+addprocs(2);
+
+

Each Julia process is identified by a (64-bit) integer. We can get a list of all active processes with procs:

+
@show procs();
+
+
procs() = [1, 2, 3]
+
+

There is a distinction between the original Julia process and those we launched. The original Julia process is often called the master process and always has id equal to 1. The launched processes are called workers. We can obtain a list of workers with the workers function:

+
@show workers();
+
+
workers() = [2, 3]
+
+

By default, distributed processing operations use the workers only.

+

We can also start up worker processes from the command line using the -p or --procs option.

+

In order to launch Julia processes on other machines, we give addprocs a vector of tuples where each tuple is the hostname as a string paired with the number of processes to start on that host.

+
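
For example, a sketch with made-up host names (on an HPC cluster these would typically be nodes from your job allocation; connections are made over SSH, and Julia must be available at the same path on each host):

+
# node001 and node002 are hypothetical host names -- replace with nodes from your allocation
+addprocs([("node001", 2), ("node002", 2)])
+
+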

The Julia global state is not copied in the new processes. We need to manually load any modules and define any functions we need. This is done with the Distributed.@everywhere macro:

+
@everywhere using Statistics
+@everywhere function mc_integrate(f::Function, a::Real=0, b::Real=1, n::Int=100000)
+    ihat = 0.0
+    for k in 1:n
+        x = (b - a)*rand() + a
+        ihat += (f(x) - ihat) / k
+    end
+    return ihat
+end;
+
+

@distributed Macro#

+

The @distributed macro is the distributed memory equivalent of the Threads.@threads macro. This macro partitions the range of the for loop and executes the computation on all worker processes.

+
function run_mci_dist()
+    N = 10
+    total = @distributed (+) for k in 1:N
+        mc_integrate(sin, -pi, pi)
+    end
+    return total/N
+end;
+
+
@btime run_mci_dist();
+
+
  11.224 ms (157 allocations: 7.16 KiB)
+
+

Between the macro and the for loop is an optional reduction. Here we have used + but this can be any valid reduction operator including a user defined function. The values given to the reduction are the values of the last expression in the loop.

+
+

Note

+

If we do not provide a reduction, @distributed creates a task for each element of the loop and schedules them on worker processes and returns without waiting for the tasks to complete. To wait for completion of the tasks, the whole block can be wrapped with @sync macro.

+
+
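
For example, a minimal sketch of the no-reduction form, wrapped in @sync so that we wait for all iterations to finish:

+
@sync @distributed for k in 1:10
+    println("iteration $k ran on worker $(myid())")   # myid() is provided by Distributed
+end
+
+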

@spawnat Macro#

+

Julia also provides more fine grained control for launching tasks on workers with the @spawnat Macro:

+
function run_mci_dist2()
+    N = 10
+    futures = Vector{Future}(undef, N)
+    for k in 1:N
+        futures[k] = @spawnat(:any, mc_integrate(sin, -pi, pi))
+    end
+    return mean(fetch.(futures))
+end;
+
+

The first argument to @spawnat is the worker to run the computation on. Here we have used :any indicating that Julia should pick a process for us. If we wanted to execute the computation on a particular worker, we could specify which one with the worker id value. The second argument is the expression to compute.

+

@spawnat returns a Future which is a remote reference. We call fetch on it to retrieve the value of the computation. Note that fetch will block until the computation is complete.

+
@btime run_mci_dist2();
+
+
  13.020 ms (1119 allocations: 44.34 KiB)
+
+
+

Warning

+

The entire expression is sent to the worker process before anything in the expression is executed. This can cause performance issues if we need a small part of a big object or array.

+
+
@everywhere struct MyData
+    Data::Vector{Float64}
+    N::Int
+end
+function slow(my_data::MyData)
+    return fetch(@spawnat(2, mean(rand(my_data.N))))
+end;
+
+
large_data = MyData(rand(1000000), 5)
+@btime slow(large_data);
+
+
  1.731 ms (108 allocations: 4.08 KiB)
+
+

This is easily fixed using a local variable:

+
function fast(my_data::MyData)
+    n = my_data.N
+    return fetch(@spawnat(2, mean(rand(n))))
+end;
+
+
@btime fast(large_data);
+
+
  192.843 μs (100 allocations: 3.80 KiB)
+
+

Remote Channels#

+

As suggested by the name, these are the remote versions of the Channel type we've already seen. If you look at the source code, they actually wrap an AbstractChannel to provide the needed remote functionality. We can effectively treat them just like a Channel.

+

Let's redo our integrator - accumulator workflow, but this time let's do a better job of distributing the work:

+
@everywhere function integrator(output::RemoteChannel{Channel{Float64}}, N::Int)
+    for k in 1:N
+        result = mc_integrate(sin, -pi, pi)
+        put!(output, result)
+    end
+    put!(output, NaN)
+    return
+end;
+@everywhere function accumulator(input::RemoteChannel{Channel{Float64}}, nworkers::Int)
+    mean_val = 0.0
+    k = 0
+    finished = 0
+    while finished < nworkers
+        value = take!(input)
+        if value === NaN
+            finished += 1
+        else
+            k += 1
+            mean_val += (value - mean_val) / k
+        end
+    end
+    return mean_val
+end;
+
+
function run_mci_rc()
+    comm_ch = RemoteChannel(()->Channel{Float64}(10), 1)
+    @spawnat(2, integrator(comm_ch, 5))
+    @spawnat(3, integrator(comm_ch, 5))
+    atask = @async accumulator(comm_ch, nworkers())
+    return fetch(atask)
+end;
+
+

Here we create a RemoteChannel on the master process, divide the computationally intensive integrator function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result.

+
@btime run_mci_rc();
+
+
  12.328 ms (1066 allocations: 41.97 KiB)
+
+

Shutting Down#

+

To shutdown the worker processes we can use rmprocs.

+
rmprocs(workers())
+
+
Task (done) @0x000000011cd3cde0
+
+

Alternatively, we can also just exit Julia and the workers will be shutdown as part of the exit process.

+

Distributed Computing with MPI.jl#

+ + +

Overview of MPI.jl#

+

MPI.jl is a Julia wrapper around an MPI library. By default it will download an MPI library suitable for running on the installing system. However, it is easily configured to use an existing system MPI implementation (e.g. one of the MPI modules on the cluster). See the documentation for instructions on how to do this.

+
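
For recent versions of MPI.jl (v0.20 and later), this configuration goes through the MPIPreferences package. A sketch, assuming the desired system MPI module is already loaded:

+
using MPIPreferences
+MPIPreferences.use_system_binary()   # records the system MPI library in LocalPreferences.toml
+
+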

MPI.jl mostly requires transmitted data to be buffers of basic types (types that are easily converted to C). Some functions can transmit arbitrary objects by serializing them, but this functionality is not as fleshed out as in mpi4py.

+

Example#

+

We first need to load and initialize MPI.

+
using MPI
+MPI.Init()
+
+

MPI.Init loads the MPI library and calls MPI_Init as well as sets up types for that specific MPI library.

+

Now we can implement our Monte-Carlo integration workflow using MPI

+
function run_mci_mpi()
+
+    comm = MPI.COMM_WORLD
+    rank = MPI.Comm_rank(comm)
+    size = MPI.Comm_size(comm)
+
+    if rank == 0
+        N = 10
+        num = [N]
+    else
+        num = Vector{Int}(undef, 1)
+    end
+    MPI.Bcast!(num, 0, comm)
+
+    rank_sum = 0.0
+    for k in rank+1:size:num[1]
+        rank_sum += mc_integrate(sin, -pi, pi)
+    end
+
+    total = MPI.Reduce([rank_sum], MPI.SUM, 0, comm)
+    if rank == 0
+        result = total / N
+    else
+        result = nothing
+    end
+
+    return result
+end
+
+

To benchmark this, we time it many (10000) times and track the minimum value (this is similar to what the @btime macro does).

+
function run_loop(nruns::Int)
+
+    min_time = 1e10
+    result = 0.0
+
+    for _ in 1:nruns
+        MPI.Barrier(MPI.COMM_WORLD)
+        start = time()
+        result = run_mci_mpi()
+        stop = time()
+        elapsed = stop - start
+        if elapsed < min_time
+            min_time = elapsed
+        end
+    end
+
+    if MPI.Comm_rank(MPI.COMM_WORLD) == 0
+        println("Elapsed time: ", min_time)
+    end
+
+    return
+end
+
+run_loop(10000)
+
+

Here are the results:

+
mpirun -n 2 julia mpi_mci.jl
+  Activating environment at `~/HPC_Apps/julia-tutorial/Project.toml`
+  Activating environment at `~/HPC_Apps/julia-tutorial/Project.toml`
+Elapsed time: 0.01108694076538086
+
+

GPU Computing#

+

We provide a brief survey of available packages that can be used to get started.

+

Packages exist for NVIDIA's CUDA, AMD's ROCm, and Intel's oneAPI. CUDA.jl is the most mature, while the other two, as of this writing, are still under development.

+
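
As a taste of the array-style programming model, here is a minimal CUDA.jl sketch (it assumes CUDA.jl is installed and that you are running on a GPU node):

+
using CUDA
+x = CUDA.rand(1024)    # array allocated in GPU memory
+y = 2 .* x .+ 1        # the broadcast runs as a GPU kernel
+sum(y)                 # reduction on the GPU; the scalar result comes back to the host
+
+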

The package KernelAbstractions.jl is an abstraction layer for enabling different GPU backends.

+

See the JuliaGPU organization's webpage or github repo for a great place to get started.

+

Additional Resources#

+

The following are great resources for learning more:

+ + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Julia/julia_tour/index.html b/Documentation/Development/Languages/Julia/julia_tour/index.html new file mode 100644 index 000000000..f35a3dae6 --- /dev/null +++ b/Documentation/Development/Languages/Julia/julia_tour/index.html @@ -0,0 +1,6147 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Tour of Julia - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Tour of Julia#

+

"Julia aims to create an unprecedented combination of ease-of-use, power, and efficiency in a single language." --Julia Documentation

+

Why Julia?#

+

Feature Highlights:

+
    +
  • Designed for scientific computing
  • +
  • Non-vectorized code is just as fast as vectorized code
  • +
  • Designed for distributed and parallel computing
  • +
  • Call C/FORTRAN functions directly
  • +
  • Metaprogramming
  • +
+

Basics#

+

REPL (Read-Evaluate-Print-Loop)#

+
    +
  • Command line julia interface
  • +
  • Type the command julia in a terminal (assuming Julia is in your path)
  • +
  • Basic way to interact with objects, packages and environments
  • +
+

jmaack-32918s:~ jmaack$ julia
+               _
+   _       _ _(_)_     |  Documentation: https://docs.julialang.org
+  (_)     | (_) (_)    |
+   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
+  | | | | | | |/ _` |  |
+  | | |_| | | | (_| |  |  Version 1.6.1 (2021-04-23)
+ _/ |\__'_|_|_|\__'_|  |  
+|__/                   |
+
+
julia> 4 * pi^2 + sqrt(2)im
+39.47841760435743 + 1.4142135623730951im
+
+help?> Int
+search: Int Int8 Int64 Int32 Int16 Int128 Integer intersect intersect! InteractiveUtils InterruptException
+
+  Int64 <: Signed
+
+  64-bit signed integer type.
+
+julia> exit()
+

+
+

Tip

+

When using the REPL, the result of the (last) expression is always printed. This is sometimes undesirable. We can suppress printing by ending the last expression with a semicolon ;. This is used throughout this presentation for appearance purposes. Unless otherwise stated any semicolon in code is not needed.

+
+

Defining Functions#

+

There are two ways to define functions

+
    +
  • +

    Standard way:

    +
    function my_function(x)
    +    return x^2
    +end;
    +
    +
  • +
  • +

    Short form way:

    +
    my_func(x) = x^2;
    +
    +
  • +
+

It is also possible to define anonymous functions (and save pointers to them):

+
f = (x)->x^2;
+
+
@show my_function(pi)
+@show my_func(pi)
+@show f(pi);
+
+
my_function(pi) = 9.869604401089358
+my_func(pi) = 9.869604401089358
+f(pi) = 9.869604401089358
+
+
+

Info

+

Julia uses the standard control flow keywords such as for, while, if, elseif, else. See the Control Flow section of the Julia documentation for more details. Obviously, these are helpful in writing functions.

+
+

Using Installed Packages#

+

Packages can be accessed in two ways:

+
    +
  • +

    import statement -- makes all module attributes (i.e. functions and types) available by prefixing the module name followed by a dot

    +
    x = rand(5)
    +import Statistics
    +Statistics.mean(x)
    +
    +
    0.3339056277968421
    +
    +
  • +
  • +

    using statement -- everything exported by the module is directly accessible

    +
    using Statistics
    +mean(x)
    +
    +
    0.3339056277968421
    +
    +
  • +
+

Any attribute that is not exported by the module can still be accessed by prefixing the module name followed by a dot.

+
Statistics._conj(x)
+
+
5-element Vector{Float64}:
+ 0.17922586649673145
+ 0.7155842248637634
+ 0.29280412953665125
+ 0.10325841440419592
+ 0.3786555036828685
+
+
+

Note

+

As in Python, there are no private attributes. Users may access anything created by a module. Package authors can suggest attributes that users should not use by not exporting them or through naming conventions (e.g. prefixing _ to any name that is internal only).

+
+

Julia 1.6 introduced the "pythonic" import syntax

+
import Statistics as Stats
+Stats.mean(x)
+
+
0.3339056277968421
+
+

In older Julia versions, we can declare a constant for our packages

+
import Statistics
+const St = Statistics
+St.mean(x)
+
+
0.3339056277968421
+
+
+

Tip

+

When writing Julia code, use import rather than using. This makes code easier to follow as well as giving hints on where to look for documentation.

+
+

Vectorizing#

+

Julia uses the MATLAB dot syntax to operate component-wise on arrays (i.e. vectors and matrices)

+
x = rand(3)
+y = rand(3)
+(x.*y).^2
+
+
3-element Vector{Float64}:
+ 0.5367929263482071
+ 0.008092183589557244
+ 0.36146876615689527
+
+

Julia also extends this syntax to ANY function that operates on vector elements

+
number_op(x) = x + 5
+number_op.(x)
+
+
3-element Vector{Float64}:
+ 5.754141942494573
+ 5.8412967567631
+ 5.637813968303307
+
+

In Julia, vectorizing is done for convenience rather than performance:

+
function my_mult_for(x,y)
+    z = zeros(length(x))
+    for k in 1:length(x)
+        z[k] = x[k] * y[k]
+    end
+    return z
+end
+
+function my_mult_vect(x,y)
+    return x .* y
+end;
+
+
# This forces Julia to compile the function definitions
+# so that the timing results in the next cell are correct
+x = rand(2)
+y = rand(2)
+@time my_mult_vect(x,y)
+@time my_mult_for(x,y);
+
+
  0.055219 seconds (145.07 k allocations: 8.243 MiB, 99.96% compilation time)
+  0.009099 seconds (15.42 k allocations: 873.090 KiB, 99.82% compilation time)
+
+
x = rand(10000)
+y = rand(10000)
+@time my_mult_vect(x,y)
+@time my_mult_for(x,y);
+
+
  0.000015 seconds (2 allocations: 78.203 KiB)
+  0.000032 seconds (2 allocations: 78.203 KiB)
+
+

Package Manager#

+

Managing Packages (REPL)#

+

Open the REPL and hit the ] key to enter package management mode. From here we can add or remove packages:

+
(@v1.6) pkg> add Compat
+   Resolving package versions...
+    Updating `~/.julia/environments/v1.6/Project.toml`
+  [34da2185] + Compat v3.31.0
+    Updating `~/.julia/environments/v1.6/Manifest.toml`
+  [34da2185] + Compat v3.31.0
+  [8bb1440f] + DelimitedFiles
+  [8ba89e20] + Distributed
+  [1a1011a3] + SharedArrays
+  [2f01184e] + SparseArrays
+  [10745b16] + Statistics
+
+(@v1.6) pkg> rm Compat
+    Updating `~/.julia/environments/v1.6/Project.toml`
+  [34da2185] - Compat v3.31.0
+    Updating `~/.julia/environments/v1.6/Manifest.toml`
+  [34da2185] - Compat v3.31.0
+  [8bb1440f] - DelimitedFiles
+  [8ba89e20] - Distributed
+  [1a1011a3] - SharedArrays
+  [2f01184e] - SparseArrays
+  [10745b16] - Statistics
+
+

We can also print out what packages are available +

(@v1.6) pkg> st
+      Status `~/.julia/environments/v1.6/Project.toml`
+  [7073ff75] IJulia v1.23.2
+  [438e738f] PyCall v1.92.3
+
+or update the packages +
(@v1.6) pkg> up
+    Updating registry at `~/.julia/registries/General`
+    Updating git-repo `https://github.com/JuliaRegistries/General.git`
+  No Changes to `~/.julia/environments/v1.6/Project.toml`
+  No Changes to `~/.julia/environments/v1.6/Manifest.toml`
+

+

Managing Packages (Scripts)#

+

Package management mode in the REPL is actually just a convenient interface to the Julia package Pkg.jl which is part of the Julia standard library.

+

All package mode commands are functions in Pkg.jl:

+
import Pkg; Pkg.add("Compat"); Pkg.rm("Compat")
+
+    Updating registry at `~/.julia/registries/General`
+    Updating git-repo `https://github.com/JuliaRegistries/General.git`
+   Resolving package versions...
+    Updating `~/.julia/environments/v1.6/Project.toml`
+  [34da2185] + Compat v3.31.0
+    Updating `~/.julia/environments/v1.6/Manifest.toml`
+  [34da2185] + Compat v3.31.0
+  [8bb1440f] + DelimitedFiles
+  [8ba89e20] + Distributed
+  [1a1011a3] + SharedArrays
+  [2f01184e] + SparseArrays
+  [10745b16] + Statistics
+    Updating `~/.julia/environments/v1.6/Project.toml`
+  [34da2185] - Compat v3.31.0
+    Updating `~/.julia/environments/v1.6/Manifest.toml`
+  [34da2185] - Compat v3.31.0
+  [8bb1440f] - DelimitedFiles
+  [8ba89e20] - Distributed
+  [1a1011a3] - SharedArrays
+  [2f01184e] - SparseArrays
+  [10745b16] - Statistics
+
+
Pkg.status(); Pkg.update()
+
+      Status `~/.julia/environments/v1.6/Project.toml`
+  [7073ff75] IJulia v1.23.2
+  [438e738f] PyCall v1.92.3
+    Updating registry at `~/.julia/registries/General`
+    Updating git-repo `https://github.com/JuliaRegistries/General.git`
+  No Changes to `~/.julia/environments/v1.6/Project.toml`
+  No Changes to `~/.julia/environments/v1.6/Manifest.toml`
+
+
+

Warning

+

If you want to use Julia within a Jupyter notebook, some package management features (like adding new packages) do not work well. It is best to add/remove/update packages either with a script or using the REPL.

+
+

Environments#

+

Environments allow us to install different versions of packages for use with different projects. They are very similar to Python virtual environments or conda environments.

+
Pkg.activate("env-one"); Pkg.status()
+
+  Activating environment at `~/HPC_Apps/julia-tutorial/env-one/Project.toml`
+      Status `~/HPC_Apps/julia-tutorial/env-one/Project.toml`
+  [91a5bcdd] Plots v1.13.1
+
+
Pkg.activate("env-two"); Pkg.status()
+
+  Activating environment at `~/HPC_Apps/julia-tutorial/env-two/Project.toml`
+      Status `~/HPC_Apps/julia-tutorial/env-two/Project.toml`
+  [91a5bcdd] Plots v1.16.6
+
+

The environment names are given by the directory in which they reside. The explicitly added packages are given in the Project.toml file. The entire environment, with all the required dependencies (down to specific commits), is in the Manifest.toml file.

+

Activating Environments#

+

There are 3 ways to activate an environment:

+
    +
  • Using the Pkg.activate function: +
    Pkg.activate("path/to/environment/")
    +
  • +
  • Within package management mode with the activate command: +
    activate path/to/environment
    +
  • +
  • From the command line with the --project option: +
    julia --project=<path/to/environment>
    +
  • +
+

The first 2 ways can also be used to create new environments.

+

Copying Environments#

+

To copy an environment, all you need is the Project.toml file. Put it in the desired directory and activate that environment. Finally, in package management mode, use the instantiate command:

+
(fake-env) pkg> st
+      Status `~/fake-env/Project.toml`
+ [da04e1cc] MPI v0.18.1
+        Info packages marked with  not downloaded, use `instantiate` to download
+
+(fake-env) pkg> instantiate
+   Installed MPI  v0.18.1
+    Building MPI  `~/.julia/scratchspaces/44cfe95a-1eb2-52ea-b672-e2afdf69b78f/494d99052881a83f36f5ef08b23de07cc7c03a96/build.log`
+Precompiling project...
+  1 dependency successfully precompiled in 2 seconds (11 already precompiled)
+
+
+

Note

+

Alternatively, you can use the Pkg.instantiate function.

+
+
+

Info

+

If you need to copy an environment exactly, copy both the Project.toml and Manifest.toml files into the desired directory and use the instantiate command.

+
+

Environment Layering#

+

Julia environments can be layered such that packages from more than just the top layer environment can be imported. This allows us to have access to debugging and development tools without putting them in whatever environment we're working on. This is a major difference from conda environments.

+
Pkg.status()
+      Status `~/HPC_Apps/julia-tutorial/env-one/Project.toml`
+  [91a5bcdd] Plots v1.13.1
+
+
import BenchmarkTools as BT # THIS IS NOT IN OUR TOP ENVIRONMENT!!!
+
+

When loading a package, Julia has a hierarchy of environments that it checks for the package. Julia loads the first version of the package it encounters in this hierarchy. The environment hierarchy can be altered by the JULIA_LOAD_PATH environment variable.

+
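
We can inspect this hierarchy from within Julia:

+
@show LOAD_PATH          # the raw entries, by default ["@", "@v#.#", "@stdlib"]
+@show Base.load_path();  # the expanded stack of environments Julia searches, in order
+
+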

These environment stacks are discussed more in the Environments subsection of the Code Loading part of the Julia Manual.

+

Types#

+

Type Hierarchy#

+

In Julia everything has a type. We can access an object's type with the typeof function:

+
typeof(7.5)
+
+
Float64
+
+

Even types have a type:

+
typeof(Float64)
+
+
DataType
+
+

Julia also has a type hierarchy. There are subtypes and supertypes. We can explore these with the functions subtypes and supertype:

+
subtypes(Float64)
+
+
Type[]
+
+
supertype(Float64)
+
+
AbstractFloat
+
+

Float64 has no subtypes because it is a Concrete Type. All of its supertypes are Abstract Types. Only Concrete Types can actually be instantiated.

+
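
Julia provides predicates to check this directly:

+
@show isconcretetype(Float64)         # true
+@show isabstracttype(AbstractFloat)   # true
+@show isconcretetype(AbstractFloat);  # false
+
+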

Every type has only one immediate supertype. However, each supertype has a supertype. We can get the whole chain with the supertypes (plural) function:

+
supertypes(Float64)
+
+
(Float64, AbstractFloat, Real, Number, Any)
+
+

Let us see all the floating point types available in Julia:

+
subtypes(AbstractFloat)
+
+
4-element Vector{Any}:
+ BigFloat
+ Float16
+ Float32
+ Float64
+
+

We can test whether or not a type is a subtype of something with the <: operator:

+
Float64 <: AbstractFloat
+
+
true
+
+
Float64 <: Float64
+
+
true
+
+
Int <: AbstractFloat
+
+
false
+
+
+

Warning

+

Subtypes and supertypes get complicated when dealing with containers:

+
+
Float64 <: Real
+
+
true
+
+
Vector{Float64} <: Vector{Real}
+
+
false
+
+
Vector{Float64} <: Vector
+
+
true
+
+

We can use this to write functions:

+
function my_abs_sub(x)
+    if typeof(x) <: Complex
+        println("Complex!")
+        return sqrt(x.re^2 + x.im^2)
+    elseif typeof(x) <: Real
+        println("Real!")
+        return x < 0 ? -x : x
+    else
+        error("Not a number!")
+    end
+end
+@show my_abs_sub(-5)
+@show my_abs_sub(-5.0)
+@show my_abs_sub(-1 + 2im);
+
+
Real!
+my_abs_sub(-5) = 5
+Real!
+my_abs_sub(-5.0) = 5.0
+Complex!
+my_abs_sub(-1 + 2im) = 2.23606797749979
+
+

Multiple Dispatch#

+

A more Julia way of doing this is to write the typing information directly into the function definition:

+
function my_abs_md(x::Real)
+    println("Multiple Dispatch Real!")
+    return x < 0 ? -x : x
+end
+function my_abs_md(x::Complex)
+    println("Multiple Dispatch Complex!")
+    return sqrt(x.re^2 + x.im^2)
+end
+@show my_abs_md(-5)
+@show my_abs_md(-1 + 2im);
+
+
Multiple Dispatch Real!
+my_abs_md(-5) = 5
+Multiple Dispatch Complex!
+my_abs_md(-1 + 2im) = 2.23606797749979
+
+

Notice that the functions have the same name, but the correct one is executed based on the type of the argument. This is called Multiple Dispatch.

+
+
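
We can ask Julia which methods a function has with the built-in methods function; as a quick check, this lists one method for Real arguments and one for Complex:

+
methods(my_abs_md)
+
+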

Tip

+

Add typing information for any function you are likely to use a lot. There are two reasons:

+
    +
  1. Type information is used by the Julia compiler to make code more efficient
  2. +
  3. Type information is a fast and easy way to document your code and catch bugs.
  4. +
+
+

Structs#

+

Defining Structs#

+

Julia allows us to define our own (composite) types:

+
struct Point
+    x::Float64
+    y::Float64
+end
+p0 = Point(0, 0)
+p1 = Point(1.0, 2.0)
+
+
Point(1.0, 2.0)
+
+

We can define functions with this type as the argument now

+
function distance(p::Point, q::Point)
+    return sqrt((p.x - q.x)^2 + (p.y - q.y)^2)
+end
+distance(p0, p1)
+
+
2.23606797749979
+
+

We can build structs with other structs as components:

+
struct Circle
+    center::Point
+    radius::Float64
+end
+
+my_circle = Circle(p1, 5)
+
+
Circle(Point(1.0, 2.0), 5.0)
+
+
function is_in(p::Point, c::Circle)
+    return distance(p, c.center) < c.radius
+end
+@show is_in(p0, my_circle)
+@show is_in(Point(100,0), my_circle);
+
+
is_in(p0, my_circle) = true
+is_in(Point(100, 0), my_circle) = false
+
+

Mutable Structs#

+

What if we want to change the radius of the circle?

+
my_circle.radius = 10.0 # Causes an error!!
+
+
setfield! immutable struct of type Circle cannot be changed
+
+
+
+Stacktrace:
+
+ [1] setproperty!(x::Circle, f::Symbol, v::Float64)
+
+   @ Base ./Base.jl:34
+
+ [2] top-level scope
+
+   @ In[34]:1
+
+ [3] eval
+
+   @ ./boot.jl:360 [inlined]
+
+ [4] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
+
+   @ Base ./loading.jl:1116
+
+

Structs are immutable (cannot be changed) by default in Julia. This allows for some optimizations behind the scenes and most of the time we do not need to change the values in a Struct.

+

If we need to change fields in a struct, we add the mutable keyword:

+
mutable struct MutableCircle
+    center::Point
+    radius::Float64
+end
+my_mutable_circle = MutableCircle(p1, 5.0)
+@show my_mutable_circle
+my_mutable_circle.radius = 10.0
+@show my_mutable_circle;
+
+
my_mutable_circle = MutableCircle(Point(1.0, 2.0), 5.0)
+my_mutable_circle = MutableCircle(Point(1.0, 2.0), 10.0)
+
+

Parametric Types#

+

Let us go back to our Point type:

+
struct Point
+    x::Float64
+    y::Float64
+end
+
+

We locked in the types of the fields of this struct. What if we want to use a Point struct with a different type, such as an Int? For that, we use a Parametric Type.

+

We define a Parametric Type in the following way:

+
struct ParametricPoint{R <: Real}
+    x::R
+    y::R
+end
+
+function distance(p::ParametricPoint{<:Real},
+        q::ParametricPoint{<:Real})
+    return sqrt((p.x - q.x)^2 + (p.y - q.y)^2)
+end;
+
+
p0 = ParametricPoint(1, -1)
+@show typeof(p0)
+p1 = ParametricPoint(2.0, 0.0)
+@show typeof(p1)
+@show distance(p0,p1);
+
+
typeof(p0) = ParametricPoint{Int64}
+typeof(p1) = ParametricPoint{Float64}
+distance(p0, p1) = 1.4142135623730951
+
+

Metaprogramming#

+

How Julia Code is Executed#

+

At a very high level, Julia code is executed in two phases:

+
    +
  1. Parsing a string and turning it into an expression
  2. +
  3. Evaluating that expression
  4. +
+

Expressions#

+

Julia code is parsed and turned into expressions. These expressions are themselves Julia data structures.

+
expr = Meta.parse("z^2 + 1")
+expr
+
+
:(z ^ 2 + 1)
+
+

While the expression prints as a human-readable mathematical expression, it is actually a tree:

+
dump(expr)
+
+
Expr
+  head: Symbol call
+  args: Array{Any}((3,))
+    1: Symbol +
+    2: Expr
+      head: Symbol call
+      args: Array{Any}((3,))
+        1: Symbol ^
+        2: Symbol z
+        3: Int64 2
+    3: Int64 1
+
+

Since this is a data structure, we can change the expression

+
expr.args[1] = :-
+expr.args[2].args[1] = :*
+expr
+
+
:(z * 2 - 1)
+
+

Then evaluate it

+
z = 3
+@show eval(expr)
+z = 2.5
+@show eval(expr);
+
+
eval(expr) = 5
+eval(expr) = 4.0
+
+

Note we gave z a value after we wrote the expression.

+

Macros#

+

A macro is a special function that takes expressions, symbols and literal values as arguments and returns an expression. The biggest difference between a macro and a normal function is that a macro is executed during the parse phase. This means that in a macro we have access to the expression!

+

Let's take a look at the @assert macro:

+
x = 5; y = 4;
+@assert x == y
+
+
AssertionError: x == y
+
+
+
+Stacktrace:
+
+ [1] top-level scope
+
+   @ In[42]:2
+
+ [2] eval
+
+   @ ./boot.jl:360 [inlined]
+
+ [3] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
+
+   @ Base ./loading.jl:1116
+
+

The error contains the expression that caused the error! This is not possible to do with a function because that expression is not available at runtime.

+

How do we write macros? More or less like we write functions but using the macro keyword instead of the function keyword:

+
macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+    x = [gensym() for _ in 1:nargs]
+    quote
+        $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+    end
+end
+
+
@fadd (macro with 1 method)
+
+

This macro takes two functions and creates an expression for a function that computes the sum of the two. It is actually generating code!

+
p(x) = x^2
+q(x) = (2x + 5) / x^2
+@fadd(h, p, q, 1)
+@show p(pi) + q(pi)
+@show h(pi);
+
+
p(pi) + q(pi) = 11.012830091668627
+h(pi) = 11.012830091668627
+
+

We can look at the expression that the macro generates with the macro @macroexpand:

+
@macroexpand(@fadd(h, p, q, 1))
+
+
quote
+    #= In[43]:4 =#
+    h(var"#73###258") = begin
+            #= In[43]:4 =#
+            p(var"#73###258") + q(var"#73###258")
+        end
+end
+
+

Ignoring all the stuff with # symbols we can see that the expression returned by the macro looks more or less like a function definition.

+

Having seen how this works, let's unpack the macro definition a bit more. For context, here's the whole definition again:

+
macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+    x = [gensym() for _ in 1:nargs]
+    quote
+        $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+    end
+end
+
+

We'll unpack it one line at a time.

+


First Line:

+
macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+    ...
+end
+
+

The macro definition looks a lot like a function definition but with macro instead of function.

+

Second Line:

+
    x = [gensym() for _ in 1:nargs]
+
+

Here we create a vector of symbols of size nargs. The gensym function generates a symbol for a variable that is guaranteed not to clash with existing variables. These symbols will be the arguments of our new function.

+
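
As a quick sketch of gensym (the generated name differs from run to run):

+
s = gensym()        # e.g. Symbol("##257") -- a unique, auto-generated name
+@show typeof(s);    # Symbol
+
+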

Third Line:

+
    quote
+        # expression here
+    end
+
+

This is an easy way to generate an expression. The contents of this block is the expression returned by the macro.

+

Fourth Line:

+
        $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+
+

This is the meat of the macro and it may seem a bit much at first. However, each term is essentially the same. So let's just focus on the left hand side of the equality.

+
        $(esc(name))($(x...))
+
+
    +
  • The name variable is local to the macro. Its value is what we want to put into the expression. So we interpolate it into the expression using $.
  • +
  • However, we want that symbol to be evaluated in the context in which the macro was called. So we tell Julia to leave the value as is with the esc function.
  • +
  • Without the call to esc, Julia will assume that the variable is local and needs to be renamed (with gensym) so that it will not clash with other variables.
  • +
  • Finally, we want to interpolate the contents of the vector x into the expression. This is done with the splat operator ... in conjunction with $.
  • +
+

Why can't we just write a function to do this? Let's try:

+
function fadd(name, f::Function, g::Function, nargs::Int)
+    x = [gensym() for _ in 1:nargs]
+    [WHAT HERE?](x...) = f(x...) + g(x...)
+    return [WHAT TO RETURN?]
+end
+
+

There are a couple problems here:

+
    +
  1. What do we put for the function name? We want the value of the argument name. If we just put name we would end up with a function called name.
  2. +
  3. What do we return? Even if we knew what to name the function, that name is only bound to the function in our current scope--the function fadd. Once we return from fadd, the name is no longer bound to this function.
  4. +
+

If we do not care about creating function names, we could construct and return an anonymous function:

+
function fadd(f::Function, g::Function, nargs::Int)
+    x = [gensym() for _ in 1:nargs]
+    return (x...)->(f(x...) + g(x...))
+end
+h1 = fadd(p,q,1)
+h1(pi)
+
+
11.012830091668627
+
+

This gets us pretty close to the same functionality since we could assign the function pointer to any valid variable name.

+

However, we did not maximize the value of the macro. We can actually generate documentation for our function as well:

+
macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+    x = [gensym() for _ in 1:nargs]
+    local help = "Functions $f and $g added together. Created with the `@fadd` macro!"
+    quote
+        @doc string($help)
+        $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+    end
+end
+@fadd(h,p,q,1);
+
+
?h
+
+
Functions p and q added together. Created with the `@fadd` macro!
+
+

Other Resources#

+

The Julia Documentation is a great place to read about Julia features. Numerous examples are normally given along with detailed explanation.

+

The official Julia website is a great place to find Julia tutorials, learn about the Julia community or discover research using Julia.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/Python/dask/index.html b/Documentation/Development/Languages/Python/dask/index.html new file mode 100644 index 000000000..2463ae8d9 --- /dev/null +++ b/Documentation/Development/Languages/Python/dask/index.html @@ -0,0 +1,5158 @@ + + + + + + + + + + + + + + + + + + + + + + + Dask - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Dask#

+

Dask is a framework for parallelizing Python code. The most common use case is to enable Python programmers to scale scientific and machine learning analyses to run on distributed hardware. Dask has similarities to Apache Spark (see FAQ for comparison), but Dask is more Python native and interfaces with common scientific libraries such as NumPy and Pandas.

+

Installation#

+
+

Warning

+

Conda environments should always be installed outside of your home directory for storage and performance reasons. This is especially important for frameworks like Dask, whose parallel processes can particularly strain the /home filesystem. Please refer to our dedicated conda documentation for more information on how to set up your conda environments to redirect the installation outside of /home by default.

+
+

Dask can be installed via Conda/Mamba. For example, to install Dask into a new environment from conda-forge into your /projects allocation folder, first load the appropriate conda (or mamba) module (e.g., module load mamba on Kestrel), and then run the following on a compute node.

+
# Be sure to replace "<allocation_handle>" with your HPC project.
+
+# interactive job
+salloc -A <allocation_handle> -p debug -t 01:00:00
+
+# load mamba module
+ml mamba
+
+# create and activate `dask-env` environment with Python 3.12
+mamba create --prefix=/projects/<allocation_handle>/dask-env conda-forge::python=3.12 conda-forge::dask
+conda activate /projects/<allocation_handle>/dask-env
+
+

This installs Dask along with common dependencies such as NumPy. Additionally, the dask-jobqueue package (discussed below), can be installed via:

+
mamba install conda-forge::dask-jobqueue
+
+

Further, there is the dask-mpi package (also discussed below). To ensure compatibility with the system MPI libraries, it is recommended to install dask-mpi using pip. As such, we recommend installing any conda packages first. dask-mpi depends on mpi4py, but we have found that the pip install command does not automatically install mpi4py, so we install it explicitly. Installation of mpi4py links against the system libraries, so the desired MPI library should be loaded first. In addition, it may be necessary to explicitly specify the MPI compiler driver. For example, to install mpi4py on Kestrel using the Intel programming environment and its associated MPI (PrgEnv-intel), you would do the following:

+
module load PrgEnv-intel
+MPICC=`which mpicc` pip install dask-mpi mpi4py
+
+

Dask single node#

+

Dask can be used locally on your laptop or an individual node. Additionally, it provides wrappers for multiprocessing and threadpools. One advantage of using LocalCluster is that you can easily drop in another cluster configuration to further parallelize, with minimal modification of the code.

+

The following is a simple example that uses a local cluster with the dask.delayed interface, which can be used when the problem doesn't fit into one of the built-in collection types such as dask.array or dask.dataframe:

+
+Dask local cluster +
from distributed import Client, LocalCluster
+import dask
+import time
+import random 
+
+@dask.delayed
+def inc(x):
+    time.sleep(random.random())
+    return x + 1
+
+@dask.delayed
+def dec(x):
+    time.sleep(random.random())
+    return x - 1
+
+@dask.delayed
+def add(x, y):
+    time.sleep(random.random())
+    return x + y
+
+def main ():
+   cluster = LocalCluster(n_workers=2)
+   client = Client(cluster)
+   zs = []
+   for i in range(256):
+      x = inc(i)
+      y = dec(x)
+      z = add(x, y)
+      zs.append(z)
+
+   result = dask.compute(*zs)
+   print (result)
+
+
+if __name__ == "__main__":
+   main()
+
+
+

Dask Jobqueue#

+

The dask-jobqueue library makes it easy to deploy Dask to a distributed cluster using Slurm (via SLURMCluster). This is particularly useful when running an interactive notebook, where the workers can be scaled dynamically.

+

For the following example, first make sure that both dask and dask-jobqueue have been installed. Create a file named dask_slurm_example.py with the following contents, and replace <allocation_handle> with your project allocation.

+

Assuming you are on Kestrel, this example will request two jobs from the shared partition.

+
+dask_slurm_example.py +
from dask_jobqueue import SLURMCluster
+import socket
+from dask.distributed import Client
+from collections import Counter
+
+cluster = SLURMCluster(
+   cores=18,
+   memory='24GB',
+   account='<allocation_handle>',
+   walltime='00:30:00',
+   processes=17,
+   queue='shared'
+)
+
+client = Client(cluster)
+
+def test():
+   return socket.gethostname()
+
+result = []
+cluster.scale(jobs=2)
+
+for i in range(2000):
+   result.append(client.submit(test).result())
+
+print(Counter(result))
+print(cluster.job_script())
+
+
+

Then the script can simply be executed directly from a login node:

+
python dask_slurm_example.py
+
+

Note that although 2 jobs are requested, Dask launches the jobs dynamically, so depending on the status of the job queue, your results may indicate that only a single node was used.
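If you want to confirm how the worker jobs were actually scheduled, you can watch the queue from a separate terminal while the script runs. A minimal sketch; the job name dask-worker is the dask-jobqueue default, but verify what your configuration uses:

# watch your jobs (run from a login node in another terminal)
squeue -u $USER -o "%.10i %.14j %.8T %.6D %R"

# count dask worker jobs that are currently running
squeue -u $USER --name=dask-worker -h -t RUNNING | wc -l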

+

Dask MPI#

+

Dask also provides a package called dask-mpi that uses MPI to create the cluster. Note that dask-mpi only uses MPI to start the cluster, not for inter-node communication.

+

Dask-MPI provides two interfaces to launch Dask, either from a batch script using the Python API, or from the command line.
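This page only demonstrates the batch-script/Python API route. As a rough sketch of the command-line route, the dask-mpi executable is launched under MPI and a client then connects through a shared scheduler file; the option names below (e.g. --scheduler-file) are taken from the dask-mpi documentation and should be checked against dask-mpi --help for your installed version:

# launch a scheduler and workers across the MPI ranks (verify option names locally)
srun -n 4 dask-mpi --scheduler-file=scheduler.json
# a separate client process can then connect by pointing dask.distributed.Client at scheduler.json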

+

Here we show a simple example that uses Dask-MPI with a batch script. Make sure that you have installed dask-mpi following the Installation Instructions. Create dask_mpi_example.py and dask_mpi_launcher.sh with the contents below. In dask_mpi_launcher.sh, replace <project> with your allocation, and /path/to/dask-env with the full conda prefix path into which you installed dask.

+
+dask_mpi_example.py +
from dask_mpi import initialize
+from dask.distributed import Client
+import socket
+import time
+from collections import Counter
+
+def test():
+   return socket.gethostname()
+
+def main():
+   initialize(nthreads=5)
+   client = Client()
+   time.sleep(15)
+
+   result = []
+
+   for i in range (0,100):
+      result.append(client.submit(test).result())
+      time.sleep(1)
+
+   out = str(Counter(result))
+   print(f'nodes: {out}')
+
+main()
+
+
+
+dask_mpi_launcher.sh +
#!/bin/bash 
+#SBATCH --nodes=2
+#SBATCH --ntasks=4
+#SBATCH --time=10
+#SBATCH --account=<project>
+
+ml mamba
+conda activate /path/to/dask-env
+srun -n 4 python dask_mpi_example.py
+
+
+

The job is then launched as:

+
sbatch dask_mpi_launcher.sh
+
+
+

Warning

+

We have observed errors such as distributed.comm.core.CommClosedError when using dask-mpi. These errors may be related to known issues such as GitHub Issue #94. Users that experience issues with dask-mpi are encouraged to use dask-jobqueue instead.

+
+

References#

+

Dask documentation

+

Dask Jobqueue

+

Dask MPI

diff --git a/Documentation/Development/Languages/Python/index.html b/Documentation/Development/Languages/Python/index.html

Python#

+

NREL HPC Tutorials#

+ +

HPC Python#

+

Links to External resources:

+
    +
  • MPI4PY: Python bindings to use MPI to distribute computations across cluster nodes
  • +
  • Dask: Easily launch Dask workers on one node or across nodes
  • +
  • Numba: Optimize your Python code to run faster
  • +
  • PyCUDA: Utilize GPUs to accelerate computations
  • +
diff --git a/Documentation/Development/Languages/bash/index.html b/Documentation/Development/Languages/bash/index.html

An Introduction to Bash Scripting#

+

Bash (Bourne Again Shell) is one of the most widely available and used command line shell applications. Along with basic shell functionality, it offers a wide variety of features which, if utilized thoughtfully, can create powerful automated execution sequences that run software, manipulate text and files, parallelize otherwise single-process software, or anything else you may want to do from the command line.

+

Shell scripts are also one of the most common ways our HPC community submits jobs, and running a large parallel workload often requires some initialization of the software environment before meaningful computations can begin. This typically involves tasks such as declaring environment variables, preparing input files or staging directories for data, loading modules and libraries that the software needs to run, preparing inputs, manipulating datasets, and so on. Bash can even be used to launch several single-core jobs, effectively taking on the role of an ad hoc batch executor, as well.

+

This article provides a brief introduction to bash, as well as a list of tips, tricks, and good practices when it comes to writing effective bash scripts that can apply widely in both HPC and non-HPC environments. We will also provide links to some additional resources to help further your bash scripting skills.

+

Executing/Invoking Scripts#

+

All bash commands work at the command prompt "live", i.e. interpreted line-by-line as you type commands and press enter. A bash "script" may be regarded as a list of bash commands that have been saved to a file for convenience, usually with some basic formatting, and possibly comments, for legibility.

+

All bash scripts must begin with a special character combination, called the "shebang" or #! character, followed by the name of an interpreter:

+

#!/bin/bash

+

This declares that the contents of the file that follow are to be interpreted as commands, using /bin/bash as the interpreter. This includes commands, control structures, and comments.

+

Plenty of other interpreters exist. For example, Python scripts begin with: #!/usr/bin/python or /usr/bin/env python, perl scripts: #!/usr/bin/perl, and so on.
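For example, a minimal bash script can be created, marked executable, and run like this:

cat > hello.sh << 'EOF'
#!/bin/bash
echo "Hello from bash"
EOF

chmod +x hello.sh   # mark the file executable
./hello.sh          # run it (equivalently: bash hello.sh)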

+

Bash Scripting Syntax#

+

If you read a bash script, you may be tempted to default to your usual understanding of how code generally works. For example, with most languages, typically there is a binary or kernel which digests the code you write (compilers/gcc for C, the python interpreter/shell, Java Virtual Machine for Java, and so on.) The binary/kernel/interpreter then interprets the text into some sort of data structure which enforces the priority of certain commands over others, and finally generates some execution of operations based on that data structure.

+

Bash isn't too far off from this model, and in some respects functions as any other interpreted language: you enter a command (or a control structure) and it is executed.

+

However, as a shell that also serves as your major interface to the underlying operating system, it does have some properties and features that may blur the lines between what you think of as 'interpreted' versus 'compiled'.

+

For instance, many aspects of the bash "language" are actually just the names of pre-compiled binaries which do the heavy lifting. Much the same way you can run python or ssh in a command line, under the hood normal bash operations such as if, echo, and exit are actually just programs that expect a certain cadence for the arguments you give it. A block such as:

+

if true; then echo "true was true"; fi
+
+This is really just a sequence of executing many compiled applications or shell built-ins with arguments; the names of these commands were just chosen to read as a typical programming grammar.

+

A good example is the program [ which is just an oddly-named command you can invoke. Try running which [ at a command prompt. The results may surprise you: /usr/bin/[ is actually a compiled program on disk, not a "built-in" function!

+

This is why you need to have a space between the brackets and your conditional, because the conditional itself is passed as an argument to the command [. In languages like C it's common to write the syntax as if (conditional) { ...; }. However, in bash, if you try to run if [true] you will likely get an error saying there isn't a command called [true] that you can run. This is also why you often see stray semicolons that seem somewhat arbitrary, as semicolons separate the execution of two binaries. Take this snippet for example: +

echo "First message." ; echo "Second message."
+
+This is equivalent to: +
echo "First message."
+echo "Second message."
+
+In the first snippet, if the semicolon was not present, the second echo would be interpreted as an argument to the first echo and would end up outputting: First message. echo Second message.

+

Bash interprets ; and \n (newline) as command separators. If you need to pass these characters through literally as arguments (common, for example, with find's -exec flag), you need to escape them with a \. Escaping a newline is also useful for continuing a single command across several lines to improve readability, as in this example:

chromium-browser \
+--start-fullscreen \
+--new-window \
+--incognito \
+'https://google.com'
+

+

Similarly, normal if-then-else control flow that you would expect of any programming/scripting language has the same caveats. Consider this snippet: +

if true
+then
+  echo "true is true"
+else
+  echo "false is true?"
+fi
+
+If we break down what's essentially happening here (omitting some of the technical details):

+
    +
  • if invokes the command true which always exits with a successful exit code (0)
  • +
  • if interprets a successful exit code (0) as true and runs the then branch.
  • +
  • the then command will execute anything it's given until else, elif, or fi
  • +
  • the else command is the same as then, but its commands only execute if the if condition returned a nonzero (error) exit code.
  • +
  • the fi command indicates that no more conditional branches exist relative to the logical expression given to the original if.
  • +
+

All this to say, this is why you often see if-then-else blocks written succinctly as if [ <CONDITIONAL> ]; then <COMMANDS>; fi with seemingly arbitrary semicolons and spaces. It is exactly because things work this way that bash is able to execute arbitrary executables (some of which you may end up writing) without requiring something like Python's subprocess module.

+

This is just to give you an understanding for why some of the syntax you will encounter is the way it is. Everything in bash is either a command or an argument to a command.

+

Parentheses, Braces, and Brackets#

+

Bash utilizes many flavors of symbolic enclosures. A complete guide is beyond the scope of this document, but you may see the following:

+
    +
  • ( ) - Single parentheses: run enclosed commands in a subshell
      +
    • a='bad';(a='good'; mkdir $a); echo $a +result: directory "good" is made, echoes "bad" to screen
    • +
    +
  • +
  • $( ) - Single parentheses with dollar sign: subshell output to string (command substitution; preferred method)
      +
    • echo "my name is $( whoami )" +result: prints your username
    • +
    +
  • +
  • <( ) - Parentheses with angle bracket: process substitution
      +
    • sort -n -k 5 <( ls -l ./dir1) <(ls -l ./dir2) +result: sorts ls -l results of two directories by column 5 (size)
    • +
    +
  • +
  • [ ] - Single Brackets: truth testing with filename expansion or word splitting
      +
    • if [ -e myfile.txt ]; then echo "yay"; else echo "boo"; fi +result: if myfile.txt exists, celebrate
    • +
    +
  • +
  • { } - Single Braces/curly brackets: expansion of a range
  • +
  • ${ } - Single braces with dollar sign: expansion with interpolation
  • +
  • ` ` - Backticks: command/process substitution
  • +
  • (( )) - Double parentheses: integer arithmetic
  • +
  • $(( )) - Double parentheses with dollar sign: integer arithmetic to string
  • +
  • [[ ]] - Double brackets: truth testing with regex (brief examples of these last few forms follow this list)
  • +
+
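Brief examples of the forms above that were not illustrated inline (all standard bash behavior):

# { } - brace expansion of a range or list
echo file{1..3}.txt           # file1.txt file2.txt file3.txt

# ${ } - expansion with interpolation
name="world"; echo "hello, ${name}"

# ` ` - backticks, older-style command substitution (prefer $( ))
today=`date +%F`

# (( )) and $(( )) - integer arithmetic
count=3
(( count += 1 ))              # arithmetic statement; count is now 4
echo $(( count * 2 ))         # arithmetic expansion to a string; prints 8

# [[ ]] - truth testing with regex support
if [[ "abc123" =~ ^[a-z]+[0-9]+$ ]]; then echo "matches"; fi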

Additional Notes on ( ) (Single Parentheses)#

+

There are 3 features in Bash which are denoted by a pair of parentheses, which are Bash subshells, Bash array declarations, and Bash function declarations. See the table below for when each feature is enacted:

  • Command/line begins with ( : Runs the contained expression(s) in a subshell. Everything up to the closing ) is passed to a child fork of Bash that inherits the environment from the invoking Bash instance, and the subshell exits with the exit code of the last command it ran. See the section on subshells for more info.
  • A valid Bash identifier is set equal to a parenthetically enclosed list of items (e.g., arr=("a" "b" "c")): Creates a Bash array with the elements enclosed by the parentheses. By default the elements are indexed numerically from 0 in the given order, but this order can be overridden or string-based keys can be used. See the section on arrays for more info.
  • A valid Bash identifier followed by () with commands enclosed by { } (e.g., func() { echo "test"; }): Declares a function which can be reused throughout a Bash script. See either the "{ }" entry above or the section on functions for more info.

Examples of Enclosure Usage#

+

Note that whitespace is required, prohibited, or ignored in certain situations. See this block for specific examples of how to use whitespace in the various contexts of parentheses.

### Subshells
+(echo hi)   # OK
+( echo hi)  # OK
+(echo hi )  # OK
+( echo hi ) # OK
+
+### Arrays
+arr=("a" "b" "c")   # Array of 3 strings
+arr =("a" "b" "c")    # ERROR
+arr= ("a" "b" "c")    # ERROR
+arr = ("a" "b" "c")   # ERROR
+arr=("a""b""c")     # Array of one element that is "abc"
+arr=("a","b","c")   # Array of one element that is "a,b,c"
+arr=("a", "b", "c") # ${arr[0]} == "a,"
+
+### Functions 
+func(){echo hi;} # ERROR
+func(){ echo hi;}     # OK
+func (){ echo hi;}    # OK
+func () { echo hi;}   # OK
+func () { echo hi; }  # OK
+

  • (ls -1 | head -n 1) : Runs the command in a subshell. This returns the exit code of the last process that was run.
  • test_var=(ls -1) : Creates a bash array with the elements ls and -1, meaning ${test_var[1]} will evaluate to -1.
  • test_var=$(ls -1) : Evaluates ls -1 and captures the output as a string.
  • test_var=(`ls -1`) or test_var=($(ls -1)) : Evaluates ls -1 and captures the output as an array.

Bracket Usage:#

+

Correct:

+
    +
  • +

    [ cmd ] - There must be spaces or terminating characters (\n or ;) surrounding any brackets.

    +
  • +
  • +

    Like many common bash commands, "[" is actually a standalone executable, usually located at /usr/bin/[, so it requires spaces to invoke correctly.

    +
  • +
+

Erroneous:

+
    +
  • [cmd] - tries to find a command called [cmd] which likely doesn't exist
  • +
  • [cmd ] - tries to find a command called [cmd and pass ] as an argument to it
  • +
  • [ cmd] - tries to pass cmd] as an argument to [ which expects an argument of ] that isn't technically provided.
  • +
+

There are many other examples of using enclosures in bash scripting beyond the scope of this introduction. Please see the resources section for more information.

+

Variables#

+

Variable assignment in bash simply binds a value to a name. All subsequent references to that variable must be prefixed by $:

+
$ MYSTRING="a string"
+$ echo $MYSTRING
+a string
+$ MYNUMBER="42"
+$ echo $MYNUMBER
+42
+
+

Exporting Variables#

+

When you declare a variable in bash, that variable is only available in the shell in which it is declared; if you spawn a sub-shell, the variable will not be accessible. Using the export command, you can essentially declare the variable to be inheritable.

+
# without exporting:
+$ TESTVAR=100  
+$ echo $TESTVAR
+100     # returns a result
+$ bash  # spawn a sub-shell
+$ echo $TESTVAR
+        # no result
+$ exit  # exit the subshell
+# with exporting: 
+$ export TESTVAR=100
+$ echo $TESTVAR
+100     # returns a result 
+$ bash  # spawn a sub-shell
+$ echo $TESTVAR  
+100     # value is passed into the subshell
+$ exit  # exit the subshell
+$
+
+

Sourcing Variables#

+

"Source" (shortcut: .) is a built-in bash command that takes a bash script as an argument. Bash will execute the contents of that file in the current shell, instead of spawning a sub-shell. This will load any variables, function declarations, and so on into your current shell.

+

A common example of using the source command is when making changes to your ~/.bashrc, which is usually only parsed once upon login. Rather than logging out and logging back in every time you wish to make a change, you can simply run source ~/.bashrc or . ~/.bashrc and the changes will take effect immediately.
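As another small example, a file of variable and function definitions can be loaded into the current shell rather than run in a sub-shell:

cat > myvars.sh << 'EOF'
export DATA_DIR=/scratch/$USER/data
say_hi() { echo "hi from $DATA_DIR"; }
EOF

source myvars.sh   # or: . myvars.sh
say_hi             # the variable and function are now defined in this shell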

+

Declaring Variables#

+

Variable typing in bash is implicit, and the need to declare a type is rare, but the declare command can be used when necessary: +

$ declare -i MYNUMBER # set type as an integer
+$ echo $MYNUMBER
+0
+$ declare -l MYWORD="LOWERCASE" # set type as lowercase 
+$ echo $MYWORD
+lowercase
+$
+
+see help declare at the command line for more information on types that can be declared.

+

Further Resources#

+

NREL HPC Github - User-contributed bash scripts and examples that you can use on HPC systems.

+

BASH cheat sheet - A concise and extensive list of example commands, built-ins, control structures, and other useful bash scripting material.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Languages/c++/index.html b/Documentation/Development/Languages/c++/index.html new file mode 100644 index 000000000..36a03a4e4 --- /dev/null +++ b/Documentation/Development/Languages/c++/index.html @@ -0,0 +1,5027 @@ + + + + + + + + + + + + + + + + + + + + + + + C++ - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

C++#

+

"C++ is a general-purpose programming language providing a direct and efficient model of hardware combined with facilities for defining lightweight abstractions." + - Bjarne Stroustrup, "The C++ Programming Language, Fourth Edition"

+

Getting Started#

+

This section illustrates the process to compile and run a basic C++ program on the HPC systems.

+

Hello World#

+

Begin by creating a source file named hello.cpp with the following contents:

+
#include <iostream>
+
+int main(void) {
+  std::cout << "Hello, World!\n";
+  return 0;
+}
+
+

Next, we must select the compiler to use for compiling our program. We can choose among GNU, Intel, and Cray compilers, depending on the system that we are using (see Compilers and Toolchains). To see available modules and versions, use module avail. For this example, we will use the g++ compiler, which is part of GNU's gcc package. We will load the default version of the compiler, which in this case is gcc 10.1:

+
$ module load gcc
+$ module list
+Currently Loaded Modules:
+  1) gcc/10.1.0
+$ gcc --version | head -1
+gcc (Spack GCC) 10.1.0
+
+

With the gcc package, the C++ compiler is provided by the g++ command. To compile the program, run:

+
$ g++ hello.cpp -o hello
+
+

This creates an executable named hello. Now run the program and observe the output:

+
$ ./hello
+Hello, World!
+
+

Compilers and Toolchains#

+

The following is a summary of available compilers and toolchains. Users are encouraged to run module avail to check for the most up-to-date information on a particular system.

Toolchain | C++ Compiler | Module                  | Systems
gcc       | g++          | gcc                     | All
Intel     | icpc         | intel-oneapi-compilers  | Swift, Vermilion, Kestrel
Cray      | CC           | PrgEnv-cray             | Kestrel

Note that Kestrel also provides the PrgEnv-intel and PrgEnv-gnu modules, which combine the Intel or gcc compilers together with Cray MPICH. Please refer to Kestrel Programming Environments Overview for details about the programming environments available on Kestrel.
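As a sketch, the same hello.cpp can be built with the other toolchains from the table above (module names are taken from that table; exact versions on a given system may differ):

# Intel toolchain
module load intel-oneapi-compilers
icpc hello.cpp -o hello

# Cray toolchain (Kestrel)
module load PrgEnv-cray
CC hello.cpp -o hello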

+

For information specific to compiling MPI applications, refer to MPI.

diff --git a/Documentation/Development/Languages/idl/index.html b/Documentation/Development/Languages/idl/index.html

Idl

diff --git a/Documentation/Development/Languages/r/index.html b/Documentation/Development/Languages/r/index.html

Running R Statistical Computing Environment Software#

+

Learn how to run the R statistical computing environment software.

+

What Is R?#

+

R is an open-source programming language designed for statistical computing and graphics. It is the current standard for the development of new statistical methodologies and enjoys a large user base.

+

For more information related to the R project, see the R website.

+

Accessing R#

+

The supported method for using R on the HPC systems is via Anaconda. In order to access R, first load the anaconda module (on Kestrel, this is module load anaconda3). Then, create a new conda environment that contains at least the r-base package. Optionally, install the r-essentials bundle, which provides many of the most popular R packages for data science.

+

For example, to create and activate a new environment named r_env on Kestrel that includes the r-essentials bundle:

+
module load anaconda3
+conda create -n r_env r-essentials r-base
+conda activate r_env
+
+

For more information about using R in the Anaconda framework, see Using R language with Anaconda.

+
+

Note

+

To avoid possible conflicts, remove any Intel compiler modules before loading R. One way to do this is via the following:

+
$ module purge
+$ module load anaconda3
+
+
+

Running R Interactively#

+

R is most commonly used via an interactive shell. To do this, first request an interactive compute node (see running interactive jobs) using the srun command. Alternatively, R can be used through Jupyterhub.

+

Once on a compute node, R environments can be accessed through Anaconda as described above. To access the R interactive console, type R at the command line. You will be prompted with the familiar R console in your terminal window:

+
+R Terminal +
$ R
+
+R version 4.0.5 (2021-03-31) -- "Shake and Throw"
+Copyright (C) 2021 The R Foundation for Statistical Computing
+Platform: x86_64-conda-linux-gnu (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+  Natural language support but running in an English locale
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+
+

Running R Scripts#

+

Since running R programs line by line in the interactive console can be a little tedious, it is often better to combine R commands into a single script and have R execute them all at once. R scripts are text files containing R commands with file extension .R:

+
+

hello_world.R

+
message = "Hi there!"
+nums = sample(1:100, 5)
+cat(message, "\n")
+cat("Here are some random numbers: ", paste(nums, sep = ", "),"\n")
+
+
+

There are several options for running R scripts:

+
+source() +

The source() function will execute R scripts from inside the interactive console.

+
> source("hello_world.R")
+  Hi there! 
+  Here are some random numbers:  100 41 14 82 63 
+
+
+
+Rscript +

The Rscript command can be used to run R scripts from the command line. Output is piped to stdout.

+
$ Rscript hello_world.R
+Hi there! 
+Here are some random numbers:  71 37 50 24 90 
+
+
+
+R CMD BATCH +

R CMD BATCH is an older command that behaves similarly to Rscript. All output is piped to a corresponding .Rout file.

+
$ R CMD BATCH --no-site-file hello_world.R
+$ cat hello_world.Rout 
+
+> #hello_world.R
+> 
+> message = "Hi there!"
+> nums = sample(1:100, 5)
+> cat(message, "\n")
+Hi there! 
+> cat("Here are some random numbers: ", paste(nums, sep = ", "),"\n")
+Here are some random numbers:  41 51 61 70 43 
+> 
+> proc.time()
+   user  system elapsed 
+  0.188   0.024   0.277 
+
+
+

Submitting Jobs#

+

Another option for using R on the HPC systems is to submit batch jobs that run non-interactively on compute nodes.

+

An example job script for running the hello_world.R example is below (make sure to update your allocation name as well as the name of the conda environment where R has been installed):

+
#! /bin/bash
+#SBATCH --job-name=helloworld
+#SBATCH --nodes=1
+#SBATCH --time=60
+#SBATCH --account=<your_allocation_id>
+
+module purge
+module load anaconda3
+conda activate <r_env>
+Rscript hello_world.R
+
+

Versions and Packages#

+

R is a popular open-source language with an active development community. New versions of R are frequently released, and any version can be installed into a custom anaconda environment. Commands for installing and using another version are shown below:

+
+Custom Installation with Conda +
$ conda search r-essentials
+Loading channels: done
+# Name                  Version           Build  Channel
+r-essentials                1.0        r3.2.1_0  pkgs/r
+r-essentials                1.0       r3.2.1_0a  pkgs/r
+r-essentials                1.1        r3.2.1_0  pkgs/r
+r-essentials                1.1       r3.2.1_0a  pkgs/r
+r-essentials                1.1        r3.2.2_0  pkgs/r
+r-essentials                1.1       r3.2.2_0a  pkgs/r
+r-essentials                1.1        r3.2.2_1  pkgs/r
+r-essentials                1.1       r3.2.2_1a  pkgs/r
+r-essentials                1.4               0  pkgs/r
+r-essentials              1.4.1        r3.3.1_0  pkgs/r
+r-essentials              1.4.2               0  pkgs/r
+r-essentials              1.4.2        r3.3.1_0  pkgs/r
+r-essentials              1.4.3        r3.3.1_0  pkgs/r
+r-essentials              1.5.0               0  pkgs/r
+r-essentials              1.5.1               0  pkgs/r
+r-essentials              1.5.2        r3.3.2_0  pkgs/r
+r-essentials              1.5.2        r3.4.1_0  pkgs/r
+r-essentials              1.6.0        r3.4.1_0  pkgs/r
+r-essentials              1.7.0  r342hf65ed6a_0  pkgs/r
+r-essentials              3.4.3        mro343_0  pkgs/r
+r-essentials              3.4.3          r343_0  pkgs/r
+r-essentials              3.5.0        mro350_0  pkgs/r
+r-essentials              3.5.0          r350_0  pkgs/r
+r-essentials              3.5.1        mro351_0  pkgs/r
+r-essentials              3.5.1          r351_0  pkgs/r
+$ conda create -n otherr r-essentials==3.5.1
+<Text>
+$ . activate otherr
+(otherr) $ R --version
+R version 3.5.1 (2018-07-02) -- "Feather Spray"
+Copyright (C) 2018 The R Foundation for Statistical Computing
+Platform: x86_64-pc-linux-gnu (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under the terms of the
+GNU General Public License versions 2 or 3.
+For more information about these matters see
+http://www.gnu.org/licenses/.
+
+
+

Installing New Packages#

+

The install.packages() command in R will download new packages from the CRAN source directory and install them for your account. If you are running R from within a custom Anaconda environment, they will be specific to that environment. In either case, these packages will not be visible to other users.
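Packages can also be installed non-interactively from the shell, which is convenient inside a batch script; a minimal sketch, assuming your R conda environment is already activated:

Rscript -e 'install.packages("ggplot2", repos = "https://cran.r-project.org")'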

+

Checking Installed Packages#

+

The command installed.packages() in R lists details about all packages that are installed and visible to the current R session.

+

Loading Packages#

+

Packages are loaded into the current R environment through the library() function.

+

Graphics#

+

R is commonly used to produce high-quality graphics based on data. This capability is built-in and can be extended through the use of packages such as ggplot2. To produce graphics on the HPC systems, the easiest method is to output graphical displays to an appropriate filetype (pdf, jpeg, etc.). Then this file can be moved to your local machine using command line tools such as scp or rsync.

+
+Example R Script for Graphics Output +
library(ggplot2)
+set.seed(8675309)
+numbers = rnorm(200, sd = 2)
+more.numbers = rnorm(100, mean = 10, sd = 2)
+
+df = data.frame(values = c(numbers, more.numbers))
+
+p = ggplot(df, aes(x = values, y = ..density..)) +
+    geom_histogram(fill = "dodgerblue",
+                   colour = "black",
+                   alpha = .5,
+                   binwidth = .5) +
+    geom_density(size = 1.5) +
+    labs(y = "Density", x = "Value",
+         title = "Histogram Example")
+
+png(file = "histogram_example.png")
+print(p)
+dev.off()
+
+
+
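Once a job like the one above has written histogram_example.png, it can be copied back to your workstation; a sketch run from your local machine, with the host name and paths as placeholders:

# run these from your local machine, not from the HPC system
scp <username>@<hpc-login-host>:/scratch/<username>/histogram_example.png .
# or
rsync -av <username>@<hpc-login-host>:/scratch/<username>/histogram_example.png .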

Parallel Programming in R#

+

Programming in R on the HPC systems has two distinct advantages. First, running jobs on a remote system means you do not have to tie up your local machine. This can be particularly useful for jobs that take considerable time and resources to run. Secondly, the increased computational capabilities of the HPC system provide an opportunity to improve performance through parallel processing. R code, like many programming languages, is typically written and executed serially. This means that the added benefits of having multiple processing cores available are typically lost.

+

A major goal of the R community in recent years has been the development of specialized libraries and programming paradigms to better leverage modern HPC systems. The CRAN task view for High Performance Computing and Parallel Programming contains a detailed list of packages that address various aspects of these problems. For more information, see CRAN Task View: High-Performance and Parallel Computing with R.

+

Notable examples are:

+
    +
  • Parallel
  • +
  • Foreach
  • +
  • Multicore
  • +
  • Snow
  • +
  • pbdR
  • +
  • Rmpi
  • +
+

Each package includes in-depth documentation and examples for how to implement parallel processing in R code. Learning these packages does require a moderate amount of time, but for many large problems the improvements in computational efficiency dramatically outweigh the initial investment.

+

Most of these packages will have to be installed in a custom environment as many dependencies are incompatible with the version of openmpi installed in conda.

+
+Using the pbdR Project +

The pbdR project "enables high-level distributed data parallelism in R, so that it can easily utilize large HPC platforms with thousands of cores, making the R language scale to unparalleled heights." There are several packages within this project: pbdMPI for easy MPI work, pbdDMAT for distributed data matrices and associated functions, and pbdDEMO for a tutorial/vignette describing most of the project's details.

+

The pbdMPI package provides the MPI interface, which requires Open MPI. Note that Open MPI must be loaded prior to installing the package. For example, on Kestrel:

+
$ module load openmpi/4.1.5-gcc
+$ R
+> install.packages("pbdMPI")
+
+

The following script is a ranknode.R example using the pbdMPI package:

+
library(pbdMPI, quiet = TRUE)
+init()
+.comm.size <- comm.size()
+.comm.rank <- comm.rank()
+.hostname <- Sys.info()["nodename"]
+msg <- sprintf("I am %d of %d on %s.\n", .comm.rank, .comm.size, .hostname)
+comm.cat(msg, all.rank = TRUE, quiet = TRUE)
+comm.cat(msg, rank.print = sample(0:.comm.size, size = 1))
+comm.cat(msg, rank.print = sample(0:.comm.size, size = 1), quiet = TRUE)
+finalize()
+
+

You could run this interactively from a compute node or by submitting it to the job scheduler using a shell script similar to the one given below. For example, you would submit using sbatch ranknode.sh from a login node, provided you name the script appropriately:

+
#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=24
+#SBATCH --time=5
+#SBATCH --account=<your_allocation_id>
+
+module purge
+module load anaconda3
+module load openmpi/4.1.5-gcc
+conda activate <r_env>
+
+INPUT_BASENAME=ranknode # JOB NAME - USER INPUT PARAMETER
+JOB_FILE=$INPUT_BASENAME.R
+OUT_FILE=$INPUT_BASENAME.Rout
+srun -n 48 Rscript $JOB_FILE > $OUT_FILE
+
+

In either case (interactive or queue submission), the output produced from the ranknode.R script should look like this:

+
I am 0 of 48 on x1004c0s2b0n0.
+I am 1 of 48 on x1004c0s2b0n0.
+I am 2 of 48 on x1004c0s2b0n0.
+...
+I am 46 of 48 on x1004c0s2b0n1.
+I am 47 of 48 on x1004c0s2b0n1.
+I am 42 of 48 on x1004c0s2b0n1.
+I am 45 of 48 on x1004c0s2b0n1.
+
+
+

Contacts#

+

For questions on statistics, the R software environment itself, or advanced R package questions, please contact Lindy Williams.

+

Additionally, NREL has an internal R Users Group that meets periodically to highlight interesting packages, problems, and share experiences related to R programming. For more details, contact Daniel Inman.

+

References#

diff --git a/Documentation/Development/Libraries/fftw/index.html b/Documentation/Development/Libraries/fftw/index.html

FFTW#

+

Documentation: FFTW

+

FFTW is a C library for computing discrete Fourier transforms of arbitrary input sizes and dimensions. It is optimized for speed and can perform discrete Fourier transforms up to several orders of magnitude faster than other commonly available Fourier transform libraries. FFTW supports both single-precision and double-precision transforms, as well as multithreading for parallel execution on shared-memory systems.
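As a minimal sketch of using FFTW on the clusters, load an FFTW module and link with -lfftw3 (the module name is an assumption; check module avail fftw on your system):

module load fftw                     # module name/version may differ per system
gcc my_fft.c -lfftw3 -lm -o my_fft   # add -lfftw3f for single precision or -lfftw3_threads for threaded transforms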

diff --git a/Documentation/Development/Libraries/hdf5/index.html b/Documentation/Development/Libraries/hdf5/index.html

HDF5#

+

Documentation: HDF5

+

HDF5 is a versatile data storage and management library designed for storing and exchanging large and complex data collections. It provides a powerful and flexible data model for representing and organizing data, as well as a variety of high-level programming interfaces for accessing and manipulating data. HDF5 supports a wide range of data types and can handle data sets of virtually unlimited size.

+

HDF5 supports both parallel and serial file I/O, achieving high performance with both.
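As a minimal sketch, serial HDF5 C programs can be built with the h5cc wrapper that HDF5 installations provide (the module name is an assumption; check module avail hdf5 for the build matching your toolchain):

module load hdf5        # pick the build that matches your compiler/MPI toolchain
h5cc my_io.c -o my_io   # h5pcc is the analogous wrapper for parallel (MPI) builds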

diff --git a/Documentation/Development/Libraries/howto/index.html b/Documentation/Development/Libraries/howto/index.html

Libraries How-To: Linking Scientific Libraries#

+

This page is a tutorial explaining how to include scientific libraries when compiling software.

+

There are a few common scientific libraries: LAPACK, BLAS, BLACS, scaLAPACK, FFTW, HDF5, and others. These libraries are generally highly optimized, and many scientific programs favor use of these libraries over in-house implementations of similar functionality. See our libraries overview page for more information.

+

Scientific libraries can be packaged together, like in the Intel Math Kernel Library (MKL), or Cray’s LibSci. They can also be built completely separately and act as standalone libraries. These libraries can be built with different MPI implementations and compiler choices.

+

If you’re building a code that relies on one or more of these libraries, you can choose how to include these libraries. By the end of this tutorial, how to include these libraries should be clearer. If you need help building a particular package on an NREL machine, please contact HPC help.

+

Makefiles, autoconf, and cmake#

+

Build tools like make, autoconf, and cmake are convenient ways to automate the compilation of a code. If you’re building a package, you may need to modify/customize how the code compiles, e.g., so it finds and includes the libraries you want. This may involve directly modifying the makefile, modifying the make.include (or make.inc, makefile.include, etc.) file, or using tools like autoconf or CMake to configure the makefile.

+

Modifying a makefile (or make.include, etc.) so it compiles using the scientific libraries you want can be a daunting process. We’ll go through a prototypical example and show how different libraries can be included in the build of a program. To do this, we’ll use a makefile.include file for the electronic structure program VASP.

+
+

Note

+

We provide a walkthrough of linking scientific libraries using the VASP code as an example. This walkthrough tries to demonstrate key features of the general process of including scientific libraries in a build. We note that the exact build and modification process will vary between codes. Consulting the documentation of the code you’re trying to build is always the best place to start.

+
+

Walkthrough#

+

Overview#

+

We’ll use the VASP makefile.include file as our walkthrough example. We can find a number of VASP makefile.include files here. We’ll be looking specifically at this file.

+

We’ll take a look at building with Intel MKL and the HDF5 package.

+

Building with MKL and HDF5#

+

We want to build with MKL and HDF5. If we look at the VASP documentation, we see that LAPACK, scaLAPACK, BLAS, and FFTW are required. MKL covers all of these needs. Thus, we need to tell the makefile where to look for MKL.

+

Environment Preparation#

+

We need our MKL to be built with the same compilers and MPI implementation as we’re building VASP with. Let’s see what sorts of MKL builds are available to us. Using the following command to show what builds of mkl are available as a module:

+

module avail 2>&1 | grep mkl

+

Yields the output:

+

intel-oneapi-mkl/2023.0.0-intel ucx/1.13.0

+

Thus, if we want to use the toolchains managed by NREL, we must use the Intel oneapi toolchain in our VASP build, since intel-oneapi-mkl/2023.0.0-intel is the only available mkl module. If you want to use a different toolchain, you could build MKL yourself, but that’s outside the scope of this article.

+

To “use the Intel oneapi toolchain” means to use Intel compilers and Intel’s implementation of MPI to compile VASP. We’re doing this because mkl was built with this toolchain, and we want our toolchains to match as best as possible to minimize build errors and bugs.

+

Let’s prepare our environment to use this toolchain. First,

+

module purge

+

To clear your environment. Now, we want the Intel oneapi mkl module, the Intel fortran compiler (ifort), and the Intel MPI fortran compiler (mpiifort). Type:

+

module avail 2>&1 | grep oneapi

+

to see which modules are related to the intel-oneapi toolchain. We can locate the three we want:

+
module load intel-oneapi-mkl/2023.0.0-intel 
+module load intel-oneapi-mpi/2021.8.0-intel 
+module load intel-oneapi/2022.1.0 
+
+

How do we know these are the ones we want? The first line loads the mkl module. The second line gives us mpiifort, the Intel MPI fortran compiler, and the third line gives us ifort, the Intel Fortran compiler. (test the latter two with which mpiifort and which ifort -- you’ll see that they’re now in your path. If you module purge and try which mpiifort again, you’ll see you’re not able to find mpiifort anymore.)

+

Modifying the Makefile for MKL#

+

Now that we have the toolchain loaded into our environment, let’s take a look at the actual makefile.include file (link to file here). There are two important sections for the purpose of getting the code to build. The first:

+
CPP         = fpp -f_com=no -free -w0  $*$(FUFFIX) $*$(SUFFIX) $(CPP_OPTIONS) 
+FC          = mpiifort -qopenmp 
+FCL         = mpiifort 
+
+

The first line says that the compiler pre-processor will be fpp (try which fpp and you should get an output /sfs/nopt/nrel/apps/compilers/01-23/spack/opt/spack/linux-rhel8-icelake/gcc-8.4.0/intel-oneapi-compilers-2022.1.0-wosfexnwo5ag3gyfoco2w6upcew5yj6f/compiler/2022.1.0/linux/bin/intel64/fpp, confirming that we’re pulling fpp from intel-oneapi).

+

The second and third lines say that we'll be using Intel's MPI (Try which mpiifort to confirm that it is in your path). FC is the "Fortran Compiler" and FCL is the corresponding linker. The -qopenmp flag on the FC line additionally says we'll be compiling with OpenMP. Different compilers have different executable names (e.g. mpiifort for Intel MPI fortran compiler, mpifort for GNU). See the Fortran documentation page for a complete list.

+

The next important section is given below:

+
# Intel MKL (FFTW, BLAS, LAPACK, and scaLAPACK) 
+# (Note: for Intel Parallel Studio's MKL use -mkl instead of -qmkl) 
+FCL        += -qmkl 
+MKLROOT    ?= /path/to/your/mkl/installation 
+LLIBS      += -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
+INCS        =-I$(MKLROOT)/include/fftw 
+
+

This makefile.include file has been provided to us by VASP. Our job here is two-fold:

+
    +
  1. To ensure that we tell make (via the makefile.include file) the correct place to find MKL, I.e., to ensure that MKLROOT in the makefile.include file is set correctly.
  2. +
  3. To ensure that we tell make the correct libraries to reference within MKLROOT.
  4. +
+

To do step 1, first type:

+

module list

+

To see the modules you’ve loaded into your environment. You should have intel-oneapi-mkl/2023.0.0-intel in the list. If not, review the environment preparation section. Now, we use the module show command to find the root directory of mkl:

+

module show intel-oneapi-mkl/2023.0.0-intel

+

We see in the output of this command the following line:

+

setenv MKLROOT /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0

+

If we type echo $MKLROOT, we can confirm that this environment variable is properly set from when we ran the command module load intel-oneapi-mkl/2023.0.0-intel. In the VASP makefile, we have MKLROOT ?= /path/to/your/mkl/installation. The ?= means that this variable will not be set if MKLROOT has already been set. So, we can ignore this line if we’d like. However, to be safe, we should simply copy the path of the MKL root directory to this line in makefile.include, so that this line now reads:

+

MKLROOT ?= /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0

+
+

Tip

+

The name of the environment variable for mkl’s root directory set by its module (MKLROOT, set when we module load intel-oneapi-mkl/2023.0.0-intel) is not necessarily going to match the corresponding root directory variable in a given makefile. It did in this instance, but that’s not guaranteed. The VASP makefile.include could have just as easily used MKL_ROOT, instead of MKLROOT. This is one reason why it’s safer to use module show to find the path of the root directory, then copy this path into the makefile, rather than rely on environment variables.

+
+

To do step 2, we should first look at the contents of $MKLROOT. To show the contents of the MKL directory, type

+

ls /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0

+

We should obtain the following output:

+

benchmarks bin env examples include interfaces lib licensing modulefiles tools

+

If we look closely at the makefile, we see beneath the MKLROOT line the following: +

MKLROOT    ?= /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0
+LLIBS      += -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
+

+

the LLIBS line is telling make which libraries in particular to pick out.

+

So, we want to go into the lib directory, and then the intel64 directory (since LLIBS is pointing to $MKLROOT/lib/intel64). Let's see what's inside with the ls command:

+

ls /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0/lib/intel64

+

There's a lot of stuff in this directory! VASP helps us by telling us we need the mkl_scalapack_lp64 and mkl_blacs_intelmpi_lp64 libraries specifically. You won't always be told exactly which libraries, and figuring this out, if the information is not provided to you in the package documentation, can require some tinkering.

+

In general, the .a extension is for static linking, and the .so extension is for dynamic linking. For MKL in particular, the lp64 and ilp64 parts of the names refer to two different interfaces to the MKL library (32-bit and 64-bit integer indexing, respectively).

+
+

Tip

+

Notice that, inside $MKLROOT/lib/intel64, the filenames all start with libmkl, but in our makefile, we reference lmkl_scalapack_lp64. That's not a file in $MKLROOT/lib/intel64, but libmkl_scalapack_lp64.so is. The notation is that "big L" (-L) references the directories that the libraries are in, and the "little l" (-l) references the particular libraries. For example:

LLIBS += -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64

This is just a convention, but it is important to get right because your compile will fail otherwise.

+
+

Now that we have the correct MKLROOT set in the makefile.include, and we have an idea about how it's referencing the libraries within, we can move on to linking the HDF5 library.

+

Modifying the Makefile for HDF5#

+

Because HDF5 is an optional library, we could compile the code now if we wanted to. However, for the sake of practice, let’s uncomment the block in the makefile.include file related to HDF5 and repeat the exercise of linking a library:

+
# HDF5-support (optional but strongly recommended) 
+CPP_OPTIONS+= -DVASP_HDF5 
+HDF5_ROOT  ?= /path/to/your/hdf5/installation 
+LLIBS      += -L$(HDF5_ROOT)/lib -lhdf5_fortran 
+INCS       += -I$(HDF5_ROOT)/include 
+
+

Our job, again, is to give the makefile the correct directions to our library. In this case, it’s HDF5. Let’s see which HDF5 modules are available:

+

module avail hdf5

+

Returns

+

hdf5/1.12.2-intel-oneapi-mpi-intel hdf5/1.12.2-openmpi-gcc

+

So, we see that HDF5 has been built with the intel-oneapi-mpi toolchain, and also with the GCC/openmpi toolchain. Since we’re building vasp using the intel-oneapi toolchain, we need to load the corresponding module:

+

module load hdf5/1.12.2-intel-oneapi-mpi-intel

+

Again, we must locate the root directory:

+

module show hdf5/1.12.2-intel-oneapi-mpi-intel

+

We see the line for setting the HDF5 root directory environment variable:

+

setenv HDF5_ROOT_DIR /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/hdf5-1.12.2-dzgeixsm2cd3mupx4ti77ozeh7rh6zdo

+

Like before, we copy this path into our makefile.include:

+
# HDF5-support (optional but strongly recommended) 
+CPP_OPTIONS+= -DVASP_HDF5 
+HDF5_ROOT  ?= /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/hdf5-1.12.2-dzgeixsm2cd3mupx4ti77ozeh7rh6zdo 
+LLIBS      += -L$(HDF5_ROOT)/lib -lhdf5_fortran 
+INCS       += -I$(HDF5_ROOT)/include 
+
+

We’re ready to compile! In the case of VASP, the compile command is make DEPS=1 std but in general, the command may be make all or similar (consult the documentation of the code you’re trying to build).

+

If you’re working with a code that has a testsuite, now is a good time to run the testsuite to make sure that your compile was successful.
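Another quick sanity check is to confirm that the executable actually linked against the libraries you intended. A sketch using ldd (the executable name vasp_std is only an example):

# list the shared libraries the binary will load at runtime, filtered to MKL and HDF5
ldd ./vasp_std | grep -E 'mkl|hdf5'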

+

Summary of Steps#

+
    +
  1. Download the source code of the package you’re trying to build. This will generally be found on the website of the package.
  2. +
  3. Consult the documentation of the package to find out what scientific libraries are needed, and if the package developers provide guidance on what toolchains/libraries are best
  4. +
  5. Determine the availability of the needed scientific libraries.
      +
    1. Can a “library-of-libraries” like MKL or LibSci be used?
    2. +
    3. Does NREL support the library as a module?
        +
      1. If so, determine the toolchain it was built with (usually given in the name of the module). If the toolchain is not clear from the name of the module, try the ldd command (e.g., ldd path/to/executable/executable), which will show you the dynamically linked libraries of the executable.
      2. +
      +
    4. +
    +
  6. +
  7. Prepare your environment
      +
    1. module load the necessary modules to prepare your environment. (See environment preparation step of VASP example)
    2. +
    +
  8. +
  9. Prepare your makefile
      +
    1. Make sure that the compilers and (optional) MPI used in the makefile match what is used to build your scientific libraries as best as possible
    2. +
    3. Make sure that the paths to the scientific libraries in the makefile match the path given by the module show command
    4. +
    5. Make sure the proper “little L” libraries are referenced in the makefile
    6. +
    +
  10. +
  11. Compile!
  12. +
+

Questions?#

+

If you’re still stuck and unable to successfully link the scientific libraries you need, get in contact with HPC help.

diff --git a/Documentation/Development/Libraries/hsl/index.html b/Documentation/Development/Libraries/hsl/index.html

HSL for IPOPT#

+

HSL (Harwell Subroutine Library) for IPOPT are a set of linear solvers that can greatly accelerate the speed of the optimization over the default MUMPS solver.

+

Installation#

+

Go to the HSL site and follow the instructions to request the source code for all the available solvers. Note that the solver MA27 is free to obtain, but MA27 is a serial solver. Other solvers will require a license. Please request a license that applies to your use case.

+
+

Info

+

If you are building IPOPT along with HSL, please follow the instructions here.

+
+

We need to be careful regarding the selection of linear algebra libraries when installing HSL. The default version of IPOPT distributed with Ipopt.jl on Linux links to the OpenBLAS library. This causes issues when linking the HSL library to the Intel oneAPI MKL libraries. For this reason, to use HSL linear solvers with IPOPT on Kestrel, we must either compile IPOPT from scratch or compile HSL with OpenBLAS and Netlib LAPACK instead of Intel oneAPI MKL. We demonstrated IPOPT + HSL installation with Intel oneAPI MKL here.

+

The following provides detailed instructions for building HSL using OpenBLAS and Netlib LAPACK on NREL HPC systems.

+

Pre-requisites#

+
Metis#
+

Metis is a serial graph partitioning and fill-reducing matrix ordering software that helps the HSL solvers perform better. Therefore, it is recommended that you also install or build the Metis library. If you do want to install Metis, it must be done before compiling the HSL library.

+

The easiest way to install Metis is to use anaconda:

+
+

Warning

+

Using HSL linear solvers requires installing Metis. Metis is optional for MUMPS.

+
+

We will install Metis using Anaconda; however, it can also be installed from source. To install using Anaconda, we will create a clean environment with nothing but Metis. The conda environment is constructed within a directory in the hpcapps project on Kestrel.

+
module load conda
+conda create -p /projects/hpcapps/kpanda/conda-envs/metis python
+conda activate /projects/hpcapps/kpanda/conda-envs/metis
+conda install conda-forge::metis
+
+
+

Info

+

module load conda loads the default anaconda module. You may use a different conda module based on your needs.

+
+
+

Note

+

Anaconda packages sometimes have issues when they come from different channels. We tend to pull everything from conda-forge hence the channel choice above.

+
+

The Metis library and header files are placed in /projects/hpcapps/kpanda/conda-envs/metis/lib/ and /projects/hpcapps/kpanda/conda-envs/metis/include/, respectively.

+
Compilers#
+

We will be using the GNU compiler suite (gcc and gfortran). These can be accessed on the cluster by loading the appropriate module. This should work with any version of the GNU compilers. We use the default gcc and gfortran that are available on the CPU compute nodes.

+

Setting up the Environment#

+

We will install HSL in /kfs2/projects/msoc/kpanda/apps/Ipopt/install for this demonstration. This can be set to whatever location you wish to install to. Let's create the requisite installation directories:

+
mkdir -p /kfs2/projects/msoc/kpanda/apps/Ipopt/install
+cd /kfs2/projects/msoc/kpanda/apps/Ipopt/install
+mkdir lib include
+cd ..
+
+

We will make use of the following environment variables.

+
# Location of metis.h
+export METIS_HEADER=/projects/hpcapps/kpanda/conda-envs/metis/include
+# Location of metis library
+export METIS_LIBRARY=/projects/hpcapps/kpanda/conda-envs/metis/lib
+
+# Directory for keeping source code and build products
+export MYAPPS=/kfs2/projects/msoc/kpanda/apps/Ipopt/install
+# Location of header files
+export MYINC=${MYAPPS}/include
+# Location of static and dynamic libraries
+export MYLIB=${MYAPPS}/lib
+
+

These can be added to the .bash_profile file (or the equivalent for other shells). Remember, after adding these, to source .bash_profile (or equivalent), or to open a new terminal and do all building there. Alternatively, to make the Metis header and dynamic library easily accessible to the HSL, MUMPS, and IPOPT libraries, we will put symbolic links in the ${MYINC} and ${MYLIB} directories. Do this as follows:

+
cd ${MYINC}
+ln -s ${METIS_HEADER}/metis.h metis.h
+cd ${MYLIB}
+ln -s ${METIS_LIBRARY}/libmetis.so libmetis.so
+
+

This has two advantages. First, we don't need to add /projects/hpcapps/kpanda/conda-envs/metis/lib/ to the LD_LIBRARY_PATH. Second, Anaconda puts all of the environment's libraries and include files in the same directories as libmetis.so and metis.h; many of these libraries overlap with those used by HSL, MUMPS, and IPOPT but are not necessarily the same versions, and loading a different version of a library than the one compiled against can cause unexpected behavior. Linking only the two Metis files avoids exposing the rest of the environment.

+

Configure and Install#

+

We will clone ThirdParty-HSL and configure and install HSL in a working directory

+
git clone git@github.com:coin-or-tools/ThirdParty-HSL.git
+
+

Copy the HSL source code tarball into /projects/msoc/kpanda/apps/ThirdParty/HSL/, unpack it, and rename the unpacked directory (or create a symbolic link to it) as coinhsl.

+
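For example, the unpack-and-link step might look like the sketch below; the tarball name and version are placeholders for whichever HSL archive you were granted.

```bash
# From inside the cloned ThirdParty-HSL directory
tar -xzf /projects/msoc/kpanda/apps/ThirdParty/HSL/coinhsl-x.y.z.tar.gz
ln -s coinhsl-x.y.z coinhsl   # ThirdParty-HSL's configure expects ./coinhsl
```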

Run the following commands to configure

+
cd ThirdParty-HSL
+module load netlib-lapack
+./configure --prefix=${MYAPPS} \
+--with-metis \
+--with-metis-cflags=-I${METIS_HEADER} \
+--with-metis-lflags="-L${METIS_LIBRARY} -lmetis"
+make && make install
+
+

This should install the HSL libraries in ${MYAPPS}. Finally, add MYLIB to your LD_LIBRARY_PATH. You can append the following line to your .bash_profile to make it permanent or call it every time you need to run IPOPT with HSL solvers.

+
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MYAPPS}/lib
+
+

Usage#

+

IPOPT has a feature called the linear solver loader (read about it here). This allows for loading linear solvers from a dynamic library at run time. We will use this feature to use the HSL solvers.

+

The only thing you have to do is make the HSL dynamic library findable. This is done by adding the directory containing the HSL library to the environment variable LD_LIBRARY_PATH. To use the new linear solvers, pass the linear_solver="<solver>" argument to Ipopt.Optimizer.

+
+

Info

+

The IPOPT build that comes with Ipopt.jl seems to expect the HSL library to have the name libhsl.so. The repo ThirdParty-HSL builds the library libcoinhsl.so. The simplest fix is to do the following:

+
cd ${MYLIB}
+# Create a symbolic link called libhsl.so
+ln -s libcoinhsl.so libhsl.so
+
+
+

Alternatively, users can follow the instructions mentioned here for Julia JuMP

diff --git a/Documentation/Development/Libraries/index.html b/Documentation/Development/Libraries/index.html

Scientific Libraries Overview#

+

Scientific math libraries are a collection of highly optimized software tools that provide functions and algorithms for performing mathematical operations commonly used in scientific applications. They provide developers with a variety of tools for solving complex problems. These libraries are highly optimized for performance and generally designed to be portable across different platforms and operating systems.

+

We support some of the most widely used scientific math libraries including:

+
    +
  • MKL
  • +
  • LibSci (Kestrel only)
  • +
  • FFTW
  • +
  • LAPACK
  • +
  • scaLAPACK
  • +
  • HDF5
  • +
  • PETSc
  • +
+

For details on how to build an application with scientific libraries, see our how-to guide

+

For more information on a given scientific library, see our individual library pages under our "Libraries" drop-down menu.

diff --git a/Documentation/Development/Libraries/lapack/index.html b/Documentation/Development/Libraries/lapack/index.html

LAPACK and ScaLAPACK#

+

Documentation: LAPACK, ScaLAPACK

+

LAPACK is a highly optimized library of linear algebra routines written in Fortran 90. These routines include matrix multiplication, factorization (LU, Cholesky, QR, etc.), least-squares solutions of linear systems, eigenvalue problems, and many others. LAPACK routines are available in both single and double precision, and for complex and real numbers.

+

LAPACK depends on BLAS (Basic Linear Algebra Subprograms).

+

ScaLAPACK is a distributed-memory parallel version of LAPACK (i.e., ScaLAPACK is MPI-parallel).

+

Both LAPACK and ScaLAPACK are available as either standalone libraries (netlib-lapack), or as part of the "package-of-packages" libraries MKL and LibSci.
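As a rough sketch of the standalone route (the netlib-lapack module name appears elsewhere in this documentation; the link flags and source file below are assumptions, so check module show netlib-lapack for the exact library paths on your system):

```bash
module load netlib-lapack
# Serial LAPACK/BLAS link; add explicit -L paths from module show if needed
gfortran -O2 -o app app.f90 -llapack -lblas
# For ScaLAPACK, an MPI compiler wrapper and -lscalapack are typically needed as well
```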

diff --git a/Documentation/Development/Libraries/libsci/index.html b/Documentation/Development/Libraries/libsci/index.html

Cray LibSci#

+

Documentation: LibSci

+

LibSci is a collection of numerical libraries developed by Cray for scientific and engineering computing. LibSci is optimized for performance on Cray architectures, including multi-core processors, and supports both single-precision and double-precision arithmetic. It also includes multithreading support for parallel execution on shared-memory systems. Like MKL, LibSci includes the following math functions:

+
    +
  • BLAS (Basic Linear Algebra Subroutines)
  • +
  • CBLAS (C interface to the legacy BLAS)
  • +
  • BLACS (Basic Linear Algebra Communication Subprograms)
  • +
  • LAPACK (Linear Algebra routines)
  • +
  • ScaLAPACK (parallel Linear Algebra routines)
  • +
+

And additionally, libraries that are unique to Cray systems including:

+
    +
  • IRT (Iterative Refinement Toolkit) - a library of solvers and tools that provides solutions to linear systems using single-precision factorizations while preserving accuracy through mixed-precision iterative refinement.
  • +
  • CrayBLAS - a library of BLAS routines autotuned for Cray XC series systems through extensive optimization and runtime adaptation.
  • +
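On Cray systems, LibSci is typically linked in automatically by the PrgEnv compiler wrappers, so no explicit -l flags are usually needed; a minimal sketch is below (the PrgEnv module and source file names are placeholders for your toolchain and code):

```bash
module load PrgEnv-cray        # or PrgEnv-gnu / PrgEnv-intel, depending on your toolchain
ftn -O2 -o app app.f90         # the ftn/cc/CC wrappers pull in cray-libsci automatically
```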
diff --git a/Documentation/Development/Libraries/mkl/index.html b/Documentation/Development/Libraries/mkl/index.html

Intel Math Kernel Library (MKL)#

+

Documentation: MKL

+

Overview#

+

MKL includes a wealth of routines to accelerate technical application performance on modern multicore architectures. The library is designed to take full advantage of the latest Intel processors, including multi-core processors, and can significantly improve the performance of numerical applications. Core math functions include:

+
    +
  • BLAS (Basic Linear Algebra Subroutines)
  • +
  • LAPACK (Linear Algebra routines)
  • +
  • ScaLAPACK (parallel Linear Algebra routines)
  • +
  • Sparse solvers
  • +
  • Fast Fourier Transforms
  • +
  • Vector math
  • +
  • Data fitting
  • +
+
+

Note

+

If you are mixing an Anaconda environment with modules to build, always activate the conda environment before loading any library modules like MKL. cmake discovery, for example, is very sensitive to the order in which these actions are taken.

+
+

Linking#

+

With the Intel toolchain, linking against MKL is as simple as adding -mkl to the link command. This by default links in the threaded MKL routines. To limit to strictly sequential (i.e., not threaded) routines, use -mkl=sequential; to enable multi-process ScaLAPACK routines, use -mkl=cluster. To link MKL with GCC, the mkl module includes some convenience environment variables defined as the appropriate LDFLAGS setting. See the module show mkl output; the variable naming is intended to be self-explanatory.

+
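For illustration, the link lines below sketch both cases; the source file name and the explicit GCC library list are assumptions, so verify them against module show mkl and the Link Line Advisor for your configuration.

```bash
# Intel compilers: -mkl links the threaded MKL by default
ifort -O2 -o app app.f90 -mkl
ifort -O2 -o app app.f90 -mkl=sequential    # strictly sequential MKL
mpiifort -O2 -o app app.f90 -mkl=cluster    # adds ScaLAPACK/BLACS support

# GCC: one common explicit (sequential) link line; MKLROOT is usually set by the mkl module
gfortran -O2 -o app app.f90 -m64 -L${MKLROOT}/lib/intel64 \
    -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
```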

If you have needs not covered by these, use Intel's interactive MKL Link Line Advisor website to discover the appropriate linking options. Don't use mkl_link_tool in your build automation, as Intel only provides a 32-bit version of this tool which will cause builds to fail.

+

User Tips#

+

MKL will provide optimized library code based on the most advanced instruction set able to run on the discovered hardware. So for floating-point math, although GNU and Intel compilers will generate application code with SSE 4.2 instructions by default, MKL libraries will use the AVX-512 floating-point instructions available on Skylake processors.

+

As the code executes, rapid transitions between such different floating-point instruction sets may cause a significant performance penalty. Consider compiling the base code optimized for AVX-512 instructions, i.e., adding -xCORE-AVX512 for Intel and -march=skylake-avx512 for GNU.

+
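For example (the compiler invocations and source file below are placeholders):

```bash
# Intel compilers: generate AVX-512 application code
icc -O2 -xCORE-AVX512 -c mykernel.c

# GNU compilers: equivalent architecture flag
gcc -O2 -march=skylake-avx512 -c mykernel.c
```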

Using -mkl by default generates code that uses the multithreaded MKL routines. There is extra initialization overhead associated with using multithreaded MKL. With smaller problem sizes or with sparse vectors, it may be more beneficial from a performance standpoint to use the sequential MKL routines (-mkl=sequential).

diff --git a/Documentation/Development/Libraries/netcdf/index.html b/Documentation/Development/Libraries/netcdf/index.html

Netcdf

diff --git a/Documentation/Development/Libraries/petsc/index.html b/Documentation/Development/Libraries/petsc/index.html

PETSc#

+

Documentation: PETSc

+

PETSc is a suite of data structures and routines for the scalable (parallel) solution of scientific applications modeled by partial differential equations.

+

On Kestrel, PETSc is provided under multiple toolchains

+
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+  petsc:
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+     Versions:
+        petsc/3.14.6-cray-mpich-intel
+        petsc/3.19.3-intel-oneapi-mpi-intel
+        petsc/3.19.3-openmpi-gcc
+
+

petsc/3.14.6-cray-mpich-intel is a PETSc installation that uses the HPE-provided PrgEnv-intel. Therefore, the MPI used here is cray-mpich and the compiler is intel/2023.

+

petsc/3.19.3-intel-oneapi-mpi-intel is a PETSc installation that uses intel-oneapi-compilers and intel-oneapi-mpi for the compilers and MPI, respectively.

+

petsc/3.19.3-openmpi-gcc is a PETSc installation that uses gcc/10.1.0 and openmpi/4.1.5-gcc for the compilers and MPI, respectively.
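A minimal usage sketch for the GCC build is below; the companion compiler and MPI modules come from the description above, while using module show output to set an install-prefix variable for your own build is an assumption about how you might wire it up.

```bash
module load gcc/10.1.0 openmpi/4.1.5-gcc petsc/3.19.3-openmpi-gcc

# Find the PETSc installation prefix to point your build at
module show petsc/3.19.3-openmpi-gcc

# PETSC_PREFIX is a placeholder for the path reported by module show
mpicc -O2 -I${PETSC_PREFIX}/include -o app app.c -L${PETSC_PREFIX}/lib -lpetsc
```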

diff --git a/Documentation/Development/Performance_Tools/Intel/index.html b/Documentation/Development/Performance_Tools/Intel/index.html

Intel Parallel Studio#

+

Intel Parallel Studio is a set of tools that enable developing and optimizing software for the latest processor architectures.

+

Some of the tools available as part of the Intel Parallel Studio include:

+
+Intel VTune Amplifier XE +

Intel VTune Amplifier XE is a performance profiler for C, C++, C#, Fortran, Assembly and Java code. Hot spots analysis provides a sorted list of functions that use a lot of CPU time. Other features enable the user to quickly find common causes of slow performance in parallel programs, including waiting too long at locks and load imbalance among threads and processes. VTune Amplifier XE uses the Performance Monitoring Unit (PMU) on Intel processors to collect data with very low overhead.

+

The recommended way to use this tool is to run the profiler from the command line and view the data using the GUI or generate a text report from the command line.

+

You can list all the available profiling options for the machine you're profiling on, from the GUI or from the command line using amplxe-cl -collect-list.

+

Include the following in your batch script to get an HPC-characterization profile of your application:

+
#!/bin/bash --login
+#SBATCH -J <job name>
+#SBATCH -N <nodes>
+#SBATCH -t 00:30:00
+#SBATCH -A <Allocation handle>
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes. 
+export TMPDIR=/scratch/$USER/tmp
+# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+# profile the executable
+amplxe-cl --collect hpc-performance ./executable.exe
+
+
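To generate a text summary of the collected result from the command line instead of opening the GUI, something like the following should work; the result directory name (r000hpc) is an assumption, so use whatever directory amplxe-cl created in your working directory.

```bash
amplxe-cl -report summary -result-dir ./r000hpc
```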

GUI:

+

amplxe-gui

+
+
+Intel Trace Analyzer XE +

Intel Trace Analyzer and Collector is a tool for understanding the behavior of MPI applications. Use this tool to visualize and understand MPI parallel application behavior, evaluate load balancing, learn more about communication patterns, and identify communication hot spots.

+

The recommended way to use this tool is to collect data from the command line and view the data using the GUI.

+

Example batch script to collect MPI communication data:

+
#!/bin/bash --login
+#SBATCH -J <job name>
+#SBATCH -q <queue>
+#SBATCH -N <nodes>
+#SBATCH -t 00:30:00
+#SBATCH -A <Allocation handle>
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes.
+export TMPDIR=/scratch/$USER/tmp
+
+# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio and load the Intel MPI module
+module load intel-mpi
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# to profile the executable, just append '-trace' to mpirun
+mpirun -trace -n 4 ./executable.exe
+# this generates a .stf file that can viewed using the GUI
+
+

GUI:

+

traceanalyzer

+
+
+Intel Advisor XE +

Intel Advisor helps with vectorization and threading in your C++ and Fortran applications. This tool helps identify the areas that would benefit the most from vectorization. It also helps identify what is blocking vectorization and gives insights into how to overcome it:

+
# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes.
+export TMPDIR=/scratch/$USER/tmp
+
+

You can list all the available profiling options for the machine you're profiling on, from the GUI or from the command line using:

+

advixe-cl -collect-list

+

This tool has a lot of features that can be accessed from the GUI:

+

advixe-gui

+
+
+Intel Inspector XE +

Intel Inspector XE is an easy-to-use memory checker and thread checker for serial and parallel applications written in C, C++, C#, F#, and Fortran. It takes you to the source locations of threading and memory errors and provides a call stack to help you determine how you got there. This tool has a GUI and a command line interface.

+
# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes.
+export TMPDIR=/scratch/$USER/tmp
+
+

You can list all the available profiling options for the machine you're running this tool on, from the GUI or from the command line using:

+

inspxe-cl -collect-list

+

This tool has a lot of features that can be accessed from the GUI:

+

inspxe-gui

+
+
+Intel Application Performance Snapshot +

The new Application Performance Snapshot merges the earlier MPI Performance Snapshot and Application Performance Snapshot Tech Preview. MPI Performance Snapshot is no longer available separately, but all of its capabilities and more are available in the new combined snapshot. This tool lets you take a quick look at your application's performance to see if it is well optimized for modern hardware. It also includes recommendations for further analysis if you need more in-depth information.

+

Using This Tool:

+
# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# serial/SMP executable
+$ aps <executable> # this generates an aps result directory
+# DMP executable
+$ mpirun -n 4 aps <executable>
+# this generates an aps result directory. To generate text and .html result files:
+$ aps --report=<the generated results directory from the previous step> 
+# the result file can be viewed in a browser or text editor
+
+
+

Before you begin, please make sure that your application is compiled with the debug flag (-g), to enable profiling and debugging.

+

When using the suite of tools from Intel Parallel Studio on Eagle, we recommend that you set your TMPDIR to point to a location in your SCRATCH directory:

+

export TMPDIR=/scratch/$USER/tmp

+
+

Important:

+

Please make sure that you clean up this directory after your job completes.

+
diff --git a/Documentation/Development/Performance_Tools/Intel/trace/index.html b/Documentation/Development/Performance_Tools/Intel/trace/index.html

Trace

diff --git a/Documentation/Development/Performance_Tools/Intel/vtune/index.html b/Documentation/Development/Performance_Tools/Intel/vtune/index.html

Vtune

diff --git a/Documentation/Development/Performance_Tools/Linaro-Forge/index.html b/Documentation/Development/Performance_Tools/Linaro-Forge/index.html

Linaro MAP#

+

Documentation: Linaro Forge Documentation Page

+

Linaro MAP is a low-overhead, source-level profiler that provides insight into how an application spends its time, covering computation, MPI communication, I/O, and memory usage, and helps developers identify performance issues and bottlenecks. Linaro MAP can profile code running on multiple cores as well as code running on a system with hardware accelerators, such as GPUs. The profiling data generated by Linaro MAP can be visualized in a variety of ways, including activity timelines, per-line metrics, and call stacks, making it easy to identify patterns and potential bottlenecks. Here we will go through some of the information you can obtain with Linaro MAP using VASP as an example, and in the next section, MAP, we show how to start up such a MAP profile. If you need help with profiling your programs, reach out to HPC help and we can work with you.

+

Here is some profiling information obtained for VASP.

+

Across the top we see our metrics data for the default metrics: main thread activity, percent time each rank spends on floating-point instructions, and memory usage. The horizontal axis is wall clock time. The colors represent the following:

+
    +
  • 🟢 Green: Single-threaded computation time.
  • +
  • 🔵 Blue: MPI communication and waiting time.
  • +
  • 🟠 Orange: I/O time
  • +
  • 🟣 Dark purple: Accelerator time.
  • +
+

VASP-MAP-1

+

Across the bottom we have different view tabs. The I/O view displays your program I/O. The Project Files view allows you to navigate through your code base. The Functions view shows a flat profile of the functions in your program. The Stacks view allows you to follow down from the main function to see which code paths took the most time. Each line of the Stacks view shows the performance of one line of your source code, including all the functions called by that line.

+

You can select different metrics to view from the metrics menu:

+

VASP-MAP-2

+

As well as zoom in on specific times in your program run.

+

VASP-MAP-3

+

By clicking on the functions in the “Main Thread Stacks,” the profiler will take you to those calls in your code. Here we see that the call to the Davidson algorithm takes 68.6% of the program time.

+

VASP-MAP-4

+

Digging in further, we can find that most of the time is spent in the CALLMPI function, and the activity shows as blue, indicating that this is MPI communication and wait time.

+

VASP-MAP-5

+

See the next section MAP for how to obtain these.

diff --git a/Documentation/Development/Performance_Tools/Linaro-Forge/map/index.html b/Documentation/Development/Performance_Tools/Linaro-Forge/map/index.html

How to run MAP#

+

Program Setup#

+

Linaro-MAP can show you how much time was spent on each line of code. To see the source code in MAP, you must use a version of your code that is compiled with the debug flag. For most compilers, this is -g. Note: You should not simply profile an unoptimized debug build; keep your usual optimization flags (e.g., -O2 or -O3) turned on, in addition to -g, when profiling.

+
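For example, a profiling build might look like the sketch below (the compiler wrapper, flags, and file names are placeholders; the point is to keep -g alongside your normal optimization level):

```bash
cc -g -O2 -fopenmp -c triad.c
cc -g -O2 -fopenmp ex1.c triad.o -o exc
```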

For more information, see the Linaro Forge Documentation on getting started with MAP. In particular, if your program uses statically linked libraries, the MAP profiler libraries will not be automatically linked and you will need to do so yourself.

+
+

Note

+

Ensure that your program is working before trying to run it in MAP

+
+

MAP Setup#

+

There are two options for how to run MAP. The first method is to use the remote client (recommended, as it avoids the latency of X-forwarding the display). The second method is to use FastX. Both are described here.

+

Option 1: Remote Client Setup#

+

Download the remote client from the Linaro Forge website. Select the client for your platform (Mac/Windows/Linux) and ensure the client version number matches the version number of the Linaro suite you are using. You can see all the versions of linaro-forge available using:

+

$ module avail forge

+

Once you have the client installed, you will need to configure it to connect to the host:

+
  1. Open the Linaro Forge Client application.
  2. Select the configure option in the "Remote Launch" dropdown menu, click "Add", and set the hostname to "USER@HOST.hpc.nrel.gov", where USER is your username and HOST is the host you are trying to connect to. We recommend using DAV nodes if available on your system.
  3. In the Remote Installation Directory field, set the path to the Linaro installation on your host. This can be found by running the command:

     dirname $(dirname $(which map))

     For example:

     module load forge/24.0.4
     dirname $(dirname $(which map))
     /nopt/nrel/apps/cpu_stack/software/forge/24.0.4

  4. Hit "Test Remote Launch" to test the configuration.
+

Once the remote client is correctly set up, start a terminal and connect to the desired HPC system:
$ ssh USER@HOST.hpc.nrel.gov

+

Continue to the profiling section

+

Option 2: FastX Setup#

+

To run MAP with FastX, follow instructions to download and install the desktop client and connect to a host on the FastX page.

+

Once you have FastX installed and an appropriate build of your program to profile, start an xterm window from within FastX connected to an HPC host (We recommend using DAV nodes if available on your system). Then continue to the profiling section

+

Profiling a program#

+

Once you have an appropriate build of your program to profile and either the Linaro Forge Client or FastX installed, you can obtain profiling data through map with the following steps. We will profile VASP as an example.

+
  1. Start an xterm window from within FastX connected to a DAV node.
  2. Start an interactive job session. Use the debug or other partitions as appropriate.

     $ salloc --nodes=<N> --time=<time> --account=<handle>

  3. Load the linaro-forge module (formerly arm). Additionally load any other modules needed to run your program.

     $ module load linaro-forge
     $ module load mkl intel-mpi #for VASP

  4. Start a map session using the command map --connect if you are using the desktop client, or simply map if you are using FastX. Optionally, navigate to your working directory and give map the path to your exe.

     $ cd PATH/TO/YOUR/WORKING/DIRECTORY
     $ map --connect PATH/TO/YOUR/PROGRAM/exe   (remove --connect if using FastX)

     If using the remote client, it will send a Reverse Connection request. Click 'Accept'.

     You should now see the Linaro Forge GUI appear and a submission box with some information filled out if you followed the optional directions. Otherwise, use the GUI to input them now. Make sure the path to the application includes your program exe. Make sure your working directory includes your input files, or specify your stdin file and its path. Adjust other parameters as needed for profiling.

     Linaro-MAP-GUI

  5. Start your profile by clicking “Run”.
+

You should now see the profiling data we described in the previous section MAP. Please refer to that page as well as the Linaro Forge Documentation for more details on what you can learn from such profiles.

+

Linaro-MAP-Profile

+

Debugging a program#

+

The Forge debugger is ddt. It uses the same local client as map and perf-report. To get started, set up your local client version of Forge as described above in the section MAP Setup - Option 1: Remote Client Setup.

+

There are many ways to launch a debug session. Probably the simplest is to launch from an interactive session on a compute node.

+

Get an interactive session replacing MYACCOUNT with your account:

+
salloc --exclusive --mem=0 --tasks-per-node=104 --nodes=1 --time=01:00:00 --account=MYACCOUNT --partition=debug
+
+

As with map, your application needs to be compiled with the -g option. Here is a simple build with make. (Here we also have an OpenMP program, so we add the flag -fopenmp.)

+
make
+cc  -g -fopenmp -c triad.c
+cc  -g -fopenmp ex1.c triad.o -o exc
+
+

Our executable is exc.

+

We are going to need our remote directory so we run pwd.

+
pwd
+/kfs3/scratch/user/debug
+
+

We load the module:

+
module load forge/24.0.4
+
+

Then run the command:

+
ddt --connect
+
+

ddt is now running on the compute node, waiting for you to connect with the local client. Launch your local client. Then, under Remote Launch, select the machine to which you want to connect. After a few seconds you will see a window announcing that ddt wants to connect to your client. Hit Accept.

+

Linaro-MAP-Profile

+

After acceptance completes click Run and debug a program.

+

Here is where you need the directory for your program. Put the full path to your application in the Application box and the directory in Working Directory. We assume the Working Directory (the directory that would normally contain your data) is the same as your program directory.

+

This is an MPI program so select MPI. After that you will see more options. For most programs the Implementation should be SLURM (generic). If this is not what is shown or you know you need something else, select Change... to set it. For OpenMP programs select that box also.

+

Linaro-MAP-Profile

+

Finally hit Run. After a few seconds you will see the debug window with the "main" source in the center window. You can set Break Points by clicking in the leftmost column of the source window. To start your program click the right facing triangle in the top left corner of the window.

+

Linaro-MAP-Profile

+

See the full documentation for complete instructions. There is a copy of userguide-forge.pdf in the doc directory of the Forge directory.

+
module load forge
+
+$echo `dirname $(dirname $(which ddt))`/doc
+/nopt/nrel/apps/cpu_stack/software/forge/24.0.4/doc
+
+ls /nopt/nrel/apps/cpu_stack/software/forge/24.0.4/doc
+RELEASE-NOTES  stacks.dtd  userguide-forge.pdf
+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Performance_Tools/Linaro-Forge/performance_rep/index.html b/Documentation/Development/Performance_Tools/Linaro-Forge/performance_rep/index.html new file mode 100644 index 000000000..e4a8e361f --- /dev/null +++ b/Documentation/Development/Performance_Tools/Linaro-Forge/performance_rep/index.html @@ -0,0 +1,4964 @@ + + + + + + + + + + + + + + + + + + + + + + + Performance Reports - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Linaro-Performance Reports#

+

Documentation: Linaro Performance Reports

+

Linaro Performance Reports is a low-overhead tool that produces one-page text and HTML reports summarizing and characterizing both scalar and MPI application performance. (Only ~5% application slowdown even with thousands of MPI processes.) These high-level reports can help answer:

+
    +
  • Is this application optimized for the system it is running on?
  • +
  • Does it benefit from running at this scale?
  • +
  • Are there I/O or networking bottlenecks affecting performance?
  • +
  • Which configuration changes can be made to improve performance further?
  • +
+

Walk through#

+

Here we show the information you can obtain with Linaro Performance reports using VASP as an example. In the next section, we will detail how to obtain these reports. If you need help with profiling your programs, reach out to HPC help and we can work with you.

+

Here is the header of performance report obtained for a VASP run on 1 node with 36 processes:

+

1n36p report-1

+

This shows the time spent running application code, in MPI calls, and on I/O. In this case, we see that we are MPI-bound, which makes sense given that we are running a small, simple test case on more MPI tasks than necessary, which creates unnecessary MPI communication overhead.

+

The rest of the report shows a further breakdown of each of these categories:

+

1n36p report-2

+

Running a performance report#

+

All you need to do is load the module and prefix your execution command with perf-report:

+
    +
  1. Start an interactive job session. Use the debug or other partitions as appropriate:

     $ salloc --nodes=<N> --time=<time> --account=<handle>

  2. Load the linaro-forge module (formerly arm), and additionally load any other modules needed to run your program:

     $ module load linaro-forge
     $ module load mkl intel-mpi #for VASP

  3. Set MPI parameters and run your exe using perf-report:

     $ perf-report srun -n 36 PATH/TO/YOUR/PROGRAM/exe
+

This will generate an .html file and a .txt file that you can view in a browser or text editor. You should now see the overview we described in the previous section.

diff --git a/Documentation/Development/Performance_Tools/craypat/index.html b/Documentation/Development/Performance_Tools/craypat/index.html

Craypat

diff --git a/Documentation/Development/Performance_Tools/hpctoolkit/index.html b/Documentation/Development/Performance_Tools/hpctoolkit/index.html

Hpctoolkit

diff --git a/Documentation/Development/Performance_Tools/index.html b/Documentation/Development/Performance_Tools/index.html

Index

diff --git a/Documentation/Development/Programming_Models/gpu_hpc/index.html b/Documentation/Development/Programming_Models/gpu_hpc/index.html

Using GPUs for HPC

+ +

This page documents how NREL HPC users can utilize GPUs, from submitting the right kind of job to Slurm to examples of creating custom CUDA kernels from Python.

+

Submitting GPU jobs to Slurm#

+

Example scripts#

+

The following examples are generic templates that NREL HPC users can adapt for their own GPU job scripts for a given system. Be sure to replace <allocation> with the name of your HPC allocation. Note that Kestrel and Swift's GPU partitions have sharable nodes, allowing for multiple jobs to run on one node simultaneously. Since there are four GPU cards on each node on these systems, each node can theoretically accommodate four GPU-driven jobs at once. As such, example scripts for those systems are tailored for requesting one-quarter of a node by default. Although Vermilion's GPUs are technically "shared" in the sense that multiple (CPU) jobs can run on one node, there is only one GPU per node. As such the Vermilion example requests the entire node. Please refer to the system-specific pages for more information on the GPUs available on each cluster and how AUs are charged accordingly.

+
+

Note

+

When launching a GPU job on Kestrel, be sure to do so from one of its dedicated GPU login nodes.

+
+
+

Note

+

Be aware that --mem in Slurm ALWAYS refers to CPU, not GPU, memory. You are automatically given all of the GPU memory in a Slurm job.

+
+
+Kestrel +
#!/bin/bash 
+#SBATCH --account=<allocation>
+#SBATCH --time=01:00:00
+#SBATCH --mem=80G
+#SBATCH --gpus=1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=32
+#SBATCH --output=%j-%x.log
+
+# don't forget to submit this from a GPU login node!
+# note that you do not have to specify a partition on Kestrel;
+# your job will be sent to the appropriate gpu-h100 queue based
+# on your requested --time
+<GPU-enabled code to run>
+
+
+
+Swift +
#!/bin/bash
+#SBATCH --account=<allocation>
+#SBATCH --partition=gpu
+#SBATCH --time=01:00:00
+#SBATCH --mem=250G
+#SBATCH --gpus=1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=24
+#SBATCH --output=%j-%x.log
+
+<GPU-enabled code to run>
+
+
+
+Vermilion +
#!/bin/bash
+#SBATCH --account=<allocation>
+#SBATCH --partition=gpu
+#SBATCH --time=01:00:00
+#SBATCH --mem=0
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=30
+#SBATCH --output=%j-%x.log
+#SBATCH --exclusive
+
+# Note that you do not have to explicitly request a GPU on Vermilion 
+# with `#SBATCH --gpus=1` or `#SBATCH --gres=gpu:1`.
+<GPU-enabled code to run>
+
+
+

GPU-relevant environment variables#

+

The following are some GPU-relevant environment variables you can set in your submission scripts to Slurm.

+ + + + + + + + + + + + + + + + + +
| Variable | Description |
| --- | --- |
| SLURM_GPUS_ON_NODE | Number of GPUs allocated to the batch step. |
| SLURM_JOB_GPUS | The global GPU IDs of the GPUs allocated to this job. The GPU IDs are not relative to any device cgroup, even if devices are constrained with task/cgroup. Only set in batch and interactive jobs. |
+
+

Note

+

You can also run nvidia-smi -L while connected to any GPU node to return the available GPU device(s).

+
+

Software containers#

+

Please refer to our dedicated documentation on using GPUs from software containers for more information.

+

Migrating workflows from CPU to GPU#

+

GPUs contain hundreds or thousands of cores and can considerably speed up certain operations when compared to CPUs. However, unless you are already using a GPU-accelerated application with built-in CUDA kernels (such as some versions of PyTorch), your custom code will likely require significant changes to be able to effectively use a GPU device. This is even more true if your intent is to parallelize your code over multiple GPU devices. Further, some algorithms or routines are much better suited for GPU computation than others. As such, the first question you should always ask yourself is whether it makes sense to invest the time and effort needed to refactor your CPU-driven code for GPU computation. The following subsections describe key points to consider when you want to take the plunge into GPU computing, ending with an example using the numba package to refactor Python functions for Kestrel's H100 GPUs.

+

Ensure your algorithm is suited for GPU computation#

+

Not all algorithms are created equal when it comes to being able to effectively utilize a GPU. In general, GPUs best accommodate large numbers of relatively small, simultaneous operations ("massive parallelism"); canonical algorithmic examples of this include graphics processing (reflecting the "G" in "GPU") and many linear algebra computations (e.g., "matrix-matrix math" like BLAS3 routines). Algorithms that would likely perform poorly on a GPU without significant modification are those that launch serial tasks (think for-loops or apply statements in Python) that may each require a significant amount of RAM and/or write to the filesystem directly.

+

Minimize data transfer between CPU and GPU devices#

+

Without even considering the characteristics of the algorithm itself, one of the largest bottlenecks in GPU computing is copying data from the CPU to the GPU device(s). In many cases, copying data between devices can easily take longer than the execution of the algorithm. As such, to maximize an algorithm's performance on a GPU, it is imperative to consider employing application-specific routines to minimize the total amount of data transferred during runtime. In other words, the goal with effective GPU computing often comes down to designing the code to transfer as little data as possible as infrequently as possible.

+

Ways to compile CUDA code#

+

CUDA is a low-level API distributed by NVIDIA that allows applications to parallelize on NVIDIA GPUs, such as the H100s available on Kestrel or the A100s on Swift. Because of this, any GPU-driven code gets compiled into a CUDA kernel, which is essentially a function translated to machine code for the GPU. There are two CUDA-aware compilers available from NVIDIA: nvcc, a CUDA analog to the more generic cc, and nvrtc, which is NVIDIA's runtime compiler for "just-in-time" (JIT) compilation.

+

See this page for specific GPU code compilation examples on Kestrel, which include both CUDA and OpenAcc (an open-source alternative) implementations.

+
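As a minimal sketch of the nvcc workflow (the module name, source file, and -arch value are assumptions; sm_90 corresponds to H100-class GPUs, so adjust for other devices):

```bash
module load cuda                      # or whichever CUDA toolkit module your system provides
nvcc -O2 -arch=sm_90 -o my_kernel my_kernel.cu
./my_kernel
```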

Example: Create a custom CUDA kernel in Python with numba#

+

To demonstrate some of the concepts described here, we will use numba to refactor an algorithm that initially performs poorly on a GPU due to how its input/output data are copied between devices. numba is a Python package for creating custom CUDA kernels from Python functions working with numeric data. It has a simple interface to CUDA that feels comfortable for most Python users, though advanced GPU programmers may consider building GPU-accelerated applications with "pure" CUDA. For such examples of creating custom CUDA kernels outside of Python, please see here.

+

This example is written assuming you have access to Kestrel, but it should be able to run on any system with at least one GPU node.

+

Install numba from Anaconda#

+

The numba package is easily installable through Anaconda/mamba. For any GPU-enabled application, the biggest concern during installation is whether the application version matches the GPU drivers. At the time this page was written, the GPU drivers on Kestrel reflect CUDA 12.4, and so we must ensure that our version of numba can work with that. In conda, we can control this by explicitly passing the corresponding cuda-version=CUDA_VERSION from conda-forge and asking for a cuda-toolkit from the nvidia/label/cuda-CUDA_VERSION channel. When we do this, we will force a compatible version of numba to install into the $CONDA_ENVIRONMENT we define (which is in /scratch to save space). We will also install numpy to work with numeric data, as well as pandas for data manipulation tasks:

+
+

Note

+

It is best to create this environment on a node with at least one available NVIDIA GPU. On any such node, you can run the command nvidia-smi to display the current GPU driver version (as well as any running GPU processes).

+
+
ml mamba
+CONDA_ENVIRONMENT=/scratch/$USER/.conda-envs/numba-cuda124
+mamba create --prefix=$CONDA_ENVIRONMENT \
+  conda-forge::numba \
+  conda-forge::numpy \
+  conda-forge::pandas \
+  conda-forge::cuda-version=12.4 \
+  nvidia/label/cuda-12.4.0::cuda-toolkit \
+  --yes
+conda activate $CONDA_ENVIRONMENT
+
+

Example numba code#

+

Consider the script numba-mat.py below. This script demonstrates the importance of deciding when and how often one should copy data to and from the GPU device to optimize runtime performance.

+
+

Note

+

This example requires approximately 40GB of CPU RAM to complete successfully. Be sure to run this on a GPU compute node from a Slurm job accordingly, with the defined $CONDA_ENVIRONMENT activated.

+
+
+numba-mat.py: Matrix multiplication with numba +
# Define and JIT-compile a CUDA function (kernel) with numba for simple
+# matrix multiplication. This script demonstrates the importance of 
+# balancing the cost of copying data from the host CPU to GPU device in 
+# terms of runtime performance.
+
+# Please contact Matt.Selensky@nrel.gov with any questions.
+
+import numba
+from numba import vectorize
+from numba import cuda
+import pandas as pd
+import numpy as np
+from time import time
+
+# Note that you must define the dtype (float32 is preferred over 
+# float64) and target device type ('cuda' for GPU)
+@vectorize(['float32(float32, float32)'], target='cuda')
+def gpu_mult(x, y):
+    z = x * y
+    return z
+
+
+# create random arrays as input data
+asize = pow(10, 9)
+array_a = np.float32(np.random.rand(asize))
+array_b = np.float32(np.random.rand(asize))
+array_c = np.float32(np.random.rand(asize))
+matrix_a = ([array_a], [array_b], [array_c])
+matrix_b = ([array_c], [array_b], [array_a])
+
+# define number of function loops to run for each test case
+nloops = 10
+
+### numpy - CPU
+# Test Case 1: Here, we just use pure numpy to perform matrix multiplication on the CPU.
+t0 = time()
+for i in np.arange(nloops):
+    np.multiply(matrix_a, matrix_b)
+cpu_time = time()-t0
+print("numpy on CPU required", cpu_time, "seconds for", nloops, "function loops")
+
+### numba - GPU
+# Test Case 2: Here, we copy arrays to GPU device __during__ the execution of gpu_mult()
+t0 = time()
+for i in np.arange(nloops):
+    gpu_mult(matrix_a, matrix_b)
+gpu_time0 = time()-t0
+print("numba on GPU required", gpu_time0, "seconds for", nloops, "function loops (data are actively copied to GPU device)")
+
+# Test Case 3: Here, we copy arrays to GPU device __before__ the execution of gpu_mult()
+# output is then copied back to GPU
+matrix_a_on_gpu = cuda.to_device(matrix_a)
+matrix_b_on_gpu = cuda.to_device(matrix_b)
+t0 = time()
+for i in np.arange(nloops):
+    gpu_mult(matrix_a_on_gpu, matrix_b_on_gpu)
+gpu_time1 = time()-t0
+print("numba on GPU required", gpu_time1, "seconds for", nloops, "function loops (data were pre-copied to GPU device; output is copied back to CPU)")
+
+# Test Case 4: Here, we copy arrays to GPU device __before__ the execution of gpu_mult()
+# output remains on GPU unless we copy it back with out_device.copy_to_host()
+matrix_a_on_gpu = cuda.to_device(matrix_a)
+matrix_b_on_gpu = cuda.to_device(matrix_b)
+out_device = cuda.device_array(shape=(asize,len(matrix_a)), dtype=np.float32)  # does not initialize the contents, like np.empty()
+t0 = time()
+for i in np.arange(nloops):
+    gpu_mult(matrix_a_on_gpu, matrix_b_on_gpu, out=out_device)
+gpu_time2 = time()-t0
+print("numba on GPU required", gpu_time2, "seconds for", nloops, "function loops (data were pre-copied to GPU device; output remains on GPU)")
+# out_device.copy_to_host() # what you would run if you needed to bring this back to the CPU non-GPU work
+
+# format runtime data as output table
+d = {'device_used': ['CPU', 'GPU', 'GPU', 'GPU'],
+    'input_precopied_to_gpu': [np.nan, False, True, True],
+    'output_copied_from_gpu': [np.nan, True, True, False],
+    'seconds_required': [cpu_time, gpu_time0, gpu_time1, gpu_time2]}
+df = pd.DataFrame(d)
+print("")
+print(df)
+print("")
+df.to_csv('numba-runtimes.csv', index=False)
+
+
+

This script runs through four cases of multiplying two large random matrices, each with dimensions (10⁹, 3). For each test case, 10 loops of the function are executed, and the time required reflects the time it takes for all 10 loops. Test Case 1 is the CPU speed baseline to which we will compare our various GPU runtimes. Matrix multiplication using pure numpy.multiply(), which does not invoke the GPU and runs entirely on the CPU, requires approximately 39.86 seconds. The remaining Test Cases will all run on the GPU, but have dramatically different runtime performances depending on how frequently data are copied between the CPU and GPU devices.

+

Note that to use the GPU in this script, we define the function gpu_mult(), which is vectorized with a numba decorator that also tells the device to operate on float32 values, and defines cuda as the runtime target device. Following these instructions, numba JIT-compiles gpu_mult() into a CUDA kernel that can execute on a GPU.

+
+

Note

+

In general, computing on numeric float32 data performs substantially better compared to float64 on GPUs.

+
+

In Test Case 2, we simply call the vectorized gpu_mult(), which actually has much slower performance (55.67 seconds) than the CPU test case! On the surface, this is counterintuitive (aren't GPUs supposed to be faster?!); however, a deeper examination of the code explains why we observe this. Because we initialized matrix_a and matrix_b on the CPU (a normal use case), we have to copy each object to the GPU before they can be multiplied together. After gpu_mult() is executed, the output matrix is then copied back to the CPU. Without some extra effort on our part, numba will default to copying these data before and after the execution of gpu_mult(). By contrast, since everything is already on the CPU, numpy simply does not have to deal with this, so it runs faster.

+

Test Case 3 reflects a situation in which we pre-copy matrix_a and matrix_b to GPU memory before executing gpu_mult(). We do this with the numba command cuda.to_device(), which allows the input data to only be copied between devices once, even though we perform 10 executions on them. However, because we do not specify an 'output device' in our vectorized gpu_mult(), the output matrix is still copied back to CPU memory after each execution. With this simple change, we observe a dramatic decrease in runtime to only ~0.8 seconds. With a bit of extra code, though, we can keep the output on the GPU, which would make sense if we wanted to do more work on it there later in the script.

+

To that end, Test Case 4 squeezes all possible performance out of gpu_mult() by both pre-copying the input data to the GPU and leaving the output matrix on the same device. The blazing-fast runtime of this test case (only about a millisecond) measures the GPU computation itself, without the clutter of copying data between devices. When compared to the runtime of Test Case 1, which also does not include any kind of data copying step, Test Case 4 shows a roughly 24,000X speedup in multiplying two matrices of this size, allowing us to appreciate the true power of the GPU.

+

This table summarizes the results and reflects the runtimes of ten function loops on a node from Kestrel's gpu-h100 partition.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Test Case | Input pre-copied to GPU | Output copied from GPU | Time required (seconds)
1 (CPU) | NaN | NaN | 39.860077
2 (GPU) | False | True | 55.670377
3 (GPU) | True | True | 0.797287
4 (GPU) | True | False | 0.001643
+

To be sure, there are many more considerations to have when developing a highly performant custom CUDA kernel, and there are many other packages that can do similar things. However, minimizing the amount of data copied between the CPU and GPU devices is a relatively easy approach that introductory GPU programmers can implement in their kernels to see immediate paybacks in performance regardless of computing platform.

+

Extra resources#

+ + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Programming_Models/index.html b/Documentation/Development/Programming_Models/index.html new file mode 100644 index 000000000..40835daac --- /dev/null +++ b/Documentation/Development/Programming_Models/index.html @@ -0,0 +1,4795 @@ + + + + + + + + + + + + + + + + + + + Programming Models - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Programming Models#

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Programming_Models/mpi/index.html b/Documentation/Development/Programming_Models/mpi/index.html new file mode 100644 index 000000000..8b08efaed --- /dev/null +++ b/Documentation/Development/Programming_Models/mpi/index.html @@ -0,0 +1,5502 @@ + + + + + + + + + + + + + + + + + + + + + + + MPI - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

MPI

+ +

Cray-MPICH#

+

Documentation: Cray-MPICH

+

Cray's MPICH is a high performance and widely portable implementation of the Message Passing Interface (MPI) standard.

+

Note Cray-MPICH is only available on Kestrel. +In order to use Cray-MPICH, it is recommended to use the HPE Cray compiler wrappers cc, CC and ftn. +The wrappers will find the necessary MPI headers and libraries as well as scientific libraries provided by LibSci.

+

Depending on the compiler of choice, we can load a different instance of Cray-MPICH. +For example, to build with the Intel compilers we can load the PrgEnv-intel module, which provides an Intel-based instance of cray-mpich through cc, CC and ftn. +The usual MPI compiler wrappers mpicc, mpicxx and mpif90/mpifort are also available, but the HPE Cray wrappers are recommended.
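As a minimal sketch (hello_mpi.c is a placeholder source file), a compile-and-run with the Cray wrappers might look like:

module load PrgEnv-intel
cc -o hello_mpi hello_mpi.c        # the wrapper adds cray-mpich headers and libraries automatically
srun --nodes=1 --ntasks=4 ./hello_mpi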

+

Cray-MPICH takes into consideration the processor architecture through craype-x86-spr and the network type through craype-network-ofi.

+

cray-mpich-abi#

+

For codes compiled using intel-mpi or mpich, we can load the module cray-mpich-abi, an HPE provided MPI that allows pre-compiled software to leverage MPICH benefits on Kestrel's network topology.
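As a hedged sketch (the binary name is a placeholder, and the exact set of modules you need may vary), this might look like:

module load cray-mpich-abi
srun --ntasks=<ntasks> ./my_precompiled_app    # binary built against Intel MPI or MPICH picks up Cray MPICH at run time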

+

OpenMPI#

+

Documentation: OpenMPI

+

The Open MPI Project is an open source Message Passing Interface implementation that is developed and maintained by a consortium of academic, research, and industry partners. Open MPI is therefore able to combine the expertise, technologies, and resources from all across the High Performance Computing community in order to build the best MPI library available. Open MPI offers advantages for system and software vendors, application developers and computer science researchers.

+

The Open MPI framework is a free and open-source communications library that is commonly developed against by many programmers. As an open-source package with strong academic support, the latest ideas may appear as implementations here prior to commercial MPI libraries.

+

Note that the Slurm-integrated builds of OpenMPI do not create the mpirun or mpiexec wrapper scripts that you may be used to. Ideally you should use srun (to take advantage of Slurm integration), but you can also use OpenMPI's native job launcher orterun. Some have also had success simply symlinking mpirun to orterun.
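For illustration (the application name is a placeholder), the launch options described above look like:

srun --nodes=2 --ntasks=8 ./my_mpi_app       # preferred: uses the Slurm integration
orterun -np 8 ./my_mpi_app                   # OpenMPI's native launcher
ln -s "$(which orterun)" ~/bin/mpirun        # optional: provides a familiar mpirun name (assumes ~/bin exists and is on your PATH)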

+

OpenMPI implements two Byte Transfer Layers for data transport between ranks in the same physical memory space: sm and vader. +Both use a memory-mapped file, which by default is placed in /tmp. +The node-local /tmp filesystem is quite small, and it is easy to fill this and crash or hang your job. +Non-default locations of this file may be set through the OMPI_TMPDIR environment variable.

+
    +
  • +

    If you are running only a few ranks per node with modest buffer space requirements, consider setting OMPI_TMPDIR to /dev/shm in your job script.

    +
  • +
  • +

    If you are running many ranks per node, you should set OMPI_TMPDIR to /tmp/scratch, which holds at least 1 TB depending on Eagle node type (see the example after this list).

    +
  • +
+
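For example, in a job script you could set one of the following before launching your ranks:

export OMPI_TMPDIR=/dev/shm        # a few ranks per node with modest buffer space
# or, for many ranks per node:
export OMPI_TMPDIR=/tmp/scratch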

Supported Versions#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Kestrel | Eagle | Swift | Vermilion
openmpi/4.1.5-gcc | openmpi/1.10.7/gcc-8.4.0 | openmpi/4.1.1-6vr2flz | openmpi/4.1.4-gcc
openmpi/4.1.5-intel | openmpi/3.1.6/gcc-8.4.0 | |
| openmpi/4.0.4/gcc-8.4.0 | |
| openmpi/4.1.1/gcc+cuda | |
| openmpi/4.1.2/gcc | |
| openmpi/4.1.2/intel | |
| openmpi/4.1.3/gcc-11.3.0-cuda-11.7 | |
| openmpi/4.1.0/gcc-8.4.0 | |
+

IntelMPI#

+

Documentation: IntelMPI

+

Intel® MPI Library is a multifabric message-passing library that implements the open source MPICH specification. Use the library to create, maintain, and test advanced, complex applications that perform better on HPC clusters based on Intel® and compatible processors.

+

Intel's MPI library enables tight interoperability with its processors and software development framework, and is a solid choice for most HPC applications.

+

Supported Versions#

+ + + + + + + + + + + + + + + + + +
Kestrel | Eagle | Swift | Vermilion
intel-oneapi-mpi/2021.10.0-intel | intel-mpi/2020.1.217 | intel-oneapi-mpi/2021.3.0-hcp2lkf | intel-oneapi-mpi/2021.7.1-intel
+

MPT#

+

Documentation: MPT

+

HPE's Message Passing Interface (MPI) is a component of the HPE Message Passing Toolkit (MPT), a software package that supports parallel programming across a network of computer systems through a technique known as message passing.

+

Hewlett-Packard Enterprise (HPE)—Eagle's creator—offers a very performant MPI library as well, built on top of and colloquially known via its underlying Message Passing Toolkit high-performance communications component as "MPT."

+

Supported Versions#

+ + + + + + + + + + + + + + +
Eagle
mpt/2.23
mpt/2.22
+

Note:

+

MPT is only installed on Eagle.

+

MPICH#

+

Documentation: MPICH

+

MPICH is a high performance and widely portable implementation of the Message Passing Interface (MPI) standard. +MPICH and its derivatives form the most widely used implementations of MPI in the world. They are used exclusively on nine of the top 10 supercomputers (June 2016 ranking), including the world’s fastest supercomputer: Taihu Light.

+

Supported Versions#

+ + + + + + + + + + + + + + + + + + + + + + + +
Kestrel | Eagle | Swift | Vermilion
mpich/4.1-gcc | | mpich/3.4.2-h2s5tru | mpich/4.0.2-gcc
mpich/4.1-intel | | |
+

Running MPI Jobs on Eagle GPUs#

+

To run MPI (message-passing interface) jobs on the Eagle system's NVidia GPUs, the MPI library must be "CUDA-aware." +A suitable OpenMPI build has been made available via the openmpi/4.0.4/gcc+cuda module. +This module is currently in test.

+

Interactive Use#

+

srun does not work with this OpenMPI build when running interactively, so please use orterun instead. +However, OpenMPI is cognizant of the Slurm environment, so one should request the resources needed via salloc (for example, the number of available "slots" is determined by the number of tasks requested via salloc). +Ranks are mapped round-robin to the GPUs on a node. +nvidia-smi shows, for example,

+

Processes:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GPU | PID | Type | Process name | GPU Memory Usage
0 | 24625 | C | ./jacobi | 803MiB
0 | 24627 | C | ./jacobi | 803MiB
1 | 24626 | C | ./jacobi | 803MiB
+

when oversubscribing 3 ranks onto the 2 GPUs via the commands

+
srun --nodes=1 --ntasks-per-node=3 --account=<allocation_id> --time=10:00 --gres=gpu:2 --pty $SHELL
+...<getting node>...
+orterun -np 3 ./jacobi
+
+

If more ranks are desired than were originally requested via srun, the OpenMPI flag --oversubscribe could be added to the orterun command.
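For example, to place more ranks on the node than the tasks originally requested:

orterun --oversubscribe -np 6 ./jacobi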

+

Batch Use#

+

An example batch script to run 4 MPI ranks across two nodes is as follows.

+
+batch script +
#!/bin/bash --login
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=2
+#SBATCH --time=2:00
+#SBATCH --gres=gpu:2
+#SBATCH --job-name=GPU_MPItest
+#SBATCH --account=<allocation_id>
+#SBATCH --error=%x-%j.err
+#SBATCH --output=%x-%j.out
+
+ml use -a /nopt/nrel/apps/modules/test/modulefiles
+ml gcc/8.4.0 cuda/10.2.89 openmpi/4.0.4/gcc+cuda
+
+cd $SLURM_SUBMIT_DIR
+srun ./jacobi
+
+
+

Multi-Process Service#

+

To run multiple ranks per GPU, you may find it beneficial to run NVidia's Multi-Process Service. This process management service can increase GPU utilization, reduce on-GPU storage requirements, and reduce context switching. To do so, include the following functionality in your Slurm script or interactive session:

+

MPS setup#

+
+MPS setup +
export CUDA_MPS_PIPE_DIRECTORY=/tmp/scratch/nvidia-mps
+if [ -d $CUDA_MPS_PIPE_DIRECTORY ]
+then
+   rm -rf $CUDA_MPS_PIPE_DIRECTORY
+fi
+mkdir $CUDA_MPS_PIPE_DIRECTORY
+
+export CUDA_MPS_LOG_DIRECTORY=/tmp/scratch/nvidia-log
+if [ -d $CUDA_MPS_LOG_DIRECTORY ]
+then
+   rm -rf $CUDA_MPS_LOG_DIRECTORY
+fi
+mkdir $CUDA_MPS_LOG_DIRECTORY
+
+# Start user-space daemon
+nvidia-cuda-mps-control -d
+
+# Run OpenMPI job.
+orterun ...
+
+# To clean up afterward, shut down daemon, remove directories, and unset variables
+echo quit | nvidia-cuda-mps-control
+for i in `env | grep CUDA_MPS | sed 's/=.*//'`; do rm -rf ${!i}; unset $i; done
+
+
+

For more information on MPS, see the NVidia guide.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Programming_Models/openacc/index.html b/Documentation/Development/Programming_Models/openacc/index.html new file mode 100644 index 000000000..57d9eb73c --- /dev/null +++ b/Documentation/Development/Programming_Models/openacc/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Openacc - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Openacc

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/Programming_Models/openmp/index.html b/Documentation/Development/Programming_Models/openmp/index.html new file mode 100644 index 000000000..5ed169dc8 --- /dev/null +++ b/Documentation/Development/Programming_Models/openmp/index.html @@ -0,0 +1,4790 @@ + + + + + + + + + + + + + + + + + + + Openmp - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Openmp

+ + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Development/VSCode/vscode/index.html b/Documentation/Development/VSCode/vscode/index.html new file mode 100644 index 000000000..dfc15ab1e --- /dev/null +++ b/Documentation/Development/VSCode/vscode/index.html @@ -0,0 +1,5075 @@ + + + + + + + + + + + + + + + + + + + + + + + Connecting With VS Code - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Connecting With VS Code#

+

Microsoft Visual Studio Code (VS Code) is a popular tool for development in many programming languages, and may be used on HPC systems. However, there are some caveats to be aware of when running it remotely.

+

Connecting with VS Code#

+

To connect to an HPC system with VS Code, install the "Remote - SSH" extension from the Extensions menu.

+

Press "F1" to open the command bar, and type or search for Remote-SSH: Connect to Host...

+

You may then enter your HPC username and the address of an HPC system to connect to.

+
    +
  • +

    To connect to Kestrel from the NREL VPN, enter username@kestrel.hpc.nrel.gov, replacing "username" with your HPC user name.

    +
  • +
  • +

    To connect to Kestrel as an external collaborator, enter username@kestrel.nrel.gov, replacing "username" with your HPC user name.

    +
  • +
+

Enter your HPC password (or password and OTP code if external) and you will be connected to a login node. You may open a folder on the remote host to browse your home directory and select files to edit, and so on.

+

Caution About VS Code Processes#

+

Please be aware that the Remote SSH extension runs processes on the remote host. This includes any extensions or helpers, including language parsers, code analyzers, AI code assistants, and so on. These extensions can take up a considerable amount of CPU and RAM on any remote host that VS Code connects to. Jupyter notebooks loaded through VS Code will also be executed on the remote host and can use excessive CPU and RAM, as well. When the remote host is a shared login node on an HPC system, this can be a considerable drain on the resources of the login node, and cause system slowdowns for all users of that login node.

+

This problem can be circumvented by using a compute node to run VS Code. This will cost AU, but will allow for full resource usage of CPU and/or RAM.

+

Kestrel#

+

Using VS Code on a compute node will require adding an ssh key.

+

SSH Key Setup#

+

You may use an existing key pair on your local computer/laptop, or create one with ssh-keygen (adding -t ed25519 is optional, but recommended.)

+

We recommend choosing a strong passphrase and storing it in a password manager. The passphrase on your key will allow you to log in via ssh, but it is not the same as your HPC account password.

+
+

SSH Key Pair Caution

+

Do not replace the key pair in your Kestrel home directory. These keys are generated when you log into the cluster, and are used by Slurm jobs to communicate between nodes. There is a corresponding public key entry in your cluster home directory ~/.ssh/authorized_keys that must also be left in place.

+
+
+

Reminder About Passwords

+

Using an SSH key with an SSH agent can remove the need to use a password to SSH to Kestrel. However, not all HPC services (including Lex) use SSH keys. An SSH key does NOT replace your HPC account password. You must maintain a regular HPC account password in accordance with our Appropriate Use Policy and User Account Password Guidelines. Ignoring password expiration date notices will lead to automatic account lockouts, and you will need to contact HPC Support to restore your account.

+
+

Once you have a key pair on your local computer, use the ssh-copy-id <username>@kestrel.hpc.nrel.gov command to copy the public portion to Kestrel. This will add your public key to the ~/.ssh/authorized_keys file in your Kestrel home directory. Alternatively, you may manually add the contents of your PUBLIC key file (for example, the contents of ~/.ssh/id_ed25519.pub or ~/.ssh/id_rsa.pub) onto the end of this file. Do not delete the existing entries in these files on Kestrel.
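Putting those steps together, a typical sequence run from your local computer looks like:

ssh-keygen -t ed25519
ssh-copy-id <username>@kestrel.hpc.nrel.gov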

+

Editing the VS Code SSH Config File#

+

We will now create a host entry in your local ssh config file to make connecting to Kestrel compute nodes easier.

+

Use the remote-ssh command to edit your VS Code ssh config file (~/.ssh/config). Add the following:

+
Host x?00?c*
+    ProxyJump <username>@kestrel.hpc.nrel.gov
+
+

This creates a "wildcard" entry that should match Kestrel compute node names. Any time an ssh command is issued on your computer that matches the wildcard, the ssh connection will "jump" through a Kestrel login node and directly to the compute node.

+

If your allocation is finished on Kestrel (e.g. at the end of the FY and your allocation will not be continuing to the next) or you otherwise anticipate no further need to use VS Code with Kestrel in this fashion, you may delete this entry from your SSH config file.

+

Start a Job and Connect VS Code#

+

SSH to Kestrel as usual (outside of VS Code) and use sbatch or salloc to start a job. (An interactive job with salloc is suggested, using a --time limited to only the expected duration of your working session with VS Code.)
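For example (the account and time values are placeholders):

salloc --account=<allocation_handle> --nodes=1 --time=02:00:00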

+

Wait until the job has started running, and take note of the node assigned to the job. Put the terminal aside, but leave the job running.

+

Now use the Remote-SSH extension in VS Code to Connect to Host... and use the hostname of the node that your job was assigned. For example, <username>@x1000c0s0b0n1.

+

This should open a new VS Code window that will connect to the compute node automatically. You may begin browsing your home directory and editing files in the VS Code window.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Environment/Building_Packages/acquire/index.html b/Documentation/Environment/Building_Packages/acquire/index.html new file mode 100644 index 000000000..35b77e22f --- /dev/null +++ b/Documentation/Environment/Building_Packages/acquire/index.html @@ -0,0 +1,4891 @@ + + + + + + + + + + + + + + + + + + + + + + + Getting the package - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Getting the package#

+
    +
  1. +

    Change working directory to the location where you'll build the package. A convenient location is /scratch/$USER, which we'll use for this example. cd /scratch/$USER

    +
  2. +
  3. +

    OpenMPI can be found at https://www.open-mpi.org/software/ompi/. This will automatically redirect you to the latest version, but older releases can be seen in the left menu bar. For this, choose version 4.1.

    +
  4. +
  5. +

    There are several packaging options. +Here, we'll get the bzipped tarball openmpi-4.1.0.tar.bz2. +You can either download it to a local machine (laptop) and then scp the file over to the HPC cluster, or get it directly on the supercomputer using wget. +

    wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.0.tar.bz2
    +
    +You should now have a compressed tarball in your scratch directory.

    +
  6. +
  7. List the contents of the tarball before unpacking. +This is very useful to avoid inadvertently filling a directory with gobs of files and directories when the tarball has them at the top of the file structure: +
    tar -tf openmpi-4.1.0.tar.bz2
    +
  8. +
  9. Unpack it via +
    tar -xjf openmpi-4.1.0.tar.bz2
    +
    +If you're curious to see what's in the file as it unpacks, add the -v option.
  10. +
  11. You should now have an openmpi-4.1.0 directory. +cd openmpi-4.1.0, at which point you are in the top level of the package distribution. +You can now proceed to configuring, making, and installing.
  12. +
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Environment/Building_Packages/config_make_install/index.html b/Documentation/Environment/Building_Packages/config_make_install/index.html new file mode 100644 index 000000000..c61fea0e8 --- /dev/null +++ b/Documentation/Environment/Building_Packages/config_make_install/index.html @@ -0,0 +1,4927 @@ + + + + + + + + + + + + + + + + + + + + + + + Config, Make, Install - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Configuring your build#

+
    +
  1. +

    We will illustrate a package build that relies on the popular autotools system. +Colloquially, this is the configure; make; make install process that is often encountered first by those new to package builds on Linux. +Other build systems like CMake (which differ primarily in the configuration steps) won't be covered. +If you need to build a package that relies on CMake, please contact hpc-help@nrel.gov for assistance.

    +
  2. +
  3. +

    We'll use GCC version 8.4.0 for this illustration, so load the associated module first (i.e., gcc/8.4.0).

    +
  4. +
+
+Building on Kestrel +

You can use any version of GCC available to you on Kestrel. + The paths in step 3 are for Eagle; please make the necessary changes for Kestrel.

+
+
    +
  1. +

    Now that you've acquired and unpacked the package tarball and changed into the top-level directory of the package, you should see a script named "configure". +In order to see all available options to an autotools configure script, use ./configure -h (don't forget to include the ./ explicit path, otherwise the script will not be found in the default Linux search paths, or worse, a different script will be found).

    +

    We will build with the following command: +

    ./configure --prefix=/scratch/$USER/openmpi/4.1.0-gcc-8.4.0 --with-slurm --with-pmi=/nopt/slurm/current --with-gnu-ld --with-lustre --with-zlib --without-psm --without-psm2 --with-ucx --without-verbs --with-hwloc=external --with-hwloc-libdir=/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hwloc-1.11.11-mb5lwdajmllvrdtwltwe3r732aca76ny/lib --enable-cxx-exceptions --enable-mpi-cxx --enable-mpi-fortran --enable-static LDFLAGS="-L/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/11.0.2-4x2ws7fkooqbrerbsnfbzs6wyr5xutdk/lib64 -L/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/cuda-11.0.2-4x2ws7fkooqbrerbsnfbzs6wyr5xutdk/lib64 -Wl,-rpath=/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hwloc-1.11.11-mb5lwdajmllvrdtwltwe3r732aca76ny/lib -Wl,-rpath=/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/cuda-11.0.2-4x2ws7fkooqbrerbsnfbzs6wyr5xutdk/lib64" CPPFLAGS=-I/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hwloc-1.11.11-mb5lwdajmllvrdtwltwe3r732aca76ny/include
    +
    +These options are given for the following reasons.

    +
      +
    • --prefix= : This sets the location that "make install" will ultimately populate. If this isn't given, generally the default is to install into /usr or /usr/local, both of which require privileged access. We'll set up the environment using environment modules to point to this custom location.
    • +
    • --with-slurm : Enables the interface with the Slurm resource manager
    • +
    • --with-pmi= : Point to the Process Management Interface, the abstraction layer for MPI options
    • +
    • --with-gnu-ld : Letting the build system know that linking will be done with GNU's linker, rather than a commercial or alternative open one.
    • +
    • --with-lustre : Enable Lustre features
    • +
    • --with-zlib : Enable compression library
    • +
    • --without-psm[2] : Explicitly turn off interfaces to Intel's Performance Scaled Messaging for the now-defunct Omni-Path network
    • +
    • --with-ucx= : Point to UCX, an intermediate layer between the network drivers and MPI
    • +
    • --without-verbs= : For newer MPIs, communications go through UCX and/or libfabric, not directly to the Verbs layer
    • +
    • --with-hwloc[-libdir]= : Point to a separately built hardware localization library for process pinning
    • +
    • --enable-cxx-exceptions, --enable-mpi-cxx : Build the C++ interface for the libraries
    • +
    • --enable-mpi-fortran : Build the Fortran interface for the libraries
    • +
    • --enable-static : Build the .a archive files for static linking of applications
    • +
    • LDFLAGS : -L options point to non-standard library locations. -Wl,-rpath options embed paths into the binaries, so that having LD_LIBRARY_PATH set correctly is not necessary (i.e., no separate module for these components).
    • +
    • CPPFLAGS : Point to header files in non-standard locations.
    • +
    +

    NOTE: The CUDA paths are not needed for CUDA function per se, but the resulting MPI errors out without setting them. +There appears to be a lack of modularity that sets up a seemingly unneeded dependency.

    +

    After lots of messages scroll by, you should be returned to a prompt following a summary of options. +It's not a bad idea to glance through these, and make sure everything makes sense and is what you intended.

    +
  2. +
  3. +

    Now that the build is configured, you can "make" it. +For packages that are well integrated with automake, you can speed the build up by parallelizing it over multiple processes with the -j # option. +If you're building this on a compute node, feel free to set this option to the total number of cores available. +On the other hand, if you're using a login node, be a good citizen and leave cores available for other users (i.e., don't use more than 4; Arbiter should limit access at any rate regardless of this setting).

    +
    make -j 4
    +
    +
  4. +
  5. +

    Try a make check and/or a make test. +Not every package enables these tests, but if they do, it's a great idea to run these sanity checks to find if your build is perfect, maybe-good-enough, or totally wrong before building lots of other software on top of it.

    +
  6. +
  7. +

    Assuming checks passed if present, it's now time for make install. +Assuming that completes without errors, you can move onto creating an environment module to use your new MPI library.

    +
  8. +
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Environment/Building_Packages/index.html b/Documentation/Environment/Building_Packages/index.html new file mode 100644 index 000000000..72e6c7dbc --- /dev/null +++ b/Documentation/Environment/Building_Packages/index.html @@ -0,0 +1,4913 @@ + + + + + + + + + + + + + + + + + + + + + + + Building Packages - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Building packages on NREL HPC for individual or project use.#

+

This training module will walk through how to build a reasonably complex package, OpenMPI, and deploy +it for use by yourself or members of a project.

+
    +
  1. +

    Acquire the package and set up for build

    +
  2. +
  3. +

    Configure, build, and install the package

    +
  4. +
  5. +

    Setting up your own environment module

    +
  6. +
+

Why build your own application?#

+
    +
  • +

    Sometimes, the package version that you need, or the capabilities you want, +are only available as source code.

    +
  • +
  • +

    Other times, a package has dependencies on other ones with application programming interfaces that change rapidly. +A source code build might have code to adapt to the (older, newer) libraries you have available, whereas a binary distribution will likely not. +In other cases, a binary distribution may be associated with a particular Linux distribution and version different from Kestrel's or Eagle's. +One example is a package for Linux version X+1 (with a shiny new libc). +If you try to run this on Linux version X, you will almost certainly get errors associated with the GLIBC version required. +If you build the application against your own, older libc version, those dependencies are not created.

    +
  • +
  • +

    Performance; for example, if a more performant numerical library is available, you may be able to link against it. +A pre-built binary may have been built against a more universally available but lower performance library. +The same holds for optimizing compilers.

    +
  • +
  • +

    Curiosity to know more about the tools you use.

    +
  • +
  • +

    Pride of building one's tools oneself.

    +
  • +
  • +

    For the sheer thrill of building packages.

    +
  • +
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Environment/Building_Packages/modules/index.html b/Documentation/Environment/Building_Packages/modules/index.html new file mode 100644 index 000000000..f5269fcf3 --- /dev/null +++ b/Documentation/Environment/Building_Packages/modules/index.html @@ -0,0 +1,4917 @@ + + + + + + + + + + + + + + + + + + + + + + + Create a Modulefile - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Setting up your module#

+
    +
  1. +

    Now that the package has been installed to your preferred location, we can set up an environment module.

    +

    a. If this is your first package, then you probably need to create a place to collect modulefiles. +For example, mkdir -p /scratch/$USER/modules/default.

    +

    b. You can look at the system's module collection(s), e.g., /nopt/nrel/apps/modules/default/modulefiles on Eagle or /nopt/nrel/apps/modules/default on Kestrel, to see how modules are organized from a filesystem perspective. +In short, each library, application, or framework has its own directory in the modulefiles directory, and the modulefile itself sits either in this directory, or one level lower to accommodate additional versioning. +In this example, there is the MPI version (4.1.0), as well as the compiler type and version (GCC 8.4.0) to keep track of. +So, we'll make a /scratch/$USER/modules/default/openmpi/4.1.0 directory, and name the file by the compiler version used to build (gcc-8.4.0). +You're free to modify this scheme to suit your own intentions.

    +

    c. In the openmpi/4.1.0/gcc840 directory you just made, or whatever directory name you chose, goes the actual modulefile. +It's much easier to copy an example from the system collection than to write one de novo, so you can do

    +
    +On Eagle +
    cp /nopt/nrel/apps/modules/default/modulefiles/openmpi/4.0.4/gcc-8.4.0.lua /scratch/$USER/modules/default/openmpi/4.1.0/.
    +
    +
    +
    +On Kestrel +
    cp /nopt/nrel/apps/modules/default/compilers_mpi/openmpi/4.1.5-gcc /scratch/$USER/modules/default/openmpi/4.1.0/.
    +
    +
    +
    +OpenMpi modulefile on Kestrel +

Please note that the OpenMPI modulefile on Kestrel is of TCL type. + It is not necessary for you to know the language to modify our examples.

    +
    +

    The Lmod modules system uses the Lua language natively for module code. +Tcl modules will also work under Lmod, but don't offer quite as much flexibility.

    +

    d. For this example, (a) the OpenMPI version we're building is 4.1.0 instead of 4.0.4 on Eagle or 4.1.5 on Kestrel, and (b) the location is in /scratch/$USER, rather than /nopt/nrel/apps. +So, edit /scratch/$USER/modules/default/openmpi/4.1.0/gcc-8.4.0.lua to make the required changes. +Most of these changes only need to be made at the top of the file; variable definitions take care of the rest.

    +

    e. Now you need to make a one-time change in order to see modules that you put in this collection (/scratch/$USER/modules/default). +In your $HOME/.bash_profile, add the following line near the top:

    +
    module use /scratch/$USER/modules/default
    +
    +

    Obviously, if you've built packages before and enabled them this way, you don't have to do this again!

    +
  2. +
  3. +

    Now logout, log back in, and you should see your personal modules collection with a brand new module.

    +
    [$USER@el1 ~]$ module avail
    +
    +---------------------------------- /scratch/$USER/modules/default -----------------------------------
    +openmpi/4.1.0/gcc-8.4.0
    +
    +

    Notice that the ".lua" extension does not appear--the converse is also true: if the extension is missing, the module will not appear via module commands! +As a sanity check, it's a good idea to load the module and check that an executable file you know exists there is in fact on your PATH:

    +
    [$USER@el1 ~]$ module load openmpi/4.1.0/gcc-8.4.0
    +[$USER@el1 ~]$ which mpirun
    +/scratch/$USER/openmpi/4.1.0-gcc-8.4.0/bin/mpirun
    +
    +
  4. +
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Environment/Customization/conda/index.html b/Documentation/Environment/Customization/conda/index.html new file mode 100644 index 000000000..573df08ea --- /dev/null +++ b/Documentation/Environment/Customization/conda/index.html @@ -0,0 +1,5362 @@ + + + + + + + + + + + + + + + + + + + + + + + Conda - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Conda

+ +

conda logo

+

Why Conda?#

+

Conda is a package manager which allows you to easily create and switch between different software environments in different languages for different purposes. With Conda, it's easy to:

+
    +
  • +

    Manage different (potentially conflicting) versions of the same software without complication

    +
  • +
  • +

    Quickly stand up even complicated dependencies for stacks of software

    +
  • +
  • +

    Share your specific programming environment with others for reproducible results

    +
  • +
+

Conda Module#

+

To use Conda on an NREL HPC cluster, you will need to load the appropriate module. To find the Conda module name on the system you are using, run module spider conda. Then, module load <module name>.
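For example:

module spider conda
module load conda        # use the exact module name reported by spider if it differs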

+

Creating Environments by Name#

+

To create a basic Conda environment, we'll start by running

+
conda create --name mypy python
+
+

where the --name option (or the shortened -n) means the environment will be specified by name and mypy will be the name of the created environment. Any arguments following the environment name are the packages to be installed.

+

To specify a specific version of a package, simply add the version number after the "=" sign

+
conda create --name mypy37 python=3.7
+
+

You can specify multiple packages for installation during environment creation

+
conda create --name mynumpy python=3.7 numpy
+
+

Conda ensures dependencies are satisfied when installing packages, so the version of the numpy package installed will be consistent with Python 3.7 (and any other packages specified).

+
+

Tip

+

It’s recommended to install all the packages you want to include in an environment at the same time to help avoid dependency conflicts.

+
+

Environment Navigation#

+

To see a list of all existing environments (useful to confirm the successful creation of a new environment):

+
conda env list
+
+

To activate your new environment:

+
conda activate mypy
+
+

Your usual command prompt should now be prefixed with (mypy), which helps keep track of which environment is currently activated.

+

To see which packages are installed from within a currently active environment:

+
conda list
+
+

When finished with this programming session, deactivate your environment with:

+
conda deactivate
+
+

Creating Environments by Location#

+

Creating environments by location is especially helpful when working on the HPC systems, as the default location is your /home/<username>/ directory, which is limited to 50 GB. To create a Conda environment somewhere besides the default location, use the --prefix flag (or the shortened -p) instead of --name when creating:

+
conda create --prefix /path/to/mypy python=3.7 numpy
+
+

This re-creates the python+numpy environment from earlier, but with all downloaded packages stored in the specified location.

+
+

Warning

+

Keep in mind that /scratch/<username> is temporary, and files are purged after 28 days of inactivity.

+
+

Unfortunately, placing an environment outside of the default folder means that it needs to be activated with the full path (conda activate /path/to/mypy) and will show the full path rather than the environment name at the command prompt.

+

To fix the cumbersome command prompt, simply modify the env_prompt setting in your .condarc file:

+
conda config --set env_prompt '({name}) '
+
+

Note that '({name})' is not a placeholder for your desired environment name but text to be copied literally. This will edit your .condarc file if you already have one or create a .condarc file if you do not. For more on modifying your .condarc file, check out the User Guide. Once you've completed this step, the command prompt will show the shortened name (mypy, in the previous example).

+

Managing Conda Environments#

+

Over time, it may become necessary to add additional packages to your environments. New packages can be installed in the currently active environment with:

+
conda install pandas
+
+

Conda will ensure that all dependencies are satisfied, which may include upgrades to existing packages in the environment. To install packages from other sources, specify the channel option:

+
conda install --channel conda-forge fenics
+
+

To add a pip-installable package to your environment:

+
conda install pip
+pip <pip_subcommand>
+
+
+

Warning: Mixing Conda and Pip

+

Issues may arise when using pip and conda together. When combining conda and pip, it is best to use an isolated conda environment. Only after conda has been used to install as many packages as possible should pip be used to install any remaining software. If modifications are needed to the environment, it is best to create a new environment rather than running conda after pip. When appropriate, conda and pip requirements should be stored in text files.

+

For more information, see the User Guide.

+
+

We can use conda list to see which packages are currently installed, but for a more version-control-flavored approach:

+
conda list --revisions
+
+

which shows changes to the environment over time. To revert back to a previous environment

+
conda install --revision 1
+
+

To remove packages from the currently activated environment:

+
conda remove pkg1
+
+

To completely remove an environment and all installed packages:

+
conda remove --name mypy --all
+
+

Conda environments can become large quickly due to the liberal creation of cached files. To remove these files and free up space you can use

+
conda clean --all
+
+

or to simply preview the potential changes before doing any actual deletion

+
conda clean --all --dry-run
+
+

Sharing Conda Environments#

+

To create a file with the the exact "recipe" used to create the current environment:

+
conda env export > environment.yaml
+
+

In practice, this recipe may be overly-specific to the point of creating problems on different hardware. To save an abbreviated version of the recipe with only the packages you explicitly requested:

+
conda env export --from-history > environment.yaml
+
+

To create a new environment with the recipe specified in the .yaml file:

+
conda env create --name mypyhpc --file environment.yaml
+
+

If a name or prefix isn't specified, the environment will be given the same name as the original environment the recipe was exported from (which may be desirable if you're moving to a different computer).

+

Speed up dependency solving#

+

To speed up dependency solving, substitute the mamba command for conda. Mamba is a dependency solver written in C++ designed to speed up the conda environment solve.

+
mamba create --prefix /path/to/mypy python=3.7 numpy
+
+

HPC Considerations#

+

Migrating from local to HPC system#

+

Interacting with your Conda environments on the HPC systems should feel exactly the same as working on your desktop. An example desktop-to-HPC workflow might go:

+
    +
  1. Create the environment locally
  2. +
  3. Verify that environment works on a minimal working example
  4. +
  5. Export local environment file and copy to HPC system (conda env export > environment.yaml)
  6. +
  7. Duplicate local environment on HPC system (conda env create -f environment.yaml)
  8. +
  9. Execute production-level runs on HPC system:
  10. +
+
#!/bin/bash 
+#SBATCH --nodes=1
+#SBATCH --time=60
+#SBATCH --account=<project_handle>
+
+module purge
+module load conda
+conda activate mypy
+
+python my_main.py
+
+

Where to store Conda environments#

+

By default, the conda module uses the home directory for package caches and named environments. This can cause problems on the HPC systems because conda environments can require a lot of storage space, and home directories have a quota of 50GB. Additionally, the home filesystem is not designed to handle heavy I/O loads, so if you're running a lot of jobs or large multi-node jobs calling conda environments that are stored in home, it can strain the filesystem.

+

Some ways to change the default storage location for conda environments and packages:

+
    +
  • +

    Use the -p PATH_NAME switch when creating or updating your environment. Make sure PATH_NAME isn't in the home directory. Keep in mind files in /scratch are deleted after about a month of inactivity.

    +
  • +
  • +

    Change the directory used for caching. This location is set by the module file to ~/.conda-pkgs. A simple way to avoid filling up the home directory with cached conda data is to soft link a location on scratch to ~/.conda-pkgs, for example ln -s /scratch/$USER/.conda-pkgs /home/$USER/.conda-pkgs. Alternatively, you can call export CONDA_PKGS_DIRS=PATH_NAME to specify somewhere to store downloads and cached files such as /projects/<allocation handle>/$USER/.conda-pkgs.

    +
  • +
  • +

    Similarly, you can specify the directory in which environments are stored by default. To do this, either set the CONDA_ENVS_PATH environment variable, or use the --prefix option as described above.

    +
  • +
+
+

Warning

+

Overriding the default location for the environment and package cache directories in your .condarc file by setting envs_dirs and pkgs_dirs won't work as expected on Kestrel. When the conda module is loaded, it overrides these settings. Instead, set the environment variables after you load the conda module as described above.

+
+
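For example, after loading the module you could redirect both the package cache and the default environment location with something like the following (the paths are illustrative):

module load conda
export CONDA_PKGS_DIRS=/projects/<allocation_handle>/$USER/.conda-pkgs
export CONDA_ENVS_PATH=/projects/<allocation_handle>/$USER/.conda-envs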

Following are some guidelines and suggestions regarding where to store environments:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Path | When to use | Caveats
/home | $HOME/.conda is the default location for environments. For one-off environments, or if you don't create environments often, this is a reasonable location for your environments and doesn't require any extra flags or parameters. | On systems such as Kestrel, $HOME is limited to 50 GB. Not suited for multi-node jobs.
/scratch | /scratch or /projects are well-suited for multiple-node jobs because these locations provide enhanced filesystem performance for parallel access. | The contents of /scratch are purged after 28 days of inactivity.
/projects | Ideal location for storing environments that will be shared with colleagues that are working on the same project. | Storage under /projects is contingent on having an HPC project allocation, and the project allocation has its own storage quota.
+

Cheat Sheet of Common Commands#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Task | ... outside environment | ... inside environment
Create by name | conda create -n mypy pkg1 pkg2 | N/A
Create by path | conda create -p path/to/mypy pkg1 pkg2 | N/A
Create by file | conda env create -f environment.yaml | N/A
Show environments | conda env list | N/A
Activate | conda activate mypy | N/A
Deactivate | N/A | conda deactivate
Install New Package | conda install -n mypy pkg1 pkg2 | conda install pkg1 pkg2
List All Packages | conda list -n mypy | conda list
Revision Listing | conda list --revisions -n mypy | conda list --revisions
Export Environment | conda env export -n mypy > environment.yaml | conda env export > environment.yaml
Remove Package | conda remove -n mypy pkg1 pkg2 | conda remove pkg1 pkg2
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Environment/Customization/images/conda_logo.png b/Documentation/Environment/Customization/images/conda_logo.png new file mode 100644 index 000000000..82a2e7567 Binary files /dev/null and b/Documentation/Environment/Customization/images/conda_logo.png differ diff --git a/Documentation/Environment/lmod/index.html b/Documentation/Environment/lmod/index.html new file mode 100644 index 000000000..44479ed76 --- /dev/null +++ b/Documentation/Environment/lmod/index.html @@ -0,0 +1,5038 @@ + + + + + + + + + + + + + + + + + + + + + + + Lmod - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Environment Modules#

+

The Lmod environment modules system is used to easily manage software environments. Modules facilitate the use of different versions of applications, libraries, and toolchains, which enables support of multiple package versions concurrently.

+

Modules typically just set environment variables that one might traditionally do manually by, for example, adding export or setenv commands to their login script. Modules add the ability to back out changes in an orderly manner as well, so users can change their environment in a reversible way. To learn how to build your own modules see Building an Application.

+

For system specific information on environments and modules, please visit the Systems section.

+

Common Module Commands#

+

The module command accepts parameters that enable users to inquire about and change the module environment. Most of the basic functionality can be accessed through the following commands.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Option | Description
spider | Prints available modules in a path-agnostic format.
avail | Prints available modules grouped by path.
list | Prints all currently loaded modules.
display 'name' | Prints settings and paths specified for a particular module.
help 'name' | Prints help message for a particular module.
load 'name' | Loads particular module. For modules listed as the '(default)', the short package name is sufficient. To load another version of the package the long package name is required (e.g., module load fftw/3.3.8/gcc-7.3.0).
unload 'name' | Unloads particular module.
swap 'name 1' 'name 2' | First unload modName1 and then load modName2.
use {-a} A_PATH | Prefix {suffix} the path $A_PATH to your $MODULEPATH variable, in order to find modules in that location.
unuse {-a} A_PATH | Remove the path $A_PATH from your $MODULEPATH variable.
+

Examples#

+
+Determining loaded modules +

To determine which modules are already loaded, run the command: +

$ module list
+

+
+
+Seeing available modules +

To get a list of available modules, type:

+
$ module avail
+
+

This should output a full list of all modules and their versions available on the system for you to load. The modules denoted with (L) are already loaded in your environment. The module versions denoted with (D) are the default versions that will load if you do not specify the version when running module load.

+

To get a list of the available module defaults, type: +

$ module --default avail
+

+
+
+Loading and unloading a module +

$ module load <module_name>/<version>
+...
+$ module unload <module_name>/<version>
+...
+
+Here <module_name> is to be replaced by the name of the module to load. It is advised to ALWAYS include the full versioning in your load statements, and not rely on explicit or implicit default behaviors.

+
+
+Seeing module specifics +

It's a good idea to look at two other commands to see what a module does, and what software dependencies there are, as illustrated below:

+
$ module show <module_name>/<version>
+...
+$ module help <module_name>/<version>
+...
+
+

The environment variables set by the module can then be used in build scripts.

+

It is not necessary to load a module in order to use the module show command, this may be done at any time to see what a module does.

+
+
+Swap a module environment +

Module files for different versions can easily be swapped: +

$ module load openmpi/3.1.3/gcc-7.3.0
+$ module list
+Currently Loaded Modulefiles:
+1) openmpi/3.1.3/gcc-7.3.0
+$ module swap openmpi/3.1.3/gcc-7.3.0 openmpi/2.1.5/gcc-7.3.0
+$ module list
+Currently Loaded Modulefiles:
+1) openmpi/2.1.5/gcc-7.3.0
+

+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Environment/shell/index.html b/Documentation/Environment/shell/index.html new file mode 100644 index 000000000..1d1b51184 --- /dev/null +++ b/Documentation/Environment/shell/index.html @@ -0,0 +1,5396 @@ + + + + + + + + + + + + + + + + + + + + + + + Shell Startup - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Shell Startup#

+

When you log in to a Linux-based machine, you interact with the operating system via a program called a shell. There are various types of shell programs. One of the more common is bash, which is the default shell on NREL's HPC platforms. This document describes ways you can customize your shell's behavior, in particular bash's.

+

Getting Started#

+

When you have a terminal window open and attached to a platform, you are actually running a program on the remote computer called a shell. There are various types of shell programs. One of the more common is bash.

+

The shell program provides your link to the machine's operating system (OS). It is the interface between a user and the computer. It controls the computer and provides output to the user. There are various types of interfaces but here we discuss the command line interface. That is, you type commands and the computer responds.

+

What happens on login#

+

When you login to a machine you are put in your home directory. You can see this by running the command pwd. Run the command ls -a to get a listing of the files. The -a option for the ls commands enables it to show files that are normally hidden. You'll see two important files that are used for setting up your environment.

+
    +
  • .bash_profile
  • +
  • .bashrc
  • +
+

These files are added to your home directory when your account is created.

+

When you login the file .bash_profile is sourced (run) to set up your environment. The environment includes settings for important variables, command aliases, and functions.

+

Here is the default version of .bash_profile.

+
[nreluser@el3 ~]$ cat ~/.bash_profile
+# .bash_profile
+
+# Get the aliases and functions
+if [ -f ~/.bashrc ]; then
+    . ~/.bashrc
+fi
+
+# User specific environment and startup programs
+
+PATH=$PATH:$HOME/.local/bin:$HOME/bin
+
+export PATH
+
+

We'll discuss this file starting at the bottom. The environmental variable PATH is set. PATH points to directories where the computer will look for commands to run. You can append directories as shown here. The "new" PATH will be the PATH set at the system level plus the directories $HOME/.local/bin and $HOME/bin where $HOME is your home directory.

+

Notice the lines

+
if [ -f ~/.bashrc ]; then
+    . ~/.bashrc
+fi
+
+

The "if" statement says that if you have a file .bashrc in your home directory then run it. The dot is shorthand for "source" and ~/ is shorthand for your home directory.

+

So lets look at the default ~/.bashrc file

+
[nreluser@el3 ~]$ cat /etc/skel/.bashrc
+# .bashrc
+
+# Source global definitions
+if [ -f /etc/bashrc ]; then
+    . /etc/bashrc
+fi
+
+# Uncomment the following line if you don't like systemctl's auto-paging feature:
+# export SYSTEMD_PAGER=
+
+# User specific aliases and functions
+
+

This just runs the system version of bashrc.

+

Note in both of these files we have a place where you are encouraged to add user defined aliases and functions. You can also set environmental variables, such as PATH and a related variable LD_LIBRARY_PATH. You may want to load modules which also set environmental variables.

+

Suggestions (Philosophy)#

+

We're going to discuss customizing your environment. This is done by editing these two files. Before we do that here are three suggestions.

+
    +
  1. If you are new to linux use the nano editor
  2. +
  3. Make a backup of your current versions of the two files
  4. +
  5. Make additions in external files
  6. +
+

Nano is an easy to learn and use text editor. The official web page for nano is https://www.nano-editor.org. There are many on line tutorials. There are other editors available but nano is a good starting point.

+

It is very easy to make mistakes when doing edits or you just might want to go back to a previous version. So back it up. Here are commands to do so.

+

[hpcuser2@eyas1 ~]$ NOW=`date +"%y%m%d%H%M"`
+[hpcuser2@eyas1 ~]$ echo $NOW
+2303221513
+[hpcuser2@eyas1 ~]$ cp .bashrc bashrc.$NOW
+[hpcuser2@eyas1 ~]$ cp .bash_profile bash_profile.$NOW
+
+The first command creates a date/time stamp. The last two commands copy files using the date/time stamp as part of the filename.

+
[hpcuser2@eyas1 ~]$ ls *2303221513
+bash_profile.2303221513  bashrc.2303221513
+[hpcuser2@eyas1 ~]$ 
+
+

In most cases you won't need to edit both .bashrc and .bash_profile. Since running .bash_profile runs .bashrc you can usually just edit .bashrc. (See the section Difference between login and interactive shells which describes cases where .bashrc is run even if .bash_profile is not.)

+

Instead of adding a bunch of text to .bashrc, make your additions in external file(s) and just source those files inside of .bashrc. Then you can "turn off" additions by simply commenting out the source lines. Also, you can test additions by sourcing the file from the command line.

+

Additions#

+

The most common additions to your environment fall into these categories:

+
    +
  1. Setting variables
  2. +
  3. Creating Aliases
  4. +
  5. Loading modules
  6. +
  7. Adding Functions
  8. +
+

We'll discuss each. We're going to assume that you created a directory ~/MYENV and in that directory you have the files:

+
    +
  • myvars
  • +
  • myaliases
  • +
  • mymods
  • +
  • myfuncs
  • +
+

Then to enable all of your additions you can add the following lines to your .bashrc file

+
if [ -f ~/MYENV/myvars ];    then . ~/MYENV/myvars ;    fi
+if [ -f ~/MYENV/myaliases ]; then . ~/MYENV/myaliases ; fi
+if [ -f ~/MYENV/mymods ];    then . ~/MYENV/mymods ;    fi
+if [ -f ~/MYENV/myfuncs ];   then . ~/MYENV/myfuncs ;   fi
+
+

Note that the additions will not take effect until you logout/login or until you run the command source ~/.bashrc. Before going through the logout/login process you should test your additions by manually running these commands in the terminal window.

+

Setting variables#

+

We have discussed the PATH variable. It points to directories which contain programs. If you have an application that you built, say myapp in /projects/mystuff/apps, you can add the line

+

export PATH=/projects/mystuff/apps:$PATH

+

to your ~/MYENV/myvars file. Then when you log in, the system will be able to find your application. The directories in path variables are separated by a ":". If you forget to add $PATH to the export line, the new PATH variable will be truncated and you will not see many "system" commands.

+

Another important variable is LD_LIBRARY_PATH. This points to directories containing libraries your applications need that are not "bundled" with your code. Assuming the libraries are in /projects/mystuff/lib, you would add the following line:

+

export LD_LIBRARY_PATH=/projects/mystuff/lib:$LD_LIBRARY_PATH

+

If you have a commercial application that requires a license server you may need to set a variable to point to it. For example

+

export LSERVER=license-1.hpc.nrel.gov:4691

+

Creating aliases#

+

Aliases are command shortcuts. If there is a complicated command that you run often, you might want to create an alias for it. You can get a list of aliases defined for you by just running the command alias. The syntax for an alias is:

+

alias NAME="what you want to do"

+

Here are a few examples that you could add to your ~/MYENV/myaliases file.

+
#Show my running and queued jobs in useful format
+alias sq='squeue -u $USER --format='\''%10A%15l%15L%6D%20S%15P%15r%20V%N'\'''
+
+#Kill all my running and queued jobs
+alias killjobs="scancel -u $USER"
+
+#Get a list of available modules
+alias ma='module avail'
+
+#Get the "source" for a git repository
+alias git-home='git remote show origin'
+
+#Get a compact list of loaded modules
+alias mlist='module list 2>&1 |  egrep -v "Current|No modules loaded" | sed "s/..)//g"'
+
+

Loading modules#

+

Most HPC platforms run module systems. Loading a module changes some environmental variable settings; often PATH and LD_LIBRARY_PATH are changed. In general, loading a module will allow you to use a particular application or library.

+

If you always want gcc version 12 and python 3.10 in your path, then you could add the following to your ~/MYENV/mymods file

+
module load gcc/12.1.0  
+module load python/3.10.2
+
+

Running the command module avail will show the modules installed on the system.

+

If you have modules that you created you can make them available to the load command by adding a command like the following in your ~/MYENV/mymods file.

+

module use /projects/mystuff/mods

+

The "module use" command needs to come before any module load command that loads your custom modules.
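
+

As a minimal sketch, a ~/MYENV/mymods file that uses a personal module directory might look like this (mytool/1.0 is a hypothetical module name used only for illustration):

+
# "module use" must come before loading modules that live in the added directory
+module use /projects/mystuff/mods
+module load mytool/1.0      # hypothetical module provided by /projects/mystuff/mods
+module load gcc/12.1.0
+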

+

Adding functions#

+

Functions are like aliases but are in general multiline and more complex. You can run the command compgen -A function to see a list of defined functions. Here are a few functions you might want to add to your environment

+
# given a name of a function or alias show its definition
+func () 
+{ 
+    typeset -f $1 || alias $1
+}
+
+# find files in a directory that changed today
+today () 
+{ 
+    local now=`date +"%Y-%m-%d"`;
+    if (( $# > 0 )); then
+        if [[ $1 == "-f" ]]; then
+            find . -type f -newermt $now;
+        fi;
+        if [[ $1 == "-d" ]]; then
+            find . -type d -newermt $now;
+        fi;
+    else
+        find . -newermt $now;
+    fi
+}
+
+

Most people who have worked in HPC for some time have collected many functions and aliases they would be willing to share with you.

+

If you have a number of files in your ~/MYENV directory that you want sourced at startup, you can replace the set of 4 "if" lines shown above with a "for" loop. The following will source every file in the directory. It will not source files in subdirectories within ~/MYENV. If you want to temporarily turn off additions you can put them in a subdirectory, ~/MYENV/OFF. The find command shown here will return a list of files in the directory but not in subdirectories. Again, recall that the changes will not be in effect until you logout/login.

+
for x in `find ~/MYENV  -type f` ; do
+   source $x 
+done
+
+

Difference between login and interactive shells#

+

This section is based in part on https://stackoverflow.com/questions/18186929/what-are-the-differences-between-a-login-shell-and-interactive-shell

+

The shell that gets started when you open a window on an HPC system is called a login shell. It is also an interactive shell in that you are using it to interact with the computer. Bash can also be run as a command. That is, if you enter bash as a command you will start a new instance of the bash shell. This new shell is an interactive shell but not a login shell because it was not used to log in to the platform.

+

When you start a new interactive shell the file .bashrc is sourced. When you start a login shell the file .bash_profile is sourced. However, most versions of .bash_profile have a line that will also source .bashrc.
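
+

If you are unsure which kind of shell you are in, bash can tell you. For example:

+
# Prints "login" for a login shell and "not login" otherwise (bash built-in option)
+shopt -q login_shell && echo login || echo "not login"
+# For a login shell, echo $0 typically prints a name starting with a dash (e.g. -bash)
+echo $0
+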

+

When you submit a slurm batch job with the command sbatch, neither .bashrc nor .bash_profile is sourced. Note that, by default, the environment you have set up at the time you run sbatch is passed to the job.
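
+

If a batch job needs your customizations, one option is to source them explicitly inside the job script. Here is a minimal sketch (the account string and file names are placeholders):

+
#!/bin/bash
+#SBATCH --time=00:05:00
+#SBATCH --account=<your_account>
+
+# .bash_profile and .bashrc are not sourced for batch jobs, so pull in what the job needs
+if [ -f ~/MYENV/myvars ]; then . ~/MYENV/myvars ; fi
+module load gcc/12.1.0
+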

+

When you start a slurm interactive session, for example using the command

+
salloc --nodes=1 --time=01:00:00 --account=$MYACCOUNT --partition=debug
+
+

the file .bashrc is sourced.

+

Troubleshooting#

+

The most common issue when modifying your environment is forgetting to add the previous version of PATH when you set a new one. For example

+

Do this:

+

export PATH=/projects/myapps:$PATH

+

Don't do this:

+

export PATH=/projects/myapps

+

If you run the second command, you will lose access to most commands, and you may need to log out and log back in to restore access.
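
+

If this happens in your current session, you can often recover without logging out, since commands can still be run by their full paths:

+
# Run commands by absolute path while PATH is broken
+/usr/bin/ls
+# Restore a minimal PATH for the rest of the session
+export PATH=/usr/bin:/bin:/usr/sbin:/sbin
+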

+

Always test additions before actually implementing them. If you use the files in ~/MYENV to modify your environment, manually run the commands

+
if [ -f ~/MYENV/myvars ];    then . ~/MYENV/myvars ;    fi
+if [ -f ~/MYENV/myaliases ]; then . ~/MYENV/myaliases ; fi
+if [ -f ~/MYENV/mymods ];    then . ~/MYENV/mymods ;    fi
+if [ -f ~/MYENV/myfuncs ];   then . ~/MYENV/myfuncs ;   fi
+
+

to test things. After they are working as desired, add these lines to your .bashrc file. You can add a # in front of the lines in your .bashrc file to disable them.
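
+

For example, a disabled line in .bashrc would look like:

+
# Temporarily disabled; remove the leading # to re-enable
+#if [ -f ~/MYENV/myaliases ]; then . ~/MYENV/myaliases ; fi
+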

+

There are copies of the default .bashrc and .bash_profile files in

+
    +
  • /etc/skel/.bash_profile
  • +
  • /etc/skel/.bashrc
  • +
+

Some commands#

+
man — Print manual or get help for a command  EXAMPLE: man ls
+man bash will show many "built in" commands in the shell
+
+ls — List directory contents
+  ls -a      Show all files, including hidden files
+  ls -l      Do a detailed listing
+  ls -R      Recursive listing, current directories subdirectories
+  ls *.c     List files whose names end in ".c"
+
+echo  - Prints text to the terminal window 
+
+mkdir — Create a directory
+
+pwd — Print working directory, that is give the name of your 
+      current directory.
+
+cd — Change directory
+  cd ~  Go to your home directory
+  cd .. Go up one level in the directory tree
+
+mv — Move or rename a file or directory
+
+nano - Edit a file. See above. 
+
+rm - Remove a file
+rm -r DIRECTORY will recursively remove a directory.
+      Use rm -rf very carefully. DO NOT run rm -rf ~ ; it will wipe out
+      your home directory.
+
+rmdir — Remove a directory. It must be empty to be removed. It's 
+        safer than rm -rf.
+
+less — view the contents of a text file
+
+> — redirect output from a command to a file.  Example ls > myfiles
+>> - same as > except it appends to the file
+> /dev/null A special case of > that suppresses normal output by sending
+            it to the "null file"
+2> err 1> out    Send errors from a command to the file err and normal
+                 output to out
+
+1>both 2>&1 Send output and errors to the file "both"
+
+sort - Output a sorted version of a file.  Has many options.
+
+|  A pipe takes the standard output of one command and passes it as 
+   the input to another.  Example  cat mydata | sort
+
+cat — Read a file and send output to the terminal.  To concatenate files        
+      cat one two > combined
+
+head — Show the start of a file
+
+tail — Show the end of a file
+
+which - Show the location of a command.  EXAMPLE: which ls
+        Which will not show bash built in commands
+
+exit — Exit out of a shell, normally used to logout
+
+grep - search for a string in a file(s) or output
+
+history -  display the command history
+
+source  -  Read  and  execute  commands  from a file
+
+find - locate files/directories with particular characteristics.  Find 
+       has many options and capabilities.  "man find" will show all the
+       options.  However, an online search might be the best way to
+       determine the options you want.
+
+find . -name "*xyz*"    Find all files, in the current directory and below that 
+                        have names that contain xyz.
+find .  -type f         Find all files, in the current directory and below.
+find .  -type d         Find all directories, in the current directory and below.
+find . -newermt `date +"%Y-%m-%d"` 
+                        Find files that have changed today.
+
+compgen                 Show various sets of commands
+compgen -a              list all bash aliases
+compgen -b              list bash builtin commands
+compgen -A function     list all the bash functions.
+compgen -k              list all the bash keywords
+compgen -c              list all commands available to you 
+compgen -c | grep file  Show commands that have "file" as part of the
+                        name
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Machine_Learning/Containerized_TensorFlow/index.html b/Documentation/Machine_Learning/Containerized_TensorFlow/index.html new file mode 100644 index 000000000..7552c5531 --- /dev/null +++ b/Documentation/Machine_Learning/Containerized_TensorFlow/index.html @@ -0,0 +1,4981 @@ + + + + + + + + + + + + + + + + + + + + + Containerized TensorFlow - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Containerized TensorFlow

+ +

TensorFlow with GPU support - Apptainer#

+

This Apptainer image supplies TensorFlow 2.17.0 optimized for use with GPU nodes running CUDA > 12.3 (which works with Kestrel's H100s). It also includes opencv, numpy, pandas, seaborn, scikit-learn, and a number of other Python libraries. More information about TensorFlow's containerized images can be found on DockerHub.

+

For more information on Apptainer in general, please see: Containers.

+

Quickstart#

+

After allocating a job, note that you will have to bind mount /nopt (where the image lives) as well as the parent directory of where you are working from (e.g., /scratch or /projects)

+
# Get allocation
+salloc --gres=gpu:2 -N 1 --mem=80G -n 32 -A <allocation handle> -t 01:00:00 -p debug
+# Run Apptainer in srun environment
+module load apptainer
+# Note that you will have to bind mount /nopt (where the image lives) as well as the parent directory of where you are working from (e.g., /scratch or /projects)
+cd /projects/<MY_HPC_PROJECT>
+srun --gpus=2 --pty apptainer shell -B /nopt:/nopt -B /projects:/projects --nv /nopt/nrel/apps/gpu_stack/ai_substack/tensorflow-2.17.0-gpu-jupyter.sif
+
+

Building a custom image based on TensorFlow#

+

In order to build a custom Apptainer image based on this one, Docker must be installed on your local computer. Please refer to our example Docker build workflow for HPC users for more information on how to get started.

+

This workflow is useful if you need to modify the prebuilt Tensorflow image for your own purposes (such as if you need extra Python libraries to be available). You can copy a requirements.txt into the container during buildtime and upload the resulting image to Kestrel, where it can be converted to Apptainer format for runtime.

+
    +
  1. Update Dockerfile shown below to represent the changes desired and save to working directory.
    +
    FROM tensorflow/tensorflow:2.17.0-gpu-jupyter
    +ENV DEBIAN_FRONTEND="noninteractive" 
    +RUN apt-get -y update
    +RUN apt-get -y install python3-opencv
    +RUN mkdir /custom_env
    +COPY requirements.txt /custom_env
    +RUN pip install -r /custom_env/requirements.txt
    +
  2. +
  3. Update requirements.txt shown below for changing the python library list and save to working directory. +
    seaborn
    +pandas
    +numpy
    +scikit-learn
    +git+https://github.com/tensorflow/docs
    +
  4. +
  5. Build new Docker image for x86_64. +
    docker build -t tensorflow-custom-tag-name . --platform=linux/amd64
    +
  6. +
  7. Follow the instructions here for exporting the Docker image to a .tar archive, uploading it to Kestrel, and using Apptainer to convert it to Apptainer format to run on HPC. A rough sketch of these steps is shown after this list.
  8. +
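
+

As a rough sketch of the last step above (image and file names are placeholders, and the exact upload method is up to you):

+
# On your local machine: export the image and copy it to Kestrel
+docker save -o tensorflow-custom.tar tensorflow-custom-tag-name
+scp tensorflow-custom.tar <your-username>@kestrel.hpc.nrel.gov:/projects/<MY_HPC_PROJECT>/
+
+# On Kestrel: convert the archive to Apptainer (SIF) format
+module load apptainer
+apptainer build tensorflow-custom.sif docker-archive://tensorflow-custom.tar
+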
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Machine_Learning/Reinforcement_Learning/images/ppo_rew_comparison.png b/Documentation/Machine_Learning/Reinforcement_Learning/images/ppo_rew_comparison.png new file mode 100644 index 000000000..84bd63f55 Binary files /dev/null and b/Documentation/Machine_Learning/Reinforcement_Learning/images/ppo_rew_comparison.png differ diff --git a/Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpag-2.png b/Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpag-2.png new file mode 100644 index 000000000..f84e1d50f Binary files /dev/null and b/Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpag-2.png differ diff --git a/Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpage.png b/Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpage.png new file mode 100644 index 000000000..596878aa1 Binary files /dev/null and b/Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpage.png differ diff --git a/Documentation/Machine_Learning/Reinforcement_Learning/index.html b/Documentation/Machine_Learning/Reinforcement_Learning/index.html new file mode 100644 index 000000000..a424a2e73 --- /dev/null +++ b/Documentation/Machine_Learning/Reinforcement_Learning/index.html @@ -0,0 +1,5561 @@ + + + + + + + + + + + + + + + + + + + + + + + Reinforcement Learning - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Reinforcement Learning on Eagle#

+

Welcome to the first NREL HPC tutorial for Reinforcement Learning (RL)!

+

This tutorial covers an extended, albeit simplified, introduction to OpenAI Gym and Ray/RLlib, which you can use to effortlessly design, create, and run your own RL experiments on Eagle.

+

You can find the full material of this tutorial in the NREL/HPC GitHub repo.

+

The tutorial covers the following:

+
    +
  • Brief introduction to RL and Ray
  • +
  • Agent training with Ray/RLlib:
      +
    • Experimenting with Ray Tune
    • +
    • Single node/Single core.
    • +
    • Single node/Multiple cores.
    • +
    • Multiple nodes.
    • +
    +
  • +
  • Run experiments using GPUs for policy learning (helpful for large-scale observation and/or action spaces)
  • +
+

Run OpenAI Gym on a single node/single core#

+

Log in to your Eagle account, create a new Anaconda environment as described in the tutorial repo, and test your installation by running a small example using one of the standard Gym environments (e.g. CartPole-v0).

+

Activate the Anaconda environment and start a Python session +

module purge
+conda activate /scratch/$USER/conda-envs/myenv
+python
+
+Then, run the following: +
import gym
+
+env = gym.make("CartPole-v0")
+env.reset()
+
+done = False
+
+while not done:
+    action = env.action_space.sample()
+    obs, rew, done, _ = env.step(action)
+    print(action, obs, rew, done)
+
+If everything works correctly, you will see an output similar to: +
0 [-0.04506794 -0.22440939 -0.00831435  0.26149667] 1.0 False
+1 [-0.04955613 -0.02916975 -0.00308441 -0.03379707] 1.0 False
+0 [-0.05013952 -0.22424733 -0.00376036  0.2579111 ] 1.0 False
+0 [-0.05462447 -0.4193154   0.00139787  0.54940559] 1.0 False
+0 [-0.06301078 -0.61445696  0.01238598  0.84252861] 1.0 False
+1 [-0.07529992 -0.41950623  0.02923655  0.55376634] 1.0 False
+0 [-0.08369004 -0.61502627  0.04031188  0.85551538] 1.0 False
+0 [-0.09599057 -0.8106737   0.05742218  1.16059658] 1.0 False
+0 [-0.11220404 -1.00649474  0.08063412  1.47071687] 1.0 False
+1 [-0.13233393 -0.81244634  0.11004845  1.20427076] 1.0 False
+1 [-0.14858286 -0.61890536  0.13413387  0.94800442] 1.0 False
+0 [-0.16096097 -0.8155534   0.15309396  1.27964413] 1.0 False
+1 [-0.17727204 -0.62267747  0.17868684  1.03854806] 1.0 False
+0 [-0.18972559 -0.81966549  0.1994578   1.38158021] 1.0 False
+0 [-0.2061189  -1.0166379   0.22708941  1.72943365] 1.0 True
+
+Note that the above process does not involve any training.

+

Agent training with Ray/RLlib#

+

RL algorithms are notorious for the amount of data they need to collect in order to learn policies. The more data collected, the better the training will (usually) be. The best way to do this is to run many Gym instances in parallel to collect experience, and this is where RLlib assists.

+

RLlib is an open-source library for reinforcement learning that offers both high scalability and a unified API for a variety of applications. It supports the major deep learning frameworks, such as TensorFlow and PyTorch, although most parts are framework-agnostic and can be used with either one.

+

The RL policy learning examples provided in this tutorial demonstrate the RLlib abilities. For convenience, the CartPole-v0 OpenAI Gym environment will be used.

+

The most straightforward way is to create a Python "trainer" script. It will import the necessary packages, set up flags, and run the experiments, all nicely put together in a few lines of Python code.

+

Import packages#

+

Begin trainer by importing the ray package: +

import ray
+from ray import tune
+
+Ray consists of an API readily available for building distributed applications. On top of it, there are several problem-solving libraries, one of which is RLlib.

+

Tune is also one of Ray's libraries for scalable hyperparameter tuning. All RLlib trainers (scripts for RL agent training) are compatible with Tune API, making experimenting easy and streamlined.

+

Also import the argparse package and set up some flags. Although this step is not mandatory, these flags allow you to control certain hyperparameters, such as:

+
    +
  • RL algorithm utilized (e.g. PPO, DQN)
  • +
  • Number of CPUs/GPUs
  • +
  • ...and others
  • +
+
import argparse
+
+

Create flags#

+

Begin by defining the following flags: +

parser = argparse.ArgumentParser()
+parser.add_argument("--num-cpus", type=int, default=0)
+parser.add_argument("--num-gpus", type=int, default=0)
+parser.add_argument("--name-env", type=str, default="CartPole-v0")
+parser.add_argument("--run", type=str, default="DQN")
+parser.add_argument("--local-mode", action="store_true")
+
+All of them are self-explanatory, however let's see each one separately.

+
    +
  1. --num-cpus: Defines the number of CPU cores used for experience collection (Default value 0 means allocation of a single CPU core).
  2. +
  3. --num-gpus: Allocates a GPU node for policy learning (works only for TensorFlow-GPU). Besides whole values (1, 2, etc.), it also accepts fractional values, in case 100% of the GPU is not necessary.
  4. +
  5. --name-env: The name of the OpenAI Gym environment.
  6. +
  7. --run: Specifies the RL algorithm for agent training.
  8. +
  9. --local-mode: Helps define whether experiments run on a single core or on multiple cores.
  10. +
+

Initialize Ray#

+

Ray is able to run either in local mode (e.g., on a laptop or personal computer) or on a cluster.

+

For the first experiment, only a single core is needed; therefore, set up Ray to run in local mode. Then, set the number of CPU cores to be used.

+

Run experiments with Tune#

+

This is the final step in this basic trainer. Tune's tune.run function initiates the agent training process. There are three main arguments in this function:

+
    +
  • RL algorithm (string): It is defined in the --run flag (PPO, DQN, etc.).
  • +
  • stop (dictionary): Provides a criterion to stop training (in this example it is the number of training iterations; stop training when iterations reach 10,000).
  • +
  • config (dictionary): Basic information for training, contains the OpenAI Gym environment name, number of CPUs/GPUs, and others.
  • +
+

tune.run(
+    args.run,
+    name=args.name_env,
+    stop={"training_iteration": 10000},
+    config={
+        "env": args.name_env,
+        "num_workers": args.num_cpus, 
+        "num_gpus": args.num_gpus,
+        "ignore_worker_failures": True
+        }
+    )
+
+The RLlib trainer is ready!

+

Besides the aforementioned default hyperparameters, every RL algorithm provided by RLlib has its own hyperparameters with default values that can be tuned in advance.

+

The code of the trainer in this example can be found in the tutorial repo.

+

Run experiments on Eagle#

+

Follow the steps in the tutorial repo carefully.

+

Run multi-core experiments#

+

The previous example is designed to run on a single CPU core. However, as explained above, RL training benefits greatly from running multiple concurrent OpenAI Gym rollouts. A single node on Eagle has 36 CPU cores, so use any number of those in order to speed up your agent training.

+

For all 36 cores, adjust the --num-cpus hyperparameter to reflect all CPUs on the node: +

python simple_trainer.py --num-cpus 35
+
+Again, RLlib by default utilizes a single CPU core for the trainer itself, so setting --num-cpus to 35 means that all 36 cores are requested.

+

Such is not the case with the num_gpus key, where zero means no GPU is allocated. This is because GPUs are used for policy training and not for running the OpenAI Gym environment instances, so they are not mandatory (although having a GPU node can assist agent training by reducing training time).

+

Run experiments on multiple nodes#

+

Let's focus now on cases where the problem under consideration is highly complex and requires vast amounts of training data to train the policy network in a reasonable amount of time. It may be, then, that you will require more than one node to run your experiments. In this case, it is better to use a slurm script file that includes all the necessary commands for agent training using multiple CPUs and multiple nodes.

+

Example: CartPole-v0#

+

As explained above, CartPole is a rather simple environment, and solving it using multiple cores on a single node feels like overkill, let alone multiple nodes! However, it is a good example for gaining experience running RL experiments using RLlib.

+

For multiple nodes it is more convenient to use a slurm script instead of an interactive node. Slurm files are submitted as sbatch <name_of_your_batch_script>, and the results are exported in a slurm-<job_id>.out file. The .out file can be followed interactively during training using the tail -f slurm-<job_id>.out command. Otherwise, after training, open it using a standard text editor (e.g. nano). +Next, the basic parts of the slurm script file are given. The repo also provides the complete script.

+

The slurm file begins with defining some basic SBATCH options, including the desired training time, number of nodes, tasks per node, etc.

+

#!/bin/bash --login
+
+#SBATCH --job-name=cartpole-multiple-nodes
+#SBATCH --time=00:10:00
+#SBATCH --nodes=3
+#SBATCH --tasks-per-node=1
+#SBATCH --cpus-per-task=36
+#SBATCH --account=A<account>
+env
+
+Allocating multiple nodes means creating a Ray cluster. A Ray cluster consists of a head node and a set of worker nodes. The head node needs to be started first, and the worker nodes are given the address of the head node to form the cluster.

+

The agent training will run for 10 minutes (SBATCH --time=00:10:00), and on three Eagle CPU nodes (SBATCH --nodes=3). Every node will execute a single task (SBATCH --tasks-per-node=1), which will be executed on all 36 cores (SBATCH --cpus-per-task=36). Then, define the project account. Other options are also available, such as whether to prioritize the experiment (--qos=high).

+

Use the commands to activate the Anaconda environment. Do not forget to unset LD_PRELOAD. +

module purge
+conda activate /scratch/$USER/conda-envs/env_example
+unset LD_PRELOAD
+
+Set up the Redis server that will allow all the nodes you requested to communicate with each other. For that, set a Redis password: +
ip_prefix=$(srun --nodes=1 --ntasks=1 -w $node1 hostname --ip-address)
+port=6379
+ip_head=$ip_prefix:$port
+redis_password=$(uuidgen)
+
+Submit the jobs one at a time to the workers, starting with the head node and moving on to the rest of them. +
srun --nodes=1 --ntasks=1 -w $node1 ray start --block --head \
+--node-ip-address="$ip_prefix" --port=$port --redis-password=$redis_password &
+sleep 10
+
+echo "starting workers"
+for ((  i=1; i<=$worker_num; i++ ))
+do
+  node2=${nodes_array[$i]}
+  echo "i=${i}, node2=${node2}"
+  srun --nodes=1 --ntasks=1 -w $node2 ray start --block --address "$ip_head" --redis-password=$redis_password &
+  sleep 5
+done
+
+Set the Python script to run. Since this experiment will run on a cluster, Ray will be initialized as: +
ray.init(_redis_password=args.redis_password, address=os.environ["ip_head"])
+num_cpus = args.num_cpus - 1
+
+The --redis-password option must be active, along with the total number of CPUs: +
python -u simple_trainer.py --redis-password $redis_password --num-cpus $total_cpus
+
+The experiment is ready to begin, simply run: +
sbatch <your_slurm_file>
+
+If the trainer script is in a different directory, make sure to cd to that directory in the slurm script before executing it. +
### Example where the trainer is on scratch:
+cd /scratch/$USER/path_to_specific_directory
+python -u simple_trainer.py --redis-password $redis_password --num-cpus $total_cpus
+

+

Experimenting using GPUs#

+

It is now time to learn how to run experiments using GPU nodes on Eagle, which can reduce training times considerably. GPU nodes, however, are best utilized only for environments with very large observation and/or action spaces. CartPole will be used again to establish a template.

+

Allocate GPU node#

+

The following instructions are the same for both regular and Optimized TF versions of the Anaconda environments

+

Running experiments with combined CPU and GPU nodes is not as straightforward as running them using only CPU nodes (either single or multiple nodes). In particular, heterogeneous slurm jobs have to be submitted.

+

Begin by specifying some basic options, similar to the previous section: +

#!/bin/bash  --login
+
+#SBATCH --account=A<account>
+#SBATCH --job-name=cartpole-gpus
+#SBATCH --time=00:10:00
+
The slurm script will clearly define the various jobs. These jobs include the CPU nodes that will carry out the environment rollouts, and the GPU node for policy learning. Eagle has 44 GPU nodes and each node has 2 GPUs. Either request one GPU per node (--gres=gpu:1), or both of them (--gres=gpu:2). For the purposes of this tutorial, one GPU on a single node is utilized.

+

In total, slurm nodes can be categorized as:

+
    +
  • A head node, and multiple rollout nodes (as before)
  • +
  • A policy training node (GPU)
  • +
+

Include the hetjob header for both the rollout nodes and the policy training node. Three CPU nodes are requested to be used for rollouts and a single GPU node is requested for policy learning: +

# Ray head node
+#SBATCH --nodes=1
+#SBATCH --tasks-per-node=1
+
+# Rollout nodes - Nodes with multiple runs of OpenAI Gym 
+#SBATCH hetjob
+#SBATCH --nodes=3
+#SBATCH --tasks-per-node=1
+#SBATCH --cpus-per-task=36
+
+# Policy training node - This is the GPU node
+#SBATCH hetjob
+#SBATCH --nodes=1
+#SBATCH --tasks-per-node=1
+#SBATCH --partition=debug
+#SBATCH --gres=gpu:1
+
+Of course, any number of CPU/GPU nodes can be requested, depending on problem complexity.

+

As an example, a single node and perhaps just a single CPU core may be requested. Now, it is more reasonable to request GPUs for an OpenAI Gym environment that utilizes high-dimensional observation and/or action spaces. Hence, the first priority would be to start with multiple CPU nodes, and request GPUs only if they are needed.

+

For the three types of nodes (head, rollouts, training), define three separate groups: +

head_node=$(scontrol show hostnames $SLURM_JOB_NODELIST_HET_GROUP_0)
+rollout_nodes=$(scontrol show hostnames $SLURM_JOB_NODELIST_HET_GROUP_1)
+rollout_nodes_array=( $rollout_nodes )
+learner_node=$(scontrol show hostnames $SLURM_JOB_NODELIST_HET_GROUP_2)
+echo "head node    : "$head_node
+echo "rollout nodes: "$rollout_nodes
+echo "learner node : "$learner_node
+
+Each group of nodes requires its separate srun command so that they will run independently of each other. +
echo "starting head node at $head_node"
+srun --pack-group=0 --nodes=1 --ntasks=1 -w $head_node ray start --block --head \
+--node-ip-address="$ip_prefix" --port=$port --redis-password=$redis_password & # Starting the head
+sleep 10
+
+echo "starting rollout workers"
+for ((  i=0; i<$rollout_node_num; i++ ))
+do
+  rollout_node=${rollout_nodes_array[$i]}
+  echo "i=${i}, rollout_node=${rollout_node}"
+  srun --pack-group=1 --nodes=1 --ntasks=1 -w $rollout_node \
+   ray start --block --address "$ip_head" --redis-password=$redis_password & # Starting the workers
+  sleep 5
+done
+
+echo "starting learning on GPU"
+srun --pack-group=2 --nodes=1 --gres=gpu:1 -w $learner_node ray start --block --address "$ip_head" --redis-password=$redis_password &
+
+The slurm commands for the head and rollout nodes are identical to those from the previous section. A third command is also added for engaging the GPU node.

+

Finally, call +

python -u simple_trainer.py --redis-password $redis_password --num-cpus $rollout_num_cpus --num-gpus 1
+
+to begin training. Add the --num-gpus argument to include the requested GPU node (or nodes in the case of --gres=gpu:2) for policy training. There is no need to manually declare the GPU for policy training in simple_trainer.py; RLlib will automatically recognize the available GPU and use it accordingly.

+

The repo contains the complete slurm file versions for both env_example_gpu and env_gpu_optimized_tf, and they can be used as templates for future projects.

+

Create Gym environments from scratch#

+

So far, only benchmark Gym environments were used in order to demonstrate the processes for running experiments. It is time now to see how one can create their own Gym environment, carefully tailor-made to one's needs. OpenAI Gym functionality allows the creation of custom-made environments using the same structure as the benchmark ones.

+

Custom-made environments can become extremely complex due to the mechanics involved and may require many subscripts that perform parts of the simulation. Nevertheless, the basis of all environments is simply a Python class that inherits the gym.Env class, where the user can implement the three main Gym functions and define any hyperparameters necessary:

+
    +
  • def __init__(self): Initializes the environment. It defines initial values for variables/hyperparameters and may contain other necessary information. It also defines the dimensionality of the problem. Dimensionality is expressed as the sizes of the observation and action spaces, which are given using the parameters self.observation_space and self.action_space, respectively. Depending on their nature, they can take discrete, continuous, or a combination of values. OpenAI provides detailed examples of each one of these types of spaces.
  • +
  • def reset(self): When called, it resets the environment on a previous state (hence the name). This state can either be a user-defined initial state or it may be a random initial position. The latter can be found on environments that describe locomotion like CartPole, where the initial state can be any possible position of the pole on the cart.
  • +
  • def step(self, action): The heart of the class. It defines the inner mechanics of the environment, hence it can be seen as a kind of simulator. Its main input is the sampled action, which, when acted upon, moves the environment into a new state and calculates the new reward. The new state and reward are two of the function's outputs, and they are necessary for policy training since they are also inputs to the policy network. Other outputs include a boolean variable done that is True when the environment reaches its final state (if it exists), and False otherwise*, as well as a dictionary (info) with user-defined key-value objects that contain further information from the inner workings of the environment.
  • +
+

* Many environments do not consider a final state, since it might not make sense (e.g. a traffic simulator for fleets of autonomous ridesharing vehicles that reposition themselves based on a certain criterion. In this case the reward will get better every time, but there is no notion of a final vehicle position).

+

Directions of how to create and register a custom-made OpenAI Gym environment are given below.

+

Create an environment class#

+

As stated above, the basis of any Gym environment is a Python class that inherits the gym.Env class. After importing the gym package, define the class as: +

import gym
+
+class BasicEnv(gym.Env):(...)
+
+The example environment is very simple and is represented by two possible states (0, 1) and 5 possible actions (0-4). For the purposes of this tutorial, consider state 0 as the initial state, and state 1 as the final state.

+

Define the dimensions of observation and action spaces in the def __init__(self) function: +

def __init__(self):
+    self.action_space = gym.spaces.Discrete(5) # --> Actions take values in the 0-4 interval
+    self.observation_space = gym.spaces.Discrete(2) # --> Two possible states [0,1]
+
+Both spaces take discrete values, therefore they are defined using Gym's Discrete function. Other possible functions are Box for continuous single- or multi-dimensional observations and states, MultiDiscrete for vectors of discrete values, etc. OpenAI provides detailed explanations of all the different space types.

+

Next, define the def reset(self) function: +

def reset(self):
+    state = 0
+    return state
+
+In this example, the reset function simply returns the environment to the initial state.

+

Finally, define the def step(self, action) function, which takes as input the sampled action. Here the step function takes the environment at state 1 and based on the action, returns a reward of 1 or -1: +

def step(self, action):
+    state = 1
+
+    if action == 2:
+        reward = 1
+    else:
+        reward = -1
+
+    done = True
+    info = {}
+
+    return state, reward, done, info
+
+That's it, the new Gym environment is ready! Note that there is one more function usually found in Gym environments. This is the def render(self) function, which is called at intervals throughout training and returns a "snapshot" of the environment at that time. While this is helpful for evaluating the agent training process, it is not necessary for the actual training process. OpenAI documentation provides details for every one of these functions.

+

You can find the full script of this environment in the repo.

+

Run experiments on RLlib#

+

Let's now train the agent with RLlib. The full trainer script is given at the repo.

+

The trainer is almost identical to the one used before, with few additions that are necessary to register the new environment.

+

At first, along with ray and tune, import: +

from ray.tune.registry import register_env
+from custom_env import BasicEnv
+
+The register_env function is used to register the new environment, which is imported from the custom_env.py.

+

Function register_env takes two arguments:

+
    +
  • Training name of the environment, chosen by the developer.
  • +
  • Actual name of the environment (BasicEnv) in a lambda config: function.
  • +
+

env_name = "custom-env"
+register_env(env_name, lambda config: BasicEnv())
+
+Once again, RLlib provides detailed explanation of how register_env works.

+

The tune.run function uses the env_name defined above instead of args.name_env.

+

That's all! Proceed with agent training using any of the slurm scripts provided by the repo.

+

As a final note, creating a custom-made OpenAI Gym environment is more art than science. The main task is to really clarify what the environment represents and how it works, and then define this functionality in Python.

+

Validating results using Tensorboard#

+

Another way of visualizing the performance of agent training is with Tensorboard.

+

Navigate to the ray_results directory: +

cd ~/ray_results/
+
+Every RL experiment generates a subdirectory named after the OpenAI Gym environment used in the experiment.

+

E.g., after running all the examples previously shown in this tutorial, ray_results will have a subdirectory named CartPole-v0. Within, every experiment using CartPole generates a new subdirectory.

+

For the purpose of this tutorial, cd to the CartPole-v0 subdirectory and activate one of the environments: +

module purge
+conda activate <your_environment>
+
+Initialize Tensorboard following the steps in this tutorial. Open the localhost url in a browser, and all plots for rewards, iterations and other metrics will be demonstrated as:

+

[Screenshots: TensorBoard pages showing the reward and training-metric plots]

The tune/episode_reward_mean plot is essentially the same as the figure plotted from data in the progress.csv file. The difference in the x-axis scale has a simple explanation. The episode_reward_mean column in the progress.csv file shows the reward progress on every training iteration, while the tune/episode_reward_mean plot on Tensorboard shows reward progress on every training episode (a single RLlib training iteration consists of thousands of episodes).

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Machine_Learning/TensorBoard/index.html b/Documentation/Machine_Learning/TensorBoard/index.html new file mode 100644 index 000000000..fae80d292 --- /dev/null +++ b/Documentation/Machine_Learning/TensorBoard/index.html @@ -0,0 +1,4907 @@ + + + + + + + + + + + + + + + + + + + + + + + TensorBoard - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Validating ML results using Tensorboard#

+

Tensorboard provides visualization and tooling needed for machine learning, deep learning, and reinforcement learning experimentation:

+
    +
  • Tracking and visualizing metrics such as loss and accuracy.
  • +
  • Visualizing the model graph (ops and layers).
  • +
  • Viewing histograms of weights, biases, or other tensors as they change over time.
  • +
  • Projecting embeddings to a lower dimensional space.
  • +
  • Displaying images, text, and audio data.
  • +
  • Profiling TensorFlow programs.
  • +
+

For RL it is useful to visualize metrics such as:

+
    +
  • Mean, min, and max reward values.
  • +
  • Episodes/iteration.
  • +
  • Estimated Q-values.
  • +
  • Algorithm-specific metrics (e.g. entropy for PPO).
  • +
+

To visualize results from Tensorboard, first cd to the directory where your results reside. E.g., if you ran experiments using ray, then do the following: +

cd ~/ray_results/
+

+

There are three main methods for activating Tensorboard:

+
    +
  • If you included Tensorboard installation in an Anaconda environment, simply activate it: +
    module purge
    +conda activate <your_environment>
    +
  • +
  • You can also install Tensorboard in userspace using pip install: +
    pip install tensorboard --user
    +
  • +
  • Or, install using container images: +
    ml singularity-container
    +singularity pull docker://tensorflow/tensorflow
    +singularity run tensorflow_latest.sif
    +
  • +
+

Then, initialize Tensorboard using a pre-specified port number of your choosing (e.g. 6006, 8008): +

tensorboard --logdir=. --port 6006 --bind_all
+
+If everything works properly, terminal will show: +
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
+TensorBoard 2.5.0 at http://localhost:6006/ (Press CTRL+C to quit)
+
+Open a new Terminal tab and create a tunnel: +
ssh -NfL 6006:localhost:6006 $USER@el1.hpc.nrel.gov
+
+Finally, open the above localhost url (http://localhost:6006/) in a browser, where all the aforementioned plots will be shown.

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Machine_Learning/index.html b/Documentation/Machine_Learning/index.html new file mode 100644 index 000000000..37ef8752a --- /dev/null +++ b/Documentation/Machine_Learning/index.html @@ -0,0 +1,5192 @@ + + + + + + + + + + + + + + + + + + + + + + + Machine Learning - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Machine Learning#

+

Machine learning refers to a set of techniques and algorithms that enable computers to automatically learn from data and improve their performance on a specific task over time. Types of machine learning methods include, but are not limited to, supervised learning (algorithms trained on labeled datasets), unsupervised learning (algorithms trained on unlabeled datasets), and reinforcement learning (learning by trial and error). The Computational Science Center at NREL conducts research in these types of machine learning, and also supports the use of machine learning software on Kestrel.

+

Getting Started#

+ +

This section provides basic examples for getting started with two popular machine learning libraries: PyTorch and TensorFlow. Both examples use Anaconda environments, so if you are not familiar with their use please refer to the NREL HPC page on using Conda environments and also the Conda guide to managing environments.

+

Getting started with PyTorch#

+

To begin, we will outline basic steps for building a simple CPU-based conda environment for PyTorch. First, load the anaconda module and create a new conda environment: +

module load anaconda3
+
+conda create -p /projects/YOUR_PROJECT/YOUR_USER_NAME_HERE/FOLDER_FOR_CONDA_ENVIRONMENTS/pt python=3.9
+
+Answer yes to proceed, and you should end up with directions for starting your conda environment pt. Note that these instructions place your environment in the specified /projects folder. This is advisable, as opposed to installing conda environments in their default location in your home directory. See our Conda documentation for more information.

+

Activate the pt conda environment and install PyTorch into the active conda environment: +

conda activate /projects/YOUR_PROJECT/YOUR_USER_NAME_HERE/FOLDER_FOR_CONDA_ENVIRONMENTS/pt
+
+conda install pytorch torchvision torchaudio cpuonly -c pytorch
+
+Answer yes to proceed, and you should be up and running with PyTorch! The PyTorch webpage has great resources for getting started, including resources on learning the basics and PyTorch recipes.

+

Getting started with TensorFlow#

+

Getting started with TensorFlow is similar to the process for PyTorch. The first step is to construct an empty conda environment to work in: +

module load anaconda3
+
+conda create -p /projects/YOUR_PROJECT/YOUR_USER_NAME_HERE/FOLDER_FOR_CONDA_ENVIRONMENTS/tf python=3.9
+
+Subsequently, activate the tf conda environment, ensure you are running the latest version of pip in your environment, and install the CPU only version of TensorFlow using pip: +
conda activate /projects/YOUR_PROJECT/YOUR_USER_NAME_HERE/FOLDER_FOR_CONDA_ENVIRONMENTS/tf
+pip install --upgrade pip
+pip install tensorflow-cpu
+
+You should now be up and running with TensorFlow! Similar to PyTorch, the TensorFlow webpage has lots of great resources for getting started, including tutorials, basic examples, and more!

+

Example Job Script#

+
+PyTorch or TensorFlow shared partition CPU example +
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --mem=64G
+#SBATCH --cpus-per-task=26
+#SBATCH --partition=shared
+#SBATCH --account=<your account>
+#SBATCH --time=00:30:00
+
+module load conda
+conda activate /projects/<your_project>/<conda_envs_dir>/<pt_or_tf>
+
+srun python <your_code>.py
+
+
+
+

Note

+

This Getting Started section is only scratching the surface of ML libraries and resources that can be used on Kestrel. Tools such as LightGBM, XGBoost, and scikit-learn work well with conda environments (see the example after this note), and other tools such as Flux for the Julia Language can be used on Kestrel as well.

+
+
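
+

As an example (versions left unpinned; adjust to your needs), such libraries can be added to the same conda environment with pip:

+
conda activate /projects/<your_project>/<conda_envs_dir>/<pt_or_tf>
+pip install lightgbm xgboost scikit-learn
+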

Once you have completed your batch file, submit using +

sbatch <your_batch_file_name>.sb
+

+

Advanced (GPU)#

+

The above examples show how to build CPU-based conda environments. The following section covers how to build and run PyTorch and TensorFlow for use with GPUs on Kestrel.

+

To install either PyTorch or TensorFlow for use with GPUs on Kestrel, the first step is to load the anaconda module on the GPU node using module load conda. Once the anaconda module has been loaded, create a new environment in which to install PyTorch or TensorFlow, e.g.,

+
+Creating and activating a new conda environment +
conda create --prefix /projects/<your-project-name>/<your-user-name>/<conda-env-dir>/pt python=3.9
+conda activate /projects/<your-project-name>/<your-user-name>/<conda-env-dir>/<pt or tf>
+
+
+
+

Note

+

If you are not familiar with using Anaconda environments please refer to the NREL HPC page on using Conda environments and also the Conda guide to managing environments.

+
+

Installing TensorFlow on Kestrel#

+

Presented below are instructions for installing TensorFlow following the pip install instructions found here: TensorFlow. For optimized TensorFlow performance, we recommend using a containerized version of TensorFlow.

+

Once the conda environment created above has been activated, you can install TensorFlow using the pip based approach described in TensorFlow, but with a couple of modifications. Instead of using the cudatoolkit, we recommend using the nvhpc programming environment accessed using the module PrgEnv-nvhpc. Also, there is a module for cudnn. Using these two modules, we install TensorFlow with the following commands:

+
+Installing TensorFlow using pip +
module load PrgEnv-nvhpc
+module load cudnn
+python3 -m pip install tensorflow[and-cuda]
+
+
+

Installing PyTorch on Kestrel#

+

Once the environment has been activated, you can install PyTorch using the standard approach found under the Get Started tab of the PyTorch website, e.g., using pip,

+
+Installing PyTorch using pip +

pip3 install torch torchvision torchaudio

+
+

or using conda,

+
+Installing PyTorch using conda specifying CUDA 12.4 +

conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia

+
+
+

Note

+

We recommend installing software for GPU jobs using the GPU nodes. There are two GPU login nodes available on Kestrel.

+
+

Running a PyTorch or TensorFlow Batch Job on Kestrel - GPU#

+
+Sample job script: Kestrel - Shared (partial) GPU node +
#!/bin/bash
+#SBATCH --account=<your-account-name> 
+#SBATCH --nodes=1
+#SBATCH --gpus=1 
+#SBATCH --ntasks-per-node=1
+#SBATCH --mem=96G
+#SBATCH --cpus-per-task=32
+#SBATCH --time=00:30:00
+#SBATCH --job-name=<your-job-name>
+
+module load conda
+conda activate /projects/<your-project-name>/<your-user-name>/<conda-env-dir>/<pt or tf>
+
+srun python <your-pytorch or tensorflow-code>.py
+
+
+

TensorFlow Example#

+

Find below a simple neural network example using the MNIST data set for getting started using TensorFlow with Kestrel GPUs. This example was based on TensorFlow's quick start documentation found here.

+
+MNIST example +
import tensorflow as tf
+
+# Select a standard data set and normalize
+mnist = tf.keras.datasets.mnist    
+(x_train, y_train),(x_test, y_test) = mnist.load_data()
+x_train, x_test = x_train / 255.0, x_test / 255.0
+
+# Set up and compile a model 
+model = tf.keras.models.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),  tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dropout(0.2), tf.keras.layers.Dense(10, activation='softmax')])
+
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy', metrics=['accuracy'])
+
+# Fit model to training data and evaluate on test data
+model.fit(x_train, y_train, epochs=5)
+
+model.evaluate(x_test, y_test)
+
+
+

PyTorch Example#

+

Below we present a simple convolutional neural network example for getting started using PyTorch with Kestrel GPUs. The original, more detailed version of this example can be found in the pytorch tutorials repo here.

+
+CIFAR10 example +
import torch
+import torchvision
+import torchvision.transforms as transforms
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+
+# Check if there are GPUs. If so, use the first one in the list
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+print(device)
+
+# Load data and normalize
+transform = transforms.Compose(
+    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+batch_size = 4
+trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
+                                    download=True, transform=transform)
+trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
+                                      shuffle=True, num_workers=2)
+testset = torchvision.datasets.CIFAR10(root='./data', train=False,
+                                   download=True, transform=transform)
+testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
+                                     shuffle=False, num_workers=2)
+
+# Define the CNN
+class Net(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(3, 6, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, 10)
+
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = torch.flatten(x, 1) # flatten all dimensions except batch
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
+
+net = Net()
+
+# send the network to the device
+# If you want to use data parallelism across multiple GPUs, uncomment if statement below
+#if torch.cuda.device_count() > 1:
+#    net = nn.DataParallel(net)
+
+net.to(device)
+
+# Define loss function and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
+
+# Train the network
+for epoch in range(2):  # loop over the dataset multiple times
+
+    running_loss = 0.0
+    for i, data in enumerate(trainloader, 0):
+        # get the inputs; data is a list of [inputs, labels]
+        # inputs, labels = data # setup without device
+        inputs, labels = data[0].to(device), data[1].to(device)
+
+        # zero the parameter gradients
+        optimizer.zero_grad()
+
+        # forward + backward + optimize
+        outputs = net(inputs)
+        loss = criterion(outputs, labels)
+        loss.backward()
+        optimizer.step()
+
+        # print statistics
+        running_loss += loss.item()
+        if i % 2000 == 1999:    # print every 2000 mini-batches
+            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
+            running_loss = 0.0
+
+print('Finished Training')
+
+
+
+

Note

+

Currently, this code will run on a single GPU, specifically the GPU denoted cuda:0. To use multiple GPUs via data parallelism, uncomment the two lines above the net.to(device) command. Furthermore, use of multiple GPUs requires requesting multiple GPUs for the batch or interactive job (see the sketch after this note).

+
+
+
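
+

For reference, requesting both GPUs on a node in the sample job script above is a matter of changing the GPU request line, for example:

+
#SBATCH --gpus=2      # request two GPUs instead of one
+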

Note

+

To better observe the multi-GPU performance of the above example, you can change the size of the CNN. For example, by increasing the size of the second argument in the definition of self.conv1 and the first argument in self.conv2, you can increase the size of the network and use more resources for training.

+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Managing_Data/Transferring_Files/FileZilla/index.html b/Documentation/Managing_Data/Transferring_Files/FileZilla/index.html new file mode 100644 index 000000000..60f1be70e --- /dev/null +++ b/Documentation/Managing_Data/Transferring_Files/FileZilla/index.html @@ -0,0 +1,4972 @@ + + + + + + + + + + + + + + + + + + + + + + + FileZilla - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Transferring files using FileZilla#

+

FileZilla can be used to securely transfer files between your local computer running Windows, Linux, or macOS and a remote computer running Linux.

+

Setting Up FileZilla#

+ +

Connecting to a Host#

+
    +
  • Decide which host you wish to connect to, such as kestrel.hpc.nrel.gov.
  • +
  • Enter your username in the Username field.
  • +
  • Enter your password or Password+OTP Token in the Password field.
  • +
  • Use 22 as the Port.
  • +
  • Click the 'Quickconnect' button.
  • +
+

Transferring Files#

+

You may use FileZilla to transfer individual files or directories from the Local Directory to the Remote Directory or vice versa.

+

Transfer files by dragging them from the Local Directory (left pane) to the Remote Directory (right pane) or vice versa. Once the transfer is complete the selected file will be visible in the pane it was transferred to.

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Managing_Data/Transferring_Files/globus/index.html b/Documentation/Managing_Data/Transferring_Files/globus/index.html new file mode 100644 index 000000000..9ffdeae5a --- /dev/null +++ b/Documentation/Managing_Data/Transferring_Files/globus/index.html @@ -0,0 +1,5121 @@ + + + + + + + + + + + + + + + + + + + + + + + Transferring Files with Globus - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + +

Transferring Files with Globus#

+

For large data transfers between NREL’s high-performance computing (HPC) +systems and another data center, or even a laptop off-site, we recommend using +Globus.

+

A supporting set of instructions for requesting a Globus account and transferring data using Globus is available on the NREL HPC website.

+

What Is Globus?#

+

Globus provides services for research data management, including file transfer. +It enables you to quickly, securely and reliably move your data to and from +locations you have access to.

+

Globus transfers files using GridFTP. GridFTP is a high-performance data +transfer protocol which is optimized for high-bandwidth wide-area networks. It +provides more reliable high performance file transfer and synchronization than +scp or rsync. It automatically tunes parameters to maximize bandwidth while +providing automatic fault recovery and notification of completion or problems.

+

Get a Globus Account#

+

To get a Globus account, sign up on the Globus account website.

+

Globus NREL Endpoints#

+

The current NREL Globus Endpoints are:

+
    +
  • nrel#eglobus - This endpoint allows access to any files on Eagle's Lustre file system: /scratch and /projects.
  • +
  • nrel#kglobus_projects - This endpoint will give you access to any files you have on the Kestrel Project File System: /datasets, /projects, and /shared-projects.
  • +
  • nrel#kglobus_scratch - This endpoint will give you access to any files you have on the Kestrel Scratch File System: /scratch.
  • +
  • nrel#vast - This endpoint will give you access to any files you have on our VAST file system: /campaign, /datasets (Eagle), /MSS and /shared-projects (Eagle).
  • +
+

Globus Personal Endpoints#

+

You can set up a "Globus Connect Personal EndPoint", which turns your personal +computer into an endpoint, by downloading and installing the Globus Connect +Personal application on your system.

+

Set Up a Personal EndPoint#

+
    +
  • Download Globus Connect Personal
  • +
  • Once installed, you will be able to start the Globus Connect Personal application locally and log in using your previously created Globus account credentials.
  • +
  • Within the application, you will need to grant consent for Globus to access + and link your identity before creating a collection that will be visible from + the Globus Transfer website.
  • +
  • Additional tutorials and information on this process are located on the Globus website for both Mac and Windows.
  • +
+

Transferring Files#

+

You can transfer files with Globus through the Globus +Online website or via the CLI +(command line interface).

+
+

Important

+

It is strongly recommended to compress multiple files into a single archive (tar.gz, zip) before transferring data with Globus.

+

To compress a directory: +

tar -czvf filename.tar.gz /path/to/dir
+
+To extract an archive: +
tar -xzvf filename.tar.gz
+

+
+
+Globus Online +

Globus Online is a hosted service that allows you to use a browser to transfer +files between trusted sites called "endpoints". To use it, the Globus software +must be installed on the systems at both ends of the data transfer. The NREL +endpoint is nrel#eglobus.

+
    +
  1. Click Login on the Globus web site. On the login +page select "Globus ID" as the login method and click continue. Use the Globus +credentials you used to register your Globus.org account.
  2. +
  3. The ribbon on the left side of the screen acts as a navigator; select File Manager if it is not already selected. In addition, select the 'middle' option for Panels in the upper right, which will display space for two Globus endpoints.
  4. +
  5. The collection tab will be searchable (e.g. nrel), or nrel#eglobus can be +entered in the left collection tab. In the box asking for authentication, enter +your NREL HPC username and password. Do not use your globus.org username +or password when authenticating with the nrel#eglobus endpoint.
  6. +
  7. Select another Globus endpoint, such as a personal endpoint or an endpoint at another institution that you have access to. To use your personal endpoint, first start the Globus Connect Personal application. Then search for either the endpoint name or your username in the collections tab, and select your endpoint. After the first use, you should see your endpoints in the recent tab when searching. You may also set up an endpoint/directory as a bookmark.
  8. +
  9. To transfer files:
      +
    • select the files you want to transfer from one of the endpoints
    • +
    • select the destination location in the other endpoint (a folder or directory)
    • +
    • click the 'start' button on the source collection, and it will transfer files + to the target collection
    • +
    +
  10. +
  11. For additional information, the Globus Webpage has +tutorials and documentation under the Resources tab.
  12. +
+

When your transfer is complete, you will be notified by email.

+
+
+Globus CLI (command line interface) +

Globus supports a command line interface (CLI), which can be used for scripting +and automating some transfer tasks. For more information, +it is suggested that the user refer to the Globus CLI +documentation located on the Globus Webpage.
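As an illustrative sketch only (the endpoint IDs and paths below are placeholders), a scripted CLI transfer might look like:

$ globus login                                    # authenticate the CLI (opens a browser link)
$ globus endpoint search "nrel#kglobus_scratch"   # look up an endpoint ID
$ globus transfer --recursive --label "dataset1 transfer" \
    <source_endpoint_id>:/scratch/username/dataset1/ \
    <dest_endpoint_id>:/home/username/dataset1/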

+

To install globus-cli, the recommendation is to use a Conda environment: establish a base environment with Conda first, then install the package with Pip, following the guidance about mixing Conda and Pip. For more information, refer to our internal documentation at: Conda
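A minimal sketch of that install process (the environment name and Python version are arbitrary choices):

$ conda create -n globus-env python=3.11
$ conda activate globus-env
$ pip install globus-cli
$ globus --version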

+
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Managing_Data/Transferring_Files/index.html b/Documentation/Managing_Data/Transferring_Files/index.html new file mode 100644 index 000000000..2673b0bc9 --- /dev/null +++ b/Documentation/Managing_Data/Transferring_Files/index.html @@ -0,0 +1,4995 @@ + + + + + + + + + + + + + + + + + + + + + + + File Transfers - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Transferring Files#

+

Learn how to transfer data within, to and from NREL's high-performance computing (HPC) systems.

+

For further information about individual systems' filesystem architecture and quotas, please see the Systems section.

+

Best Practices for Transferring Files#

+

File Transfers Between Filesystems on the NREL network#

+

rsync is the recommended tool for transferring data between NREL systems. It allows you to easily restart transfers if they fail, and also provides more consistency when dealing with symbolic links, hard links, and sparse files than either scp or cp. It is recommended you do not use compression for transfers within NREL systems. An example command is:

+
$ rsync -aP --no-g /scratch/username/dataset1/ /mss/users/username/dataset1/
+
+

Mass Storage has quotas that limit the number of individual files you can store. If you are copying hundreds of thousands of files then it is best to archive these files prior to copying to Mass Storage. See the guide on how to archive files.

+

Mass Storage quotas rely on the group of the file and not the directory path. It is best to use the --no-g option when rsyncing to MSS so you use the destination group rather than the group permissions of your source. You can also chgrp your files to the appropriate group prior to rsyncing to MSS.
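For example, a sketch of setting the group before the copy (the project group csc000 and paths are placeholders):

$ chgrp -R csc000 /scratch/username/dataset1/
$ rsync -aP --no-g /scratch/username/dataset1/ /mss/users/username/dataset1/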

+

Small Transfers (<100GB) outside of the NREL network#

+

rsync, scp, and curl will be your best options for small transfers (<100GB) outside of the NREL network. If your rsync/scp/curl transfers are taking hours to complete, you should consider using Globus.

+

If you're transferring many files then you should use rsync:

+
$ rsync -azP --no-g /mss/users/username/dataset1/ user@desthost:/home/username/dataset1/
+
+

If you're transferring an individual file then use scp:

+
$ scp /home/username/example.tar.gz user@desthost:/home/username/
+
+

You can use curl or wget to download individual files: +

$ curl -O https://URL
+$ wget https://URL
+

+

Large Transfers (>100GB) outside of the NREL network#

+

Globus is optimized for file transfers between data centers and anything outside of the NREL network. It will be several times faster than any other tools you will have available. Documentation about requesting an HPC Globus account is available on the Globus Services page on the HPC website. See Transferring files using Globus for instructions on transferring files with Globus.

+

Transferring files using Windows#

+

For Windows, you will need to download WinSCP to transfer files to and from HPC systems over SCP. See Transferring files using WinSCP.

+

Archiving files and directories#

+

Learn various techniques to combine and compress multiple files or directories into a single file to reduce storage footprint or simplify sharing.

+

tar#

+

tar, along with zip, is one of the basic commands to combine multiple individual files into a single file (called a "tarball"). tar requires at least one command line option. A typical usage would be: +

$ tar -cf newArchiveName.tar file1 file2 file3
+# or
+$ tar -cf newArchiveName.tar /path/to/folder/
+

+

The -c flag denotes creating an archive, and -f denotes that the next argument given will be the archive name—in this case it means the name you would prefer for the resulting archive file.

+

To extract files from a tar, it's recommended to use: +

$ tar -xvf existingArchiveName.tar
+
+-x is for extracting, -v uses verbose mode which will print the name of each file as it is extracted from the archive.

+

Compressing#

+

tar can also generate compressed tarballs which reduce the size of the resulting archive. This can be done with the -z flag (which just calls gzip on the resulting archive automatically, resulting in a .tar.gz extension) or -j (which uses bzip2, creating a .tar.bz2).

+

For example:

+
# gzip
+$ tar -czvf newArchive.tar.gz file1 file2 file3
+$ tar -xvzf newArchive.tar.gz
+
+# bzip2
+$ tar -cjvf newArchive.tar.bz2 file1 file2 file3
+$ tar -xvjf newArchive.tar.bz2
+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Managing_Data/Transferring_Files/winscp/index.html b/Documentation/Managing_Data/Transferring_Files/winscp/index.html new file mode 100644 index 000000000..4ad3d5874 --- /dev/null +++ b/Documentation/Managing_Data/Transferring_Files/winscp/index.html @@ -0,0 +1,4970 @@ + + + + + + + + + + + + + + + + + + + + + + + WinSCP - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

WinSCP for Windows File Transfers#

+

WinSCP can be used to securely transfer files between your local computer running Microsoft Windows and a remote computer running Linux.

+

Setting Up WinSCP#

+

Download and install WinSCP.

+

You may follow the prompts to import your PuTTY sites to simplify host management.

+

Connecting to a Host#

+
    +
  • Set up a host (if needed) by selecting "New Site" and providing a host name (e.g., kestrel.nrel.gov) and your user name. In most cases, use the SFTP protocol.
  • +
  • Connect to the server by selecting a site and clicking [Login].
  • +
  • Enter your password or Password+Token when prompted.
  • +
+

Transferring Files#

+

You may use WinSCP to transfer individual files or to synchronize the Local Directory to the Remote Directory.

+

Transfer files by dragging them from the Local Directory (left pane) to the Remote Directory (right pane) or vice versa. Once the transfer is complete the selected file will be visible in the Remote Directory pane.

+

Synchronizing directories allows you to easily replicate changes affecting entire directory structures back and forth. To synchronize the Remote Directory and the Local Directory select Synchronize from the Commands menu. Select the Synchronize Files mode and click OK.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Managing_Data/file_permissions/index.html b/Documentation/Managing_Data/file_permissions/index.html new file mode 100644 index 000000000..27fb8e699 --- /dev/null +++ b/Documentation/Managing_Data/file_permissions/index.html @@ -0,0 +1,5277 @@ + + + + + + + + + + + + + + + + + + + + + + + Understanding File Permissions - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Linux File Permissions and Ownership#

+

Linux uses standard POSIX-style permissions to control who has the ability to read, write, or execute a file or a directory.

+

Permission Levels and Ownership#

+

Under this standard, all files and all directories have three types of permission that can be granted.

+

The three permission types are:

+
    +
  • r (Read): permission to read or copy the contents of the file, but not make changes
  • +
  • w (Write): permission to make changes, move, or delete a file
  • +
  • x (eXecute): permission to run a file that is an executable program, such as a compiled binary, shell script, python code, etc, OR to access a directory.
  • +
+

Files are also "owned" by both an individual user, and a user group. This ownership is used to provide varying levels of access to different +cohorts of users on the system.

+

The cohorts of users to which file permissions may be assigned include:

+
    +
  • u: permissions granted to the (u)ser who owns the file
  • +
  • g: permissions granted to the (g)roup of users who own the file
  • +
  • o: permissions granted to any (o)thers who are not the user or the group that own the file
  • +
+

On most systems, every user is a member of their own personal group that has the same name as their username, and only that user has access +to their own personal group. Whenever a new file is created, the default is that it will be created with the user and group ownership of the +user that created the file. However, this may not always be the case, and the permissions of the directory in which the file is created can +have an effect on the group ownership. This will be discussed in a later section.

+

Viewing File and Directory Permissions#

+

The ls -l command will show the ownership and permissions of a file, a list of files, or all files in a directory. Here's an example output with two files, two directories, and a symbolic link to another directory. The user in the example is "alice". +

[alice@el1 ~]$ ls -l 
+-rwxrwx---.  1 alice alice         49 Oct 13  2020 testjob.sh
+-rw-rw----.  1 alice alice         35 Mar  9 16:45 test.txt
+drwxrwx---.  3 alice alice       1536 Mar 31  2021 myscripts
+drwxrws---.  3 alice csc000         4096 Dec 14  2020 shared-data
+lrwxrwxrwx.  1 alice alice         16 Jan 30  2023 my_proj -> /projects/csc000
+

+

The first field of ls -l output for each file consists of ten characters. These represent the permission bits for the file.

+

The first bit is reserved to describe the type of file. The three most common file types are:

+
    +
  • - : a dash indicates a regular file (no special file type)
  • +
  • d : a d indicates that this is a directory (a type of "file" that stores a list of other files)
  • +
  • l : an l indicates a symbolic link to another file/directory
  • +
+

The next nine bits describe the file permissions that are set. These are always in the order of read, write, and execute.

+

A letter indicates that this permission is granted, a - indicates that the permission is not granted.

+

This "rwx" order repeats three times: the first triplet is for User permissions, the second triplet is for Group permissions, and the third triplet is for Other permissions.

+

In the example above, testjob.sh has the permissions -rwxrwx---. This means that the User and Group owners have read, write, and execute permission. The last three characters are -, which indicates that "Other" users do not have permissions to this file.

+

There also may be a dot (.) or other character at the end of the permissions list, depending on the variety of Linux that is installed. The dot indicates that no further access controls are in place. A + indicates that ACLs (Access Control Lists) are in place that provide additional permissions. ACLs are an extension of the file permission system that is present on some, but not all, NREL HPC systems, and may be used to provide more fine-grained access control on a per-user basis. If the system you are using supports ACLs, you may see man getfacl and man setfacl for more help on ACLs.
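On systems that support ACLs, usage looks roughly like the following (the username and filename are placeholders):

$ setfacl -m u:bob:r-- results.txt    # grant the user "bob" read-only access
$ getfacl results.txt                 # list the ACL entries now set on the file
$ ls -l results.txt                   # the permissions string will now end with a +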

+

After the permissions flags is a number indicating the number of hard links to the file. It has no bearing on permissions and can be ignored.

+

The next two fields are the User and Group with access rights to the file. A file may only be owned by one User and one Group at a time.

+

Special Permissions Flags: Setuid, Setgid, and Sticky Bits#

+

An s in the e(x)ecute bit field has a special meaning, depending on whether it's in the User or Group permissions. A t in the "Others" +e(x)ecute also has a special meaning.

+

In the Group permission bits, an s for the eXecute bit indicates that SETGID is enabled. This can be set for an individual file or for a directory, but +is most common on a directory. When setgid is enabled on a directory, any files created in the directory will have a group ownership that corresponds to the +group ownership of the directory itself, instead of the default group of the user who created the file. This is very useful when an entire directory is +intended to be used for collaboration between members of a group, when combined with appropriate group read, write, and/or execute bits.
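For example, enabling setgid on the shared-data directory from the earlier listing would look like:

$ chmod g+s shared-data
$ ls -ld shared-data
drwxrws---. 3 alice csc000 4096 Dec 14  2020 shared-data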

+

In the User permission bits, an s for the eXecute bit indicates that SETUID is enabled. This is only used for executable files, and means that +regardless of the user who runs the program, the owner of the process that starts up will be changed to the owner of the file. This is very +rarely used by regular users and can pose a considerable security risk, because a process that belongs to a user also has access to that user's +files as though it had been run by that user. Setuid should almost never be used.

+

In the Other permission bits, a t for the eXecute bit indicates that a "sticky bit" has been set. This is only used on directories. With the sticky bit +set, files in that directory may only be deleted by the owner of the file or the owner of the directory. This is commonly used for directories that +are globally writeable, such as /tmp or /tmp/scratch and will be set by a system administrator. It is otherwise rarely used by regular users.

+

Changing Permissions and Ownership#

+

Only the User that owns a file may change ownership or permissions.

+

The chgrp command is used to change the Group ownership of a file or directory.

+

The chmod command is used to change the permissions of a file or directory.

+

The chown command is used to change the User owner and/or Group owner of a file, but only system administrators may change the User owner, so this command will not be covered in this document. Please see man chown for more information.

+

The chgrp Command#

+

The chgrp command is used to change the group ownership of a file. You must own the file and be a member of the destination group.

+

chgrp -c group filename

+

The -c flag is recommended, as it explicitly shows any changes that are made to ownership.

+

Filename can be a file, a list of files, a wildcard (e.g. *.txt), or a directory.
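For example (csc000 is a placeholder group, and the exact output message may vary by system):

$ chgrp -c csc000 results.txt
changed group of 'results.txt' from alice to csc000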

+

Please see man chgrp for more detailed information on this command.

+

The chmod Command and Symbolic Permissions#

+

The chmod command is used to change the permissions (also called file mode bits) of a file or directory. Using an alphabetic shorthand ("symbolic mode"), permissions can be changed for a file or directory, in the general format:

+

chmod -c ugoa+-rwxst file

+

The cohort to which permissions should be applied is first: (u)ser, (g)roup, (o)ther, or (a)ll.

+

The + or - following the cohort denotes whether the permissions should be added or removed, respectively.

+

After the +/- is the list of permissions to change: (r)ead, (w)rite, e(x)ecute are the primary attributes. (s)etuid or (s)etgid depend on the cohort +chosen: u+s is for setuid, g+s is for setgid. The s(t)icky bit may also be set.

+

To add eXecute permission for the User owner of a file:

+

chmod u+x myscript.sh

+

To add read, write, and execute permission for the Group owner of a directory:

+

chmod g+rwx mydirectory

+

To remove write and execute from other users:

+

chmod o-wx myscript.sh

+

You can also combine arguments, for example:

+

chmod g+rwx,o-rwx myscript.sh

+

chmod ug+rwx,o+r,o-w myscript.sh

+

Please avoid setting global read, write, and execute permissions, as it is a security risk:

+

chmod a+rwx myscript.sh

+

Using Octal Permissions With chmod#

+

chmod can also accept numeric arguments for permissions, instead of the symbolic permissions. This is called "octal" mode, as it uses base 8 (digits 0 through 7), with each digit encoding three permission bits. Symbolic permissions are now generally preferred for clarity, but octal is sometimes used as a shorthand way of accomplishing the same thing.

+

In octal mode, a three or sometimes four digit number is used to represent the permission bits. The octal equivalent to "ug+rwx" is:

+

chmod 770 myscript.sh

+

The first position is User, the second is Group, and the last is Other.

+

The following table describes the value of the bit and the corresponding permission.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
bit   permission
0     none
1     execute
2     write
4     read
+

The permission is set by the sum of the bits, from 0 to 7, with 0 being "no permissions" and 7 being "read, write, and execute."

+

760 and 770 are the most common for data shared by a group of users. 700 is common for protected files that should only be viewed or edited by the User who owns the file.

+

Occasionally there may be a fourth leading digit. This is used for setuid, setgid, or a sticky bit setting.
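For example, a leading 2 sets setgid, so the following is the octal equivalent of chmod ug+rwx,g+s on a shared directory:

$ chmod 2770 shared-data    # setgid (2) + rwx for User (7) + rwx for Group (7) + none for Other (0)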

+

Caution with Mode 777#

+

The command chmod 777 is the equivalent of chmod a+rwx, which grants read, write, and execute permission to ALL users on the system for the file(s) specified. Use of this command should be EXTREMELY rare, and any suggestions that it be applied should be examined closely, as it poses a major security risk to your files and data. Use your best judgement.

+

Further Reading About File Permissions#

+

All of the command listed have manual pages available at the command line. See man <command> for more information, or man man for help with the manual page system itself.

+

Further documentation regarding file permissions and other Linux fundamentals is widely available online in text or video format, and many paper books are available.

+

We do not endorse any particular source, site, or vendor. The following links may be helpful:

+
    +
  • https://www.redhat.com/sysadmin/linux-file-permissions-explained
  • +
  • https://www.linuxfoundation.org/blog/blog/classic-sysadmin-understanding-linux-file-permissions
  • +
  • https://docs.nersc.gov/filesystems/unix-file-permissions/
  • +
  • https://en.wikipedia.org/wiki/File-system_permissions
  • +
  • https://www.linux.com/training-tutorials/file-types-linuxunix-explained-detail/
  • +
  • https://en.wikipedia.org/wiki/Unix_file_types
  • +
+

Default Permissions on NREL Systems#

+

When first created, all /projects directories will be owned by the allocation's HPC Lead User and the project's shared Group. The default permissions will typically be ug+rwx (chmod 770) or ug+rwx,o+rx (chmod 775), depending on the system. The setgid bit will also be set on the directory, so that all files created in the /projects directory will have a Group ownership of the project's group.

+

NREL Technical Help with File Permissions#

+

The NREL HPC Support Team relies on allocation owners and users to be responsible for file permissions and ownership as a part of managing the allocation and its data, but the PI or HPC Leads of a project may request assistance in changing permissions or ownership of files that belong to the allocation by opening a support ticket with hpc-help@nrel.gov.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Managing_Data/localstorage/index.html b/Documentation/Managing_Data/localstorage/index.html new file mode 100644 index 000000000..e7058c80c --- /dev/null +++ b/Documentation/Managing_Data/localstorage/index.html @@ -0,0 +1,4921 @@ + + + + + + + + + + + + + + + + + + + + + + + Node Local Storage - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Local and Scratch Storage on NREL HPC Systems#

+

The table below summarizes the local and scratch storage currently on NREL HPC systems.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
System NameNode Local Storage$TMPDIR DefaultDefault $TMPDIR Storage TypeGlobal Scratch Storage
Kestrel1.7TB on 256 of the standard compute nodes, 5.6TB on bigmem nodes, 3.4TB GPU nodes. Other nodes have none./tmp/scratch/$SLURM_JOBIDLocal disk when available, or RAM/scratch/$USER (Lustre)
Swift1.8TB/scratch/$USER/$SLURM_JOBIDLocal diskNone
Vermilion60GB (t), 250GB (sm), 500GB (std), 1.0TB (lg), 2.0TB (gpu)/tmpRAM. Write to /tmp/scratch instead to use local disk./scratch/$USER
+

Important Notes

+
    +
  • Local storage is local to a node and usually faster to access by the processes running on the node. Some scenarios in which using the local disk might make your job run faster are:
      +
    • Your job may access or create many small (temporary) files
    • +
    • Your job may have many parallel tasks accessing the same file
    • +
    • Your job may do many random reads/writes or memory mapping.
    • +
    +
  • +
  • Local or scratch spaces are for temporary files only and there is no expectation of data longevity in these spaces. HPC users should copy results from those spaces to a /projects or global scratch directory as part of the job script before the job finishes.
  • +
  • A node will not have read or write access to any other node's local scratch, only its own
  • +
  • On Kestrel, the path /tmp/scratch is not writeable. Use $TMPDIR instead.
  • +
  • On Kestrel, only 256 of the standard compute nodes have real local disk, the other standard compute nodes have no local disk space. For the nodes without local storage, writing to $TMPDIR uses RAM. This could cause an out-of-memory error if using a lot of space in $TMPDIR. To solve this problem:
      +
    • Use /scratch/$USER instead of the default $TMPDIR path if the job benefits little from local storage (e.g. jobs with low I/O communication)
    • +
    • Request nodes with local storage by using the --tmp option in your job submission script. (e.g. --tmp=1600000). Then, $TMPDIR will be using a local disk.
    • +
    • In addition, on Kestrel, this bash command can be used to check if there is a local disk on the node: "if [ -e /dev/nvme0n1 ]". This will only work on standard compute nodes. For example:
    • +
    +
  • +
+

if [ -e /dev/nvme0n1 ]; then
+ echo "This node has a local storage and will use as the scratch path"
+ APP_SCRATCH=$TMPDIR
+else
+ echo "This node does not have a local storage drive and will use /scratch as the scratch path"
+ APP_SCRATCH=/scratch/$USER/$SLURM_JOB_ID
+fi
+
+This does not work on bigmem nodes. All bigmem nodes have a real local disk.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Managing_Data/mss/index.html b/Documentation/Managing_Data/mss/index.html new file mode 100644 index 000000000..96cc636ab --- /dev/null +++ b/Documentation/Managing_Data/mss/index.html @@ -0,0 +1,5013 @@ + + + + + + + + + + + + + + + + + + + + + + + Mass Storage System - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Mass Storage System (MSS)#

+

NREL’s Amazon Web Services (AWS) Mass Storage System (MSS) +is an additional data archival resource available to active projects and users +on the Kestrel high-performance computing (HPC) system.

+

The AWS MSS keeps and protects important data, primarily as an addition to +Kestrel's high-performance Lustre filesystem (/projects and /scratch).

+

NREL implemented the AWS MSS to take advantage of S3 Deep Glacier archiving, +replacing the previous on-premises MSS, Gyrfalcon, which reached end-of-life at +the end of 2020.

+

How To Copy/Move Data from Kestrel#

+

AWS charges per inode. Therefore, to keep costs down, it is recommended that users create a compressed tarball of any files and/or directories to be archived to AWS MSS. The size limit per archived file is 5 TB, so individual tarballs need to be under this limit (although multiple tarballs that together total more than 5 TB can be archived).

+

The recommended command is:

+

$ tar czvf /destination/descriptor-YYYYMMDD.tgz <source-files-directories>

+

For example, to archive /projects/csc000/data-to-be-copied from a Kestrel login node:

+
$ cd /projects/csc000
+$ tar czvf /kfs2/shared-projects/MSS/projects/csc000/data-to-be-copied-20211215.tgz data-to-be-copied
+
+

Data placed in /kfs2/shared-projects/MSS/projects/<project_handle> and +/kfs2/shared-projects/MSS/home/<username> is synced to AWS MSS and then purged from Kestrel.

+

How To Restore Data#

+
    +
  • Restoring AWS MSS data requires a request to the HPC Help Desk and may take 48 hours or more, as data must be staged out of Deep Archive before it can be recovered.
  • +
  • +

    Users can see a list of the archived files they have on AWS MSS by searching the following file: /kfs2/shared-projects/MSS/MSS-archived-files

    +
      +
    • The MSS-archived-files has limited information, but all archives + related to a project can be found using a command such as: + $ grep <project name> /kfs2/shared-projects/MSS/MSS-archived-files
    • +
    +
  • +
  • +

    Let the HPC Help Desk know specifically what file(s) you would like to recover, and where the +recovered files should be placed.

    +
  • +
+

Usage Policies#

+

Follow the AWS MSS policies.

+

Contact#

+

Contact the HPC Help Desk if you have any questions or issues.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Slurm/batch_jobs/index.html b/Documentation/Slurm/batch_jobs/index.html new file mode 100644 index 000000000..6ddc09b37 --- /dev/null +++ b/Documentation/Slurm/batch_jobs/index.html @@ -0,0 +1,5389 @@ + + + + + + + + + + + + + + + + + + + + + + + Running Batch Jobs - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Running Batch Jobs#

+

Job Scheduling and Management#

+

Batch jobs are run by submitting a job script to the scheduler with the sbatch command. The job script contains the commands needed to set up your environment and run your application. (This is an "unattended" run, with results written to a file for later access.)

+

Once submitted, the scheduler will insert your job script into the queue to be run at some point in the future, based on priority and how many jobs are in the queue currently.

+

Priority factors vary on a cluster-by-cluster basis, but typically include a "fairshare" value based on the resources assigned to the allocation, as well as weighting by the job's age, partition, resources (e.g. node count) and/or Quality of Service (qos) factor. Please see the Monitoring and Control commands page for more information on checking your job's priority. The Systems documentation for each cluster will also have more information about the priority weighting, QOS factors, and any associated AU upcharges.

+

To submit batch jobs on an HPC system at NREL, the Slurm sbatch command should be used:

+

$ sbatch --account=<project-handle> <batch_script>

+

Sbatch scripts may be stored on or run from any file system (/home or /projects, for example), as they are typically fairly lightweight shell scripts. However, on most HPC systems it's generally a good idea to have your executables, conda environments, and other software that your sbatch script executes stored in a /projects directory. Your input and output files should typically be read from and/or written to either /projects or /scratch directories, as well. Please see the appropriate Systems page for more information specific to the filesystems on the NREL-hosted cluster you're working on to maximize I/O performance.

+

Arguments to sbatch may be used to specify resource limits such as job duration (referred to as "walltime"), number of nodes, etc., as well as what hardware features you want your job to run with. These can also be supplied within the script itself by placing #SBATCH comment directives within the file.

+

Required Flags#

+

Resources for your job are requested from the scheduler either through command line flags to sbatch, or directly inside your script with an #SBATCH directive. All jobs require the following two flags to specify an allocation ("account") to charge the compute time to, and a maximum duration:

+ + + + + + + + + + + + + + + + + + + + + + + +
ParameterFlagExampleExplanation
Project handle--account, -A--account=<handle> or -A <handle>Project handles are provided by HPC Operations at the beginning of an allocation cycle.
Maximum Job Duration (walltime)--time, -t--time=1-12:05:50
(1 day, 12 hours, 5 minutes, and 50 seconds)
or
-t5 (5 minutes)
Recognized Time Formats:
<days>-<hours>
<days>-<hours>:<min>
<days>-<hours>:<min>:<sec>
<hours>:<min>:<sec>
<min>:<sec>
<min>
+

Resource Request Descriptions#

+

Specific resources may be requested from the scheduler to help the scheduler assign appropriate number and type of node or nodes to your job:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterFlagExampleExplanation
Nodes, Tasks, MPI Ranks--nodes or -N
--ntasks or -n
--ntasks-per-node
--nodes=20
--ntasks=40
--ntasks-per-node=20
if ntasks is specified, it is important to indicate the number of nodes requested as well. This helps with scheduling jobs on the fewest possible Ecells (racks) required for the job.

The maximum number of tasks that can be assigned per node is equal to the CPU (core) count of the node.
Memory--mem
--mem-per-cpu
--mem=50000Memory per node
memory per task/MPI rank
Local disk (/tmp/scratch)--tmp--tmp=10TB
--tmp=100GB
--tmp=1000000
Request /tmp/scratch space in megabytes (default), GB, or TB.
GPUs--gpus--gpus=2Requests 2 GPUs. See system information for total number of GPUs.
+
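A sketch combining several of these requests as #SBATCH directives (the values shown are illustrative placeholders, not recommendations for any particular system):

#SBATCH --nodes=2
#SBATCH --ntasks-per-node=52
#SBATCH --mem=100G
#SBATCH --tmp=100GB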

Job Management and Output#

+

Command and control and monitoring customization are also available:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterFlagExampleExplanation
High priority--qos--qos=highHigh-priority jobs will take precedence in the queue. Note: There is an AU penalty of 2X for high-priority jobs.
Dependencies--dependency--dependency=<condition>:<job_id>

Conditions:

after
afterany
afternotok
afterok
singleton
You can submit jobs that will wait until a condition is met before running.


Conditions:

After the listed jobs have started
After the listed jobs have finished
After the listed jobs have failed
After the listed jobs return exit code 0
After all existing jobs with the same name and user have ended
Job Name--job-name--job-name=myjobA short, descriptive job name for easier identification in the queue.
Email notifications--mail-user--mail-user=my.email@nrel.gov
--mail-type=ALL
Slurm will send updates on job status change. Type can be specified with --mail-type as BEGIN, END, FAIL, or ALL.
Output--output

--error
--output=job_stdout

--output=job_stderr
Defaults to slurm-<jobid>.out

Defaults to slurm-<jobid>.out (same file as stdout)

stdout and stderr will be written to the same file unless specified otherwise
+
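For example, a sketch of chaining two jobs with a dependency (the script names are hypothetical, and the job ID 512345 is simply whatever the first submission returns):

$ sbatch --account=<handle> preprocess.sh
Submitted batch job 512345
$ sbatch --account=<handle> --dependency=afterok:512345 postprocess.sh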

Commonly Used Slurm Environment Variables#

+

You may use these environment variables in your sbatch scripts to help control or monitor various aspects of your job directly within the script, as well:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterSemantic ValueSample Value
$LOCAL_SCRATCHAbsolute directory path for local-only disk space per node. This should always be /tmp/scratch for compute nodes with local disk./tmp/scratch
$TMPDIRPath for temporary directory for scratch space. Uses local storage on compute nodes with local disk, and RAM on those without./tmp/scratch/<JOBID> (default value on Kestrel)
$SLURM_CLUSTER_NAMEThe cluster name as per the master configuration in Slurm. Identical to $NREL_CLUSTER.kestrel, swift
$SLURM_CPUS_ON_NODEQuantity of CPUs per compute node.104
$SLURMD_NODENAMESlurm name of the node on which the variable is evaluated. Matches hostname.r4i2n3
$SLURM_JOB_ACCOUNTThe Slurm account used to submit the job. Matches the project handle.csc000
$SLURM_JOB_CPUS_PER_NODEContains value of --cpus-per-node, if specified. Should be equal or less than $SLURM_CPUS_ON_NODE.104
$SLURM_JOBID or $SLURM_JOB_IDJob ID assigned to the job.521837
$SLURM_JOB_NAMEThe assigned name of the job, or the command run if no name was assigned.bash
$SLURM_JOB_NODELIST or $SLURM_NODELISTHostnames of all nodes assigned to the job, in Slurm syntax.r4i2n[1,3-6]
$SLURM_JOB_NUM_NODES or $SLURM_NNODESQuantity of nodes assigned to the job.5
$SLURM_JOB_PARTITIONThe scheduler partition the job is assigned to.short
$SLURM_JOB_QOSThe Quality of Service the job is assigned to.high
$SLURM_NODEIDA unique index value for each node of the job, ranging from 0 to $SLURM_NNODES.0
$SLURM_STEP_ID or $SLURM_STEPIDWithin a job, sequential srun commands are called "steps". Each srun increments this variable, giving each step a unique index number. This may be helpful for debugging, when determining which step a job fails at.0
$SLURM_STEP_NODELISTWithin a job, srun calls can contain differing specifications of how many nodes should be used for the step. If your job requests 5 total nodes and you used srun --nodes=3, this variable would contain the list of the 3 nodes that participated in this job step.r4i2n[2-4]
$SLURM_STEP_NUM_NODESReturns the quantity of nodes requested for the job step (see entry on $SLURM_STEP_NODELIST.)3
$SLURM_STEP_NUM_TASKSReturns the quantity of tasks requested to be executed in the job step. Defaults to the task quantity of the job request.1
$SLURM_STEP_TASKS_PER_NODEContains the value specified by --tasks-per-node in the job step. Defaults to the tasks-per-node of the job request.1
$SLURM_SUBMIT_DIRContains the absolute path of the directory the job was submitted from./projects/csc000
$SLURM_SUBMIT_HOSTThe hostname of the system from which the job was submitted. Should always be a login node.el1
$SLURM_TASKS_PER_NODEContains the value specified by --tasks-per-node in the job request.1
+
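A short sketch of using a few of these variables inside a job script (the filenames are hypothetical):

cd $SLURM_SUBMIT_DIR                                     # run from the directory the job was submitted from
echo "Job $SLURM_JOB_ID on $SLURM_JOB_NUM_NODES node(s): $SLURM_JOB_NODELIST"
cp $TMPDIR/output.dat ./output-$SLURM_JOB_ID.dat         # keep temporary results, tagged with the job ID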

Example SBATCH Script Walkthrough#

+

Many examples of sbatch scripts are available in the HPC Repository Slurm Directory on Github.

+

Here's a basic template job script to get started, followed by a breakdown of the individual components of the script. This script may be adapted to any HPC system with minor modifications. Copy it into a file on the cluster, make any necessary changes, and save it as a file, e.g. "myjob.sh".

+
#!/bin/bash
+#SBATCH --account=<allocation>
+#SBATCH --time=4:00:00
+#SBATCH --job-name=job
+#SBATCH --mail-user=your.email@nrel.gov
+#SBATCH --mail-type=BEGIN,END,FAIL
+#SBATCH --output=job_output_filename.%j.out  # %j will be replaced with the job ID
+
+module load myprogram
+myprogram.sh
+
+

Script Details#

+

Here is a section-by-section breakdown of the sample sbatch script, to help you begin writing your own.

+

Script Begin#

+

#!/bin/bash

+

This denotes the start of the script, and that it is written in BASH shell language, the most common Linux environment.

+

SBATCH Directives#

+

#SBATCH --account=<allocation>
+#SBATCH --time=4:00:00
+#SBATCH --job-name=job
+#SBATCH --mail-user=your.email@nrel.gov
+#SBATCH --mail-type=BEGIN,END,FAIL
+#SBATCH --output=job_output_filename.%j.out  # %j will be replaced with the job ID
+
+Generalized form:

+

#SBATCH --<command>=<value>

+

Command flags to the sbatch program are given via #SBATCH directives in the sbatch script. There are many flags available that can affect your job, listed in the previous section. Please see the official Slurm documentation on sbatch for a complete list, or view the man page on a login node with man sbatch.

+

Sbatch directives must be at the beginning of your sbatch script. Once a line with any other non-directive content is detected, Slurm will no longer parse further directives.

+

Note that sbatch flags do not need to be issued via directives inside the script. They can also be issued via the command line when submitting the job. Flags issued via the command line will supersede directives issued inside the script. For example:

+

sbatch --account=csc000 --time=60 --partition=debug mytestjob.sh

+

Job Instructions#

+

After the sbatch directive block, you may then begin executing your job. The syntax is normal BASH shell scripting. You may load system modules for software, load virtual environments, define environment variables, and execute your software to perform work.

+

In the simplest form, your sbatch script should load your software module(s) required, and then execute your program.

+

module load myprogram
+srun myprogram.sh
+
+or

+
module load myprogram
+myprogram.sh
+
+

You may also use more advanced bash scripting as a part of your sbatch script, e.g. to set up environments, manage your input and output files, and so on.

+

More system-specific information about Slurm partitions, node counts, memory limits, and other details can be found under the appropriate Systems page.

+

You may also visit the "master" main branch of the Github repository for downloadable examples, or to contribute your own.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Slurm/index.html b/Documentation/Slurm/index.html new file mode 100644 index 000000000..11aeb08e8 --- /dev/null +++ b/Documentation/Slurm/index.html @@ -0,0 +1,4888 @@ + + + + + + + + + + + + + + + + + + + + + + + Slurm Job Scheduler - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + +

Slurm Job Scheduler

+ +

Schedule Your Computational Work with Slurm#

+

Slurm is the job scheduler and workload manager used by the HPC clusters hosted at NREL.

+

A job contains a list of required consumable resources (such as nodes), a list of job constraints (when, where and how the job should run), and an execution environment, which includes things like an executable, input and output files. All computational work on an HPC cluster should generally be contained in a job.

+

There are two key types of jobs:

+
    +
  • +

    Batch jobs are unattended scripts that launch programs to complete computational work. Batch jobs are placed in a queue and launched at a future time and date, determined by the priority of the job. Batch jobs are submitted to the queue using the sbatch command.

    +
  • +
  • +

    Interactive jobs provide a shell prompt on a compute node and allow for software to be run that requires keyboard input from the user. The salloc and srun commands can be used to start an interactive job.

    +
  • +
+

Most computational work is typically submitted as a batch script and queued for later automatic execution. Results from standard output and/or standard error will be stored in a file or files by Slurm (this behavior is customizable in your sbatch script.) Your software may or may not also produce its own output files.

+

Please see the navigation bar on the left under the Slurm Job Scheduling section for more information about how to submit a job.

+ + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Slurm/interactive_jobs/index.html b/Documentation/Slurm/interactive_jobs/index.html new file mode 100644 index 000000000..edf7a051c --- /dev/null +++ b/Documentation/Slurm/interactive_jobs/index.html @@ -0,0 +1,5044 @@ + + + + + + + + + + + + + + + + + + + + + + + Running Interactive Jobs - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Running Interactive Jobs#

+

Interactive jobs provide a shell prompt on a compute node. This allows users to execute commands and scripts "live" as they would on the login nodes, with direct user input and output immediately available.

+

Login nodes are primarily intended to be used for logging in, editing scripts, and submitting batch jobs. Interactive work that involves substantial resources—either memory, CPU cycles, or file system I/O—should be performed on the compute nodes rather than on login nodes.

+

Interactive jobs may be submitted to any partition and are subject to the same time and node limits as non-interactive jobs.

+

Requesting Interactive Access#

+

The salloc command is used to start an interactive session on one or more compute nodes. When resources become available, interactive access is provided by a shell prompt. The user may then work interactively on the node for the time specified.

+

The job is held until the scheduler can allocate a node to you. You will see a series of messages such as:

+
$ salloc --time=30 --account=<handle> --nodes=2
+salloc: Pending job allocation 512998
+salloc: job 512998 queued and waiting for resources
+salloc: job 512998 has been allocated resources
+salloc: Granted job allocation 512998
+salloc: Waiting for resource configuration
+salloc: Nodes r2i2n5,r2i2n6 are ready for job
+[hpc_user@r2i2n5 ~]$ 
+
+

You can view the nodes that are assigned to your interactive jobs using one of these methods:

+
$ echo $SLURM_NODELIST
+r2i2n[5-6]
+$ scontrol show hostname
+r2i2n5
+r2i2n6
+
+

Once a job is allocated, you will automatically "ssh" to the first allocated node so you do not need to manually ssh to the node after it is assigned. If you requested more than one node, you may ssh to any of the additional nodes assigned to your job.

+

You may load modules, run applications, start GUIs, etc., and the commands will execute on that node instead of on the login node.

+
+

Note

+

When requesting multiple nodes, please use number of nodes --nodes (or -N) instead of number of tasks --ntasks (or -n) to reduce the total number of network "hops" between the allocated nodes.

+
+

Type exit when finished using the node.

+

Interactive jobs are useful for many tasks. For example, to debug a job script, users may submit a request to get a set of nodes for interactive use. When the job starts, the user "lands" on a compute node, with a shell prompt. Users may then run the script to be debugged many times without having to wait in the queue multiple times.

+

A debug job allows up to two nodes to be available with shorter wait times when the system is heavily utilized. This is accomplished by limiting the number of nodes to 2 per job allocation and specifying --partition=debug. For example:

+
[hpc_user@el1 ~]$ salloc --time=60 --account=<handle> --nodes=2 --partition=debug
+
+

A debug node will only be available for a maximum wall time of 1 hour.

+

Sample Interactive Job Commands#

+

The following command requests interactive access to one node with at least 150 GB RAM for 20 minutes:

+
$ salloc --time=20 --account=<handle> --nodes=1 --mem=150G
+
+

For an interactive job that will require multiple nodes, for example, running interactive software that uses MPI, launch with an salloc first:

+
$ salloc --time=20 --account=<handle> --nodes=2
+
+

The above salloc command will log you into one of the two nodes automatically. You can then launch your software using an srun command with the appropriate flags, such as --ntasks or --ntasks-per-node:

+
[hpc_user@r2i2n5 ~]$ module purge; module load paraview
+[hpc_user@r2i2n5 ~]$ srun --ntasks=20 --ntasks-per-node=10 pvserver --force-offscreen-rendering
+
+

If your single-node job needs a GUI that uses X-windows:

+
$ ssh -Y kestrel.hpc.nrel.gov
+...
+$ salloc --time=20 --account=<handle> --nodes=1 --x11
+
+

If your multi-node job needs a GUI that uses X-windows, the least fragile mechanism is to acquire nodes as above, then in a separate session set up X11 forwarding:

+
$ salloc --time=20 --account=<handle> --nodes=2
+...
+[hpc_user@r3i5n13 ~]$ (your compute node r3i5n13)
+
+

Then from your local workstation:

+
$ ssh -Y kestrel.hpc.nrel.gov
+...
+[hpc_user@el1 ~]$ ssh -Y r3i5n13  #(from login node to reserved compute node)
+...
+[hpc_user@r3i5n13 ~]$  #(your compute node r3i5n13, now X11-capable)
+[hpc_user@r3i5n13 ~]$ xterm  #(or another X11 GUI application)
+
+

Requesting Interactive GPU Nodes#

+

The following command requests interactive access to GPU nodes:

+
[hpc_user@el2 ~] $ salloc --account=<handle> --time=5 --gres=gpu:2 
+
+

This next srun command inside the interactive session gives you access to the GPU devices:

+
[hpc_user@r104u33 ~] $ srun --gres=gpu:2 nvidia-smi
+Mon Oct 21 09:03:29 2019
++-------------------------------------------------------------------+
+| NVIDIA-SMI 410.72 Driver Version: 410.72 CUDA Version: 10.0 |
+|---------------------+----------------------+----------------------+
+| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
+| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
+|=====================+======================+======================|
+| 0 Tesla H100-PCIE... Off | 00000000:37:00.0 Off | 0 |
+| N/A 41C P0 38W / 250W | 0MiB / 16130MiB | 0% Default |
++---------------------+----------------------+----------------------+
+| 1 Tesla H100-PCIE... Off | 00000000:86:00.0 Off | 0 |
+| N/A 40C P0 36W / 250W | 0MiB / 16130MiB | 0% Default |
++---------------------+----------------------+----------------------+
+
++-------------------------------------------------------------------+
+| Processes: GPU Memory |
+| GPU PID Type Process name Usage |
+|===================================================================|
+| No running processes found |
++-------------------------------------------------------------------+
+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Slurm/job_arrays/index.html b/Documentation/Slurm/job_arrays/index.html new file mode 100644 index 000000000..b00f9ee06 --- /dev/null +++ b/Documentation/Slurm/job_arrays/index.html @@ -0,0 +1,4999 @@ + + + + + + + + + + + + + + + + + + + + + + + Job Arrays - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Job Arrays#

+

Job arrays are typically used when a user wants to submit many similar jobs with different inputs. Job arrays are capable of submitting hundreds, and even thousands, of similar jobs together. Here, we will describe how to submit job arrays on Slurm. More details on job arrays can be found in the Slurm documentation.

+

An example of a job array submission script can be found in our NREL HPC Slurm Examples directory. The job array example is titled uselist.sh, and requires doarray.py and invertc.c from the source folder.

+

SBATCH Directives for Job Arrays#

+

In order to submit a job array to Slurm, the SBATCH directives at the top of your script or sbatch command line submission must contain the flag --array=<ARRAY_VALS>, where ARRAY_VALS is a list or range of numbers that will represent the index values of your job array. For example:

+
#SBATCH --array=0-12  # Submits a job array with index values between 0 and 12
+...
+
+#SBATCH --array=2,4,6,10  # Submits a job array with index values 2, 4, 6, and 10
+...
+
+#SBATCH --array=1-43:2  # Submits a job array with index values between 1 and 43 with a step size of 2
+...
+
+#SBATCH --array=1-25%5  # Submits a job array with index values between 1 and 25 and limits the number of simultaneously running tasks to 5
+
+
+

Submitting Job Arrays on Kestrel

+

To ensure that your job array is running optimally, it is recommended that job arrays are submitted on the shared partition using --partition=shared. See more about shared partitions on Kestrel here.

+
+

Job Control#

+

Like standard slurm jobs, job arrays have a JOB_ID, which is stored in the environment variable SLURM_ARRAY_JOB_ID. The environment variable SLURM_ARRAY_TASK_ID will hold information about the index of the job array.
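As a minimal sketch, a job-array script might use this variable to select a different input file per task (process.py and the input files are hypothetical):

#!/bin/bash
#SBATCH --account=<allocation>
#SBATCH --time=10
#SBATCH --partition=shared
#SBATCH --array=1-4

# each array task works on its own input file, selected by the task index
python process.py input_${SLURM_ARRAY_TASK_ID}.dat > output_${SLURM_ARRAY_TASK_ID}.txt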

+

For example, if there is a job array in the queue, the output may look like this:

+
$ squeue
+ JOBID   PARTITION     NAME     USER  ST  TIME NODES NODELIST
+ 45678_1  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_2  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_3  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_4  standard    array     user  R  0:13  1     x1007c0s0b0n1
+
+

Here, SLURM_ARRAY_JOB_ID is 45678. The number following the underscore in each row is the SLURM_ARRAY_TASK_ID. This job is a job array that was submitted with --array=1-4.

+

Scontrol commands can be executed on entire job arrays or specific indices of a job array. +

$ scontrol suspend 45678 
+$ squeue
+ JOBID   PARTITION     NAME     USER  ST  TIME NODES NODELIST
+ 45678_1  standard    array     user  S  0:13  1     x1007c0s0b0n1
+ 45678_2  standard    array     user  S  0:13  1     x1007c0s0b0n1
+ 45678_3  standard    array     user  S  0:13  1     x1007c0s0b0n1
+ 45678_4  standard    array     user  S  0:13  1     x1007c0s0b0n1
+
+$ scontrol resume 45678
+$ squeue
+ JOBID   PARTITION     NAME     USER  ST  TIME NODES NODELIST
+ 45678_1  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_2  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_3  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_4  standard    array     user  R  0:13  1     x1007c0s0b0n1
+
+
$ scontrol suspend 45678_2 
+$ squeue
+ JOBID   PARTITION     NAME     USER  ST  TIME NODES NODELIST
+ 45678_1  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_2  standard    array     user  S  0:13  1     x1007c0s0b0n1
+ 45678_3  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_4  standard    array     user  R  0:13  1     x1007c0s0b0n1
+
+$ scontrol resume 45678_2
+$ squeue
+ JOBID   PARTITION     NAME     USER  ST  TIME NODES NODELIST
+ 45678_1  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_2  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_3  standard    array     user  R  0:13  1     x1007c0s0b0n1
+ 45678_4  standard    array     user  R  0:13  1     x1007c0s0b0n1
+

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Slurm/monitor_and_control/index.html b/Documentation/Slurm/monitor_and_control/index.html new file mode 100644 index 000000000..745bccd0d --- /dev/null +++ b/Documentation/Slurm/monitor_and_control/index.html @@ -0,0 +1,5240 @@ + + + + + + + + + + + + + + + + + + + + + + + Monitor and Control Commands - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Commands to Monitor and Control Jobs#

+

Slurm includes a suite of command-line tools used to submit, monitor, and control jobs and the job queue.

| Command  | Description |
| -------- | ----------- |
| squeue   | Show the Slurm queue. Users can specify JOBID or USER. |
| scontrol | Controls various aspects of jobs such as job suspension, re-queuing or resuming jobs, and can display diagnostic info about each job. |
| scancel  | Cancel specified job(s). |
| sinfo    | View information about all Slurm nodes and partitions. |
| sacct    | Detailed information on accounting for all jobs and job steps. |
| sprio    | View priority and the factors that determine scheduling priority. |

Please see man pages on the cluster for more information on each command. Also see --help or --usage flags for each.

+

Our Presentation on Advanced Slurm Features is also available as a resource, with supplementary information on how to manage jobs.

+

Another great resource for Slurm at NREL is this repository on Github.

+

Usage Examples#

+

squeue#

+

The squeue command is used to view the current state of jobs in the queue.

+

To show your jobs:

+
$ squeue -u hpcuser
+           JOBID    PARTITION       NAME      USER   ST       TIME      NODES   NODELIST(REASON)
+          506955          gpu   wait_tes   hpcuser   PD       0:00          1      (Resources)
+
+

To show all jobs in the queue with extended information:

+
$ squeue -l
+Thu Dec 13 12:17:31 2018
+ JOBID  PARTITION NAME     USER     STATE   TIME    TIME_LIMIT   NODES  NODELIST(REASON)
+ 516890 standard Job007    user1    PENDING 0:00    12:00:00    1050   (Dependency)
+ 516891 standard Job008    user1    PENDING 0:00    12:00:00    1050   (Dependency)
+ 516897      gpu Job009    user2    PENDING 0:00    04:00:00       1   (Resources)
+ 516898 standard Job010    user3    PENDING 0:00    15:00:00      71   (Priority)
+ 516899 standard Job011    user3    PENDING 0:00    15:00:00      71   (Priority)
+-----------------------------------------------------------------------------
+ 516704 standard Job001    user4    RUNNING 4:09:48 15:00:00      71    r1i0n[0-35],r1i1n[0-34]
+ 516702 standard Job002    user4    RUNNING 4:16:50 15:00:00      71    r1i6n35,r1i7n[0-35],r2i0n[0-33]
+ 516703 standard Job003    user4    RUNNING 4:16:57 15:00:00      71    r1i5n[0-35],r1i6n[0-34]
+ 516893 standard Job004    user4    RUNNING 7:19     3:00:00      71    r1i1n35,r1i2n[0-35],r1i3n[0-33]
+ 516894 standard Job005    user4    RUNNING 7:19     3:00:00      71    r4i2n[20-25],r6i6n[7-35],r6i7n[0-35]
+ 516895 standard Job006    user4    RUNNING 7:19     3:00:00      71    r4i2n[29-35],r4i3n[0-35],r4i4n[0-20]
+
+

To estimate when your jobs will start to run, use the squeue --start command with the JOBID.

+

Note that the Slurm start times are only estimates, and they are updated frequently based on the current state of the queue and the specified --time of all jobs in the queue.

+
$ squeue --start -j 509851,509852
+ JOBID    PARTITION    NAME      USER      ST          START_TIME    NODES   SCHEDNODES   NODELIST(REASON)
+ 509851   short      test1.sh   hpcuser    PD                 N/A      100       (null)       (Dependency)
+ 509852   short      test2.sh   hpcuser    PD 2018-12-19T16:54:00        1      r1i6n35         (Priority)
+
+

Output Customization of the squeue Command#

+

The fields displayed by squeue can be highly customized to show the information that's most relevant to you by using the -o or -O flags. The full list of customizable fields can be found under the entries for these flags in man squeue on the system.

+

By setting the environment variable SQUEUE_FORMAT, you can override the system's default squeue fields with your own. For example, run the following line (or place it in your ~/.bashrc or ~/.bash_aliases file to make it persistent across logins):

+

export SQUEUE_FORMAT="%.18i %.15P %.8q %.12a %.8p %.8j %.8u %.2t %.10M %.6D %R"

+

Using squeue will now provide the formatted output:

+
JOBID    PARTITION   QOS    ACCOUNT   PRIORITY     NAME     USER    ST     TIME    NODES NODELIST(REASON)
+13141110 standard   normal  csc000    0.051768    my_job   hpcuser  R   2-04:01:17   1    r1i3n29
+
+

Or you may wish to add the %V field to show the timestamp at which a job was submitted, and sort by that timestamp, ascending:

+

squeue -o "%.18i %.9P %.8j %.8u %.2t %.10M %.6D %20V %6q %12l %R" -S "V"

+

Example output:

+
             JOBID PARTITION     NAME     USER ST       TIME  NODES SUBMIT_TIME          QOS    TIME_LIMIT   NODELIST(REASON)
+          13166762    bigmem    first  hpcuser PD       0:00      1 2023-08-30T14:08:11  high   2-00:00:00   (Priority)
+          13166761    bigmem       P5  hpcuser PD       0:00      1 2023-08-30T14:08:11  high   2-00:00:00   (Priority)
+          13166760    bigmem       P4  hpcuser PD       0:00      1 2023-08-30T14:08:11  high   2-00:00:00   (Priority)
+          13166759    bigmem      Qm3  hpcuser PD       0:00      1 2023-08-30T14:08:11  high   2-00:00:00   (Priority)
+          13166758    bigmem       P2  hpcuser PD       0:00      1 2023-08-30T14:08:11  high   2-00:00:00   (Priority)
+          13166757    bigmem       G1  hpcuser PD       0:00      1 2023-08-30T14:08:11  high   2-00:00:00   (Priority)
+          13167383    bigmem       r8  hpcuser PD       0:00      1 2023-08-30T16:25:52  high   2-00:00:00   (Priority)
+          13167390  standard      P12  hpcuser PD       0:00      1 2023-08-30T16:25:55  high   2-00:00:00   (Priority)
+          13167391    bigmem      P34  hpcuser PD       0:00      1 2023-08-30T16:25:55  high   2-00:00:00   (Priority)
+          13167392    bigmem    qchem  hpcuser PD       0:00      1 2023-08-30T16:25:55  high   2-00:00:00   (Priority)
+          13167393     debug  testrun  hpcuser PD       0:00      1 2023-08-30T16:25:55  high   2-00:00:00   (Priority)
+          13167394    bigmem   latest  hpcuser PD       0:00      1 2023-08-30T16:25:55  high   2-00:00:00   (Priority)
+          13182480     debug  runtest  jwright2 R      31:01      1 2023-09-01T14:49:54  normal 59:00        r3i7n35
+
+

Many other options are available in the man page.

+

scontrol#

+

To get detailed information about your job before and while it runs, you may use scontrol show job with the JOBID. For example: +

$ scontrol show job 522616
+JobId=522616 JobName=myscript.sh
+ UserId=hpcuser(123456) GroupId=hpcuser(123456) MCS_label=N/A
+ Priority=43295364 Nice=0 Account=csc000 QOS=normal
+ JobState=PENDING Reason=Dependency Dependency=afterany:522615
+
+The scontrol command can also be used to modify pending and running jobs: +
$ scontrol update jobid=526501 qos=high
+$ sacct -j 526501 --format=jobid,partition,state,qos
+       JobID  Partition      State        QOS
+------------ ---------- ---------- ----------
+526501            short    RUNNING       high
+526501.exte+               RUNNING
+526501.0                 COMPLETED
+
+To place a hold on a pending job: scontrol hold <JOBID>

+

To release a held job: scontrol release <JOBID>

+

To cancel and rerun a job: scontrol requeue <JOBID>
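For example (the job ID is illustrative):

```bash
$ scontrol hold 526501      # job stays in the queue but will not start
$ scontrol release 526501   # remove the hold
$ scontrol requeue 526501   # cancel the job and put it back in the queue
```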

+

scancel#

+

Use scancel -i <jobID> for an interactive mode to confirm each job_id.step_id before performing the cancel operation. Use scancel --state=PENDING,RUNNING,SUSPENDED -u <userid> to cancel your jobs by STATE or scancel -u <userid> to cancel ALL of your jobs.
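A few illustrative invocations (the job ID is a placeholder):

```bash
$ scancel 526501                                        # cancel a single job
$ scancel -i 526501                                     # confirm each job_id.step_id before cancelling
$ scancel --state=PENDING,RUNNING,SUSPENDED -u $USER    # cancel your jobs by state
$ scancel -u $USER                                      # cancel ALL of your jobs
```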

+

sinfo#

+

Use sinfo to view cluster information: +

$ sinfo -o %A
+NODES(A/I)
+1580/514
+
+Above, sinfo shows the number of allocated (A) and idle (I) nodes in the entire cluster.

+

To see information about specific nodes, use sinfo -n <node id> with a single node or a comma-separated list of nodes. You will see the partitions to which each node can be allocated as well as the node STATE. +

$ sinfo -n r105u33,r2i4n27
+PARTITION  AVAIL   TIMELIMIT NODES  STATE  NODELIST
+short      up        4:00:00     1  drain   r2i4n27
+short      up        4:00:00     1   down   r105u33
+standard   up     2-00:00:00     1  drain   r2i4n27
+standard   up     2-00:00:00     1   down   r105u33
+long       up     10-00:00:0     1  drain   r2i4n27
+long       up     10-00:00:0     1   down   r105u33
+bigmem     up     2-00:00:00     1   down   r105u33
+gpu        up     2-00:00:00     1   down   r105u33
+bigscratch up     2-00:00:00     0    n/a
+ddn        up     2-00:00:00     0    n/a
+

+

sacct#

+

Use sacct to view accounting information about jobs AND job steps: +

$ sacct -j 525198 --format=User,JobID,Jobname,partition,state,time,start,elapsed,nnodes,ncpus
+     User        JobID    JobName  Partition      State  Timelimit               Start    Elapsed  NNodes    NCPUS
+--------- ------------ ---------- ---------- ---------- ---------- ------------------- ---------- ------- --------
+  hpcuser 525198        acct_test      short  COMPLETED   00:01:00 2018-12-19T16:09:34   00:00:54       4      144
+          525198.batch      batch             COMPLETED            2018-12-19T16:09:34   00:00:54       1       36
+          525198.exte+     extern             COMPLETED            2018-12-19T16:09:34   00:00:54       4      144
+          525198.0           bash             COMPLETED            2018-12-19T16:09:38   00:00:00       4        4
+
+Use sacct -e to print a list of fields that can be specified with the --format option.

+

sprio#

+

By default, sprio returns information for all pending jobs. Options exist to display specific jobs by JOBID and USER. +

$ sprio -u hpcuser
+  JOBID  PARTITION     USER  PRIORITY   AGE  JOBSIZE PARTITION       QOS
+ 526752      short  hpcuser  43383470  3733   179737         0  43200000
+
+Use the `-n` flag to provide a normalized priority weighting with a value between 0-1:
+
+$ sprio -u hpcuser -n
+  JOBID  PARTITION     USER    PRIORITY        AGE    JOBSIZE  PARTITION        QOS
+ 526752      short  hpcuser  0.01010100  0.0008642  0.0009747  0.0000000  0.1000000
+

+

The sprio command also has some options that can be used to view the entire queue by priority order. The following command will show the "long" (-l) format sprio with extended information, sorted by priority in descending order (-S -Y), and piped through the less command with line numbers shown on the far left (less -N):

+

sprio -S -Y -l | less -N

+
1           JOBID PARTITION     USER   PRIORITY       SITE        AGE      ASSOC  FAIRSHARE    JOBSIZE  PARTITION        QOS        NICE                 TRES
+2        13150512 standard-  hpcuser  373290120          0    8909585          0  360472143      84743    3823650          0           0
+3        13150514 standard-  hpcuser  373290070          0    8909534          0  360472143      84743    3823650          0           0
+
+

When sprio is piped through the less command for paginating, press the / key and type in a jobid or a username and press the return key to search for and jump to that jobid or username. Press / and hit return again to search for the next occurrence of your search term, or use the ? instead of / to search upwards in the list. Press q to exit.

+

Note that when piped through less -N, a line number corresponds to a job's position in the priority queue plus 1, because the column-label line at the top of the sprio output is counted by less. To remove the column labels from sprio output, add the -h or --noheader flag to sprio.
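For example, this variation drops the header so that the less line number matches the queue position directly:

```bash
sprio -S -Y -l --noheader | less -N
```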

+

The -l (--long) flag precludes using -n for normalized priority values.

+

Like squeue and other Slurm commands, sprio supports the -o format flag to customize the columns that are displayed. For example:

+

sprio -S -Y -o "%i %r %u %y"

+

This will show only the jobid, partition, username, and normalized priority. More details about output formatting are available in man sprio.

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Slurm/multiple_sub_jobs/index.html b/Documentation/Slurm/multiple_sub_jobs/index.html new file mode 100644 index 000000000..e98286aca --- /dev/null +++ b/Documentation/Slurm/multiple_sub_jobs/index.html @@ -0,0 +1,5078 @@ + + + + + + + + + + + + + + + + + + + + + + + Running Multiple Sub-Jobs - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Running Multiple Sub-Jobs with One Job Script#

+

If your workload consists of serial or modestly parallel programs, you can run multiple instances of your program at the same time using different processor cores on a single node. This allows you to make better use of your allocation by using node resources that would otherwise sit idle.
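The basic pattern is sketched below (the program name and input files are placeholders); the worked example that follows fills in the details:

```bash
# Launch several serial runs in the background, one per core, then wait for all of them
for i in 1 2 3 4; do
    ./my_program < input.$i > log.$i &
done
wait
```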

+

Example#

+

For illustration, we use a simple C code to calculate pi. The source code and instructions for building that program are provided below:

+

Sample Program#

+

Copy and paste the following into a terminal window that's connected to the cluster. +This will stream the pasted contents into a file called pi.c using the command cat << eof > pi.c.

+
cat << eof > pi.c
+#include <stdio.h>
+
+// pi.c: A sample C code calculating pi
+
+int main(void) {
+  double x,h,sum = 0;
+  int i,N;
+  printf("Input number of iterations: ");
+  scanf("%d",&N);
+  h=1.0/(double) N;
+
+  for (i=0; i<N; i++) {
+   x=h*((double) i + 0.5);
+   sum += 4.0*h/(1.0+x*x);
+  }
+
+  printf("\nN=%d, PI=%.15f\n", N,sum);
+}
+
+eof
+
+

Compile the Code#

+

This example uses the Intel C compiler. Load the module and compile pi.c with the following commands:

+
$ module purge
+$ module load intel-mpi
+$ icc -O2 pi.c -o pi_test
+$ ./pi_test
+
+

A sample batch job script file to run 8 copies of the pi_test program on a node with 24 processor cores is given below. This script creates 8 directories and starts 8 jobs, each in the background. It waits for all 8 jobs to complete before finishing.

+

Copy and paste the following into a text file#

+

Place that batch file into one of your directories on the cluster. Make sure to change the allocation to a project-handle you belong to.

+
#!/bin/bash
+## Required Parameters   ##############################################
+#SBATCH --time 10:00               # WALLTIME limit of 10 minutes
+
+## Double ## will cause SLURM to ignore the directive:
+#SBATCH -A <handle>                # Account (replace with appropriate)
+
+#SBATCH -n 8                       # ask for 8 tasks   
+#SBATCH -N 1                       # ask for 1 node
+## Optional Parameters   ##############################################
+#SBATCH --job-name wait_test       # name to display in queue
+#SBATCH --output std.out
+#SBATCH --error std.err
+
+JOBNAME=$SLURM_JOB_NAME            # re-use the job-name specified above
+
+# Run 1 job per task
+N_JOB=$SLURM_NTASKS                # create as many jobs as tasks
+
+for((i=1;i<=$N_JOB;i++))
+do
+  mkdir $JOBNAME.run$i             # Make subdirectories for each job
+  cd $JOBNAME.run$i                # Go to job directory
+  echo 10*10^$i | bc > input       # Make input files
+  time ../pi_test < input > log &  # Run your executable, note the "&"
+  cd ..
+done
+
+#Wait for all
+wait
+
+echo
+echo "All done. Checking results:"
+grep "PI" $JOBNAME.*/log
+
+

Submit the Batch Script#

+

Use the following Slurm sbatch command to submit the script. The job will be scheduled, and you can view the output once the job completes to confirm the results.

+

$ sbatch -A <project-handle> <batch_file>
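When the job completes, the results can be checked from the files named in the script above:

```bash
$ cat std.out            # summary printed by the script, including the grep of the PI results
$ ls wait_test.run*/     # one subdirectory per sub-job, each containing its input and log files
```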

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Environments/Toolchains/intel/index.html b/Documentation/Systems/Kestrel/Environments/Toolchains/intel/index.html new file mode 100644 index 000000000..72527ba0f --- /dev/null +++ b/Documentation/Systems/Kestrel/Environments/Toolchains/intel/index.html @@ -0,0 +1,5410 @@ + + + + + + + + + + + + + + + + + + + + + + + Intel - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Compile and run: Intel oneAPI compilers & MPI#

+

Introduction#

+

This page shows how to compile and run programs using Intel's oneAPI toolchain. We'll look at building using their MPI and their Fortran and C compilers. It is also possible to build programs using Intel's MPI libraries while compiling with gfortran and gcc; this is covered as well.

+

Intel's C compiler icc has been around for many years. It is being retired and replaced with icx. As of the summer of 2023 you can still use icc, but it is scheduled to be removed by the end of the year. Building with icc produces a warning message. We'll discuss how to suppress the warning and, more importantly, how to build using icx.

+

Our example programs are hybrid MPI/OpenMP, so we'll show commands for building hybrid programs. If your program is pure MPI, the only change you need to make to the build process is to remove the compile-line option -fopenmp.

+

A sample makefile, source codes, and run script for Kestrel can be found in our Kestrel Repo under the Toolchains folder. There are individual directories for source, makefiles, and scripts, or you can download the intel.tgz file containing all required files. The source differs slightly from what is shown here. There is an extra file, triad.c, that gets compiled along with the Fortran and C programs discussed below. This file does some "dummy" work to allow the programs to run for a few seconds.

+

module loads for compile#

+

These are the modules you will need for compiles:

+
module load  intel-oneapi-compilers 
+module load intel-oneapi-mpi        
+module load gcc/13.1.0                     
+
+

Intel compilers use some gcc functionality, so we load gcc to provide a newer version of that compiler.

+

module loads for run#

+

Normally, builds are static, meaning that an application "knows" where to find its libraries. Thus, we don't need to load the Intel modules at runtime. Unless you have some other external libraries that require a module load, the only module lines you will need are:

+
module purge
+module load libfabric
+
+

Building programs#

+

As discussed above, we can build with Intel (ifort, icc, icx) or GNU (gcc, gfortran) underlying compilers. The five options are:

+
1. Fortran with: Intel MPI and Intel Fortran compiler
2. C with: Intel MPI and Intel C compiler, older compiler (icc)
3. C with: Intel MPI and Intel C compiler, newer compiler (icx)
4. Fortran with: Intel MPI with gfortran Fortran compiler
5. C with: Intel MPI with gcc C compiler

Here are the compile lines, where we add the -fopenmp option for OpenMP and the optimization flag -O3.

+

1. Fortran with: Intel MPI and Intel Fortran compiler#

+
mpiifort -O3 -g -fopenmp  ex1.f90  
+
+

2a. C with: Intel MPI and Intel C compiler, older compiler (icc)#

+
mpiicc -O3 -g -fopenmp  ex1.c  -o ex_c
+
+

This will produce the warning message icc: remark #10441: The Intel(R) C++ Compiler Classic (ICC) is deprecated and will be removed from product release in the second half of 2023. The Intel(R) oneAPI DPC++/C++ Compiler (ICX) is the recommended compiler moving forward. Please transition to use this compiler. Use '-diag-disable=10441' to disable this message

+

We can suppress the warning by compiling with an extra flag.

+
mpiicc -diag-disable=10441 -O3 -g -fopenmp  ex1.c   -o gex_c
+
+

2b. Older compiler (icc) might not be available#

+

Depending on the version of the compilers loaded, the message shown above might be replaced with one saying that icc is no longer available. In this case you MUST use icx. There are two ways to do that, shown below.

+

3a. C with: Intel MPI and Intel C compiler, newer compiler (icx)#

+

export I_MPI_CC=icx
+mpiicc -O3 -g -fopenmp  ex1.c  -o ex_c
+
+Setting this environment variable tells mpiicc to use icx (the newer Intel compiler) instead of icc.

+

3b. C with: Intel MPI and Intel C compiler, newer compiler (icx)#

+

mpiicx -O3 -g -fopenmp  ex1.c  -o ex_c
+
+Explicitly running mpiicx will give you icx as the backend compiler.

+

mpicc and mpif90 may not give you what you expect.#

+

The commands mpicc and mpif90 actually call gcc and gfortran instead of the Intel compilers. So if you consider these the default way to compile MPI programs, then "by default" Intel MPI does not use the Intel compilers.
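One way to confirm which backend compiler a given wrapper will invoke is the -show option, which prints the underlying compile command without running it; for example:

```bash
mpicc  -show    # typically reports gcc
mpif90 -show    # typically reports gfortran
mpiicx -show    # reports icx
```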

+

4. Fortran with: Intel MPI with gfortran Fortran compiler#

+
mpif90 -O3 -g -fopenmp  ex1.f90 
+
+

5. C with: Intel MPI with gcc C compiler#

+
mpicc -O3 -g -fopenmp  ex1.c 
+
+

Example programs#

We have two example MPI/OpenMP programs, ex1.c and ex1.f90. They are more or less identical in function. They first print MPI library and compiler information. For example, the Fortran example compiled with mpiifort reports:

+
  Fortran MPI TASKS            4
+ Intel(R) MPI Library 2021.8 for Linux* OS
+
+ Intel(R) Fortran Intel(R) 64 Compiler Classic for applications running on Intel
+
+

For mpif90 we get:

+
  Fortran MPI TASKS            4
+ Intel(R) MPI Library 2021.8 for Linux* OS
+
+ GCC version 13.1.0
+
+

Note in these cases we have the same MPI library but different compilers.

+

The programs call a routine, triad, that keeps the cores busy for about 4 seconds. This allows the OS to settle down. Then for each MPI task and each OpenMP thread we get a line of the form:

+
task 0001 is running on x9000c3s2b0n0 thread=   2 of   3 is on core  054
+
+

This says that MPI task 1 is running on node x9000c3s2b0n0. The task has 3 OpenMP threads and the second is running on core 54.

+

Example makefile#

+

The triad.c file contains the routines that keep the cores busy for 4 seconds. This is common to both the Fortran and C versions of our codes. As discussed above, our main codes are ex1.c and ex1.f90. Our makefile will build:

+

1. Fortran with: Intel MPI and Intel Fortran compiler#

+

3. C with: Intel MPI and Intel C compiler, newer compiler (icx)#

+

4. Fortran with: Intel MPI with gfortran Fortran compiler#

+

5. C with: Intel MPI with gcc C compiler#

+

There are comments in the makefile to show how to build with

+

2. C with: Intel MPI and Intel C compiler, older compiler (icc)#

+

The makefile has an interesting "trick". The default target is recurse. This target loads the modules and then calls make again using the same makefile but with the targets intel and gnu. By using this "trick" you don't have to load modules before running make.

+

The targets intel and gnu each have a dependency that compiles triad with either the Intel or gcc compilers. Then the final applications are built with Intel MPI and, again, either the Intel or GNU compilers.

+

The final MPI codes are:

+
- gex_c : gcc
- gex_f : gfortran
- ex_c : Intel C (icx)
- ex_f : Intel Fortran (ifort)

Run script#

+
The run script does the following:

1. Makes a new directory, copies the required files, and goes there
2. Does a make, with output going into make.log
3. Sets the number of MPI tasks and OpenMP threads
4. Sets some environment variables to control and report on threads (discussed below)
5. Runs module commands:
    1. module purge
    2. module load libfabric
6. Creates a string with all of our srun options (discussed below)
7. Calls srun on each version of our program:
    1. Output goes to *.out
    2. Report on thread placement goes to *.info

Our script sets these OpenMP-related variables. The first is familiar. KMP_AFFINITY is unique to the Intel compilers; in this case we are telling the OS to scatter (spread out) our threads. OMP_PROC_BIND=spread does the same thing but is not unique to Intel compilers, so in this case KMP_AFFINITY is actually redundant.

+
  export OMP_NUM_THREADS=3
+  export KMP_AFFINITY=scatter
+  export OMP_PROC_BIND=spread
+
+

The next line

+
export BIND="--cpu-bind=v,cores"
+
+

is not technically used as an environment variable, but it will be used to build the srun command line. Passing --cpu-bind=v to srun will cause it to report threading information. The "cores" option tells srun to "Automatically generate masks binding tasks to cores." There are many other binding options, as described in the srun man page. This setting works well for many programs.

+

Our srun command line options for 2 tasks per node and 3 threads per task are:

+
--mpi=pmi2 --cpu-bind=v,cores --threads-per-core=1 --tasks-per-node=2 --cpus-per-task=3
+
+
- --mpi=pmi2 : tells srun to use a particular launcher (this is optional)
- --cpu-bind=v,cores : discussed above
- --threads-per-core=1 : don't allow multiple threads to run on the same core. Without this option it is possible for multiple threads to end up on the same core, decreasing performance.
- --cpus-per-task=3 : cpus-per-task should always be equal to OMP_NUM_THREADS.
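Putting the pieces together, a sketch of how the script might assemble and use these options (the SRUN_OPTS variable name is illustrative; ex_f is one of the makefile targets listed above, and redirecting stderr is one way to capture srun's verbose binding report in a .info file):

```bash
export OMP_NUM_THREADS=3
export OMP_PROC_BIND=spread
export BIND="--cpu-bind=v,cores"

SRUN_OPTS="--mpi=pmi2 $BIND --threads-per-core=1 --tasks-per-node=2 --cpus-per-task=$OMP_NUM_THREADS"
srun $SRUN_OPTS ./ex_f > ex_f.out 2> ex_f.info
```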

The final thing the script does is produce a results report. This is just a list of the mapping of MPI tasks and threads to nodes and cores. There should not be any repeats in the list. There will be "repeats" of cores, but on different nodes, and "repeats" of nodes, but with different cores.

+

You can change the values for --cpu-bind, OMP_PROC_BIND, and threads-per-core to see if this list changes.

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Environments/Toolchains/others/index.html b/Documentation/Systems/Kestrel/Environments/Toolchains/others/index.html new file mode 100644 index 000000000..8511be9b5 --- /dev/null +++ b/Documentation/Systems/Kestrel/Environments/Toolchains/others/index.html @@ -0,0 +1,4925 @@ + + + + + + + + + + + + + + + + + + + + + + + MPI - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Compile and run: MPI#

+

Introduction#

+

The ToolChains Intel document goes into great detail on running with various settings and +with the old and new versions of the Intel compilers.

+

The mpi/normal section of gpubuildandrun shows how to build and run using the more standard version of MPI.

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Environments/gpubuildandrun/index.html b/Documentation/Systems/Kestrel/Environments/gpubuildandrun/index.html new file mode 100644 index 000000000..791aa882e --- /dev/null +++ b/Documentation/Systems/Kestrel/Environments/gpubuildandrun/index.html @@ -0,0 +1,6525 @@ + + + + + + + + + + + + + + + + + + + + + + + Building Applications for GPUs - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Building and Running on Kestrel's H100 GPU nodes.#

+

This page describes how to build and run on Kestrel's GPU nodes using several programming paradigms. There are pure CUDA programs, CUDA-aware MPI programs, MPI programs without CUDA, MPI programs with CUDA, MPI programs with OpenACC, and pure OpenACC programs.

+

The examples are contained in a tarball available on Kestrel via the command:

+
tar -xzf /nopt/nrel/apps/examples/gpu/h100.tgz
+
+

Or you can use git to do a download:

+
git clone $USER@kestrel.hpc.nrel.gov:/nopt/nrel/apps/examples/gpu/0824 h100
+
+

After getting the source you can run all of the examples:

+

cd h100
+sbatch --account=MYACCOUNT script
+
+where you need to provide your account name. This will run in about 22 minutes using 2 GPU nodes. Some of the examples require 2 nodes but most will run on a single node.

+

Helper files#

+

There are a number of "helper" files shipped with the examples. The script onnodes is run while you have a job running. You specify the jobid and it will report what is running on each node owned by the job. This will include the core on which each task/thread is running. On GPU nodes it will also report what you have running on each GPU.

+
+onnodes script +
[tkaiser2@kl6 h100]$ ./onnodes
+x3102c0s41b0n0
+PID    LWP PSR COMMAND         %CPU
+3658483 3659124   4 jacobi           0.0
+3653038 3653038  14 (sd-pam)         0.0
+3653037 3653037  16 systemd          0.0
+3659075 3659075  27 sshd             0.0
+3658483 3658499  52 cuda00001800007  0.0
+3658480 3658497  64 cuda00001800007  0.0
+3658481 3658481  65 jacobi          23.6
+3658481 3659129  66 jacobi           0.0
+3658482 3658482  72 jacobi          20.8
+3658482 3658498  79 cuda00001800007  0.0
+3658480 3658480  84 jacobi          64.6
+3658483 3658483  88 jacobi          20.2
+3658480 3659127  89 jacobi           0.0
+3658481 3658496  92 cuda00001800007  0.0
+3659076 3659076  95 ps               0.0
+3658482 3659125 101 jacobi           0.0
+/usr/bin/nvidia-smi
+Wed Aug 21 11:55:50 2024       
++-----------------------------------------------------------------------------------------+
+| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
+|-----------------------------------------+------------------------+----------------------+
+| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
+| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
+|                                         |                        |               MIG M. |
+|=========================================+========================+======================|
+|   0  NVIDIA H100 80GB HBM3          On  |   00000000:04:00.0 Off |                    0 |
+| N/A   42C    P0            126W /  699W |     532MiB /  81559MiB |      0%      Default |
+|                                         |                        |             Disabled |
++-----------------------------------------+------------------------+----------------------+
+|   1  NVIDIA H100 80GB HBM3          On  |   00000000:64:00.0 Off |                    0 |
+| N/A   42C    P0            123W /  699W |     532MiB /  81559MiB |      0%      Default |
+|                                         |                        |             Disabled |
++-----------------------------------------+------------------------+----------------------+
+|   2  NVIDIA H100 80GB HBM3          On  |   00000000:84:00.0 Off |                    0 |
+| N/A   42C    P0            123W /  699W |     532MiB /  81559MiB |      0%      Default |
+|                                         |                        |             Disabled |
++-----------------------------------------+------------------------+----------------------+
+|   3  NVIDIA H100 80GB HBM3          On  |   00000000:E4:00.0 Off |                    0 |
+| N/A   43C    P0            119W /  699W |     532MiB /  81559MiB |      0%      Default |
+|                                         |                        |             Disabled |
++-----------------------------------------+------------------------+----------------------+
+
++-----------------------------------------------------------------------------------------+
+| Processes:                                                                              |
+|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
+|        ID   ID                                                               Usage      |
+|=========================================================================================|
+|    0   N/A  N/A   3658480      C   ...try2/h100/mpi/openacc/cray/./jacobi        522MiB |
+|    1   N/A  N/A   3658481      C   ...try2/h100/mpi/openacc/cray/./jacobi        522MiB |
+|    2   N/A  N/A   3658482      C   ...try2/h100/mpi/openacc/cray/./jacobi        522MiB |
+|    3   N/A  N/A   3658483      C   ...try2/h100/mpi/openacc/cray/./jacobi        522MiB |
++-----------------------------------------------------------------------------------------+
+[tkaiser2@kl6 h100]$
+
+
+

There is a function module_restore defined in /nopt/nrel/apps/env.sh. +Sourcing /nopt/nrel/apps/env.sh sets modules back to the original state. +module_restore also modifies $PATH and $LD_LIBRARY_PATH putting paths with your +home directory at the beginning.

+
. /nopt/nrel/apps/env.sh
+module_restore
+
+

As of October 2024, /nopt/nrel/apps/env.sh is sourced automatically when you log in, so the function module_restore should be in your path.

+

Just MPI#

+

There is an extra Slurm script, quick, which builds C and Fortran MPI hello world programs. The script shows that to get an MPI program to build with the standard programming environments PrgEnv-{cray,intel,gnu} you must module unload nvhpc and module load cuda. These environments build using Cray's MPI and the various backend compilers. There is a conflict between the default version of nvhpc and these environments. Also, Cray MPI wants cuda even if the program being built does not require it.
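A minimal sketch of that pattern (PrgEnv-cray shown; the mpi/normal/cray example later on this page does the same thing in full):

```bash
module unload nvhpc        # avoid the conflict with the PrgEnv-* environments
module load PrgEnv-cray
module load cuda           # Cray MPI wants cuda even for non-CUDA codes
cc helloc.c -o helloc      # cc/ftn are the wrappers that pull in Cray MPI
srun --nodes=1 --tasks-per-node=2 ./helloc
```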

+

Programming paradigms Discussed#

+
- Multiple GPUs & multiple nodes
    - Pure CUDA programs
    - CUDA-aware MPI programs
    - MPI programs without CUDA
    - MPI programs with CUDA
    - MPI programs with OpenACC
    - Pure OpenACC programs
    - Library routines
- We'll build with:
    - Cray's standard programming environment
    - NVIDIA's environment
    - gcc
    - A few examples with Intel MPI

Common issues addressed with these examples#

+
- Can't find library at run time
    - Need to set LD_LIBRARY_PATH to point to the directory containing the library. Try to load modules at run time.
- Module xxx is not compatible with your cray-libsci.
    - Load a different version: cray-libsci/22.10.1.2 or cray-libsci/22.12.1.1 or cray-libsci/23.05.1.4
- Can't find some function in the C++ library.
    - Load a newer version of gcc
- At link time: libgcc_s.so.1: file not recognized: File format not recognized.
    - The linker is missing after some combinations of loads.
    - module load binutils
- Examples shown here don't work.
    - Make sure you are running and/or launching from a GPU node
- cc1: error: bad value 'znver4' for '-march=' switch.
    - Use -march=skylake
- Package 'nccl', required by 'virtual:world', not found…
    - module unload nvhpc
- lib/libmpi_gtl_cuda.so: undefined reference to…
    - module load cuda

The examples#

+

The examples are spread across a number of directories. All examples can be run with a single sbatch command.

+
- Our driver script is just "script".
- Each example directory contains a file "doit".
- Our driver looks for each directory with an example, goes there, and sources doit.
- We can select the default gcc compiler to use by setting the environment variable MYGCC; this can be done outside of the script before sbatch. The possible versions of gcc are set as shown below:
    - export MYGCC=gcc-native/12.1
    - export MYGCC=gcc-stdalone/10.1.0
    - export MYGCC=gcc-stdalone/12.3.0
    - export MYGCC=gcc-stdalone/13.1.0
- If we know that an example will not run with the chosen version of gcc, "doit" will substitute a working version on the fly.
- You can run a subset of the tests by setting the variable doits. For example:
export doits="./cudalib/factor/doit ./cudalib/fft/doit ./mpi/cudaaware/doit"
+
+

General notes for all examples#

+
- All examples run module_restore to set the environment to a known state. See above.
- Many of the examples unload PrgEnv-cray/8.5.0 and nvhpc/24.1 to prevent conflicts with other modules.
- There is a compile and run of one or more programs.
- MPI programs are run with srun or mpirun on one or two nodes. mpirun is used with some versions of NVIDIA's environment because srun is not supported.
- GPU programs that use a single GPU are run on each GPU in turn.

The script#

+

Our script, shown below, does the following:

+
1. Test to make sure we are starting from a GPU node.
2. Define a simple timer.
3. Save our environment and a copy of the script.
4. Bring the function module_restore into our environment (see above).
5. Set our default version of gcc.
6. Find our examples if the user has not set a list beforehand, and echo our list.
7. Go into each directory and run the test.
+script +
#!/bin/bash
+#SBATCH --time=0:30:00
+#SBATCH --partition=gpu-h100
+#SBATCH --nodes=2
+#SBATCH --gres=gpu:h100:4
+#SBATCH --exclusive
+#SBATCH --output=output-%j.out
+#SBATCH --error=infor-%j.out
+
+
+if echo $SLURM_SUBMIT_HOST | egrep "kl5|kl6" >> /dev/null  ; then : ; else echo Run script from a GPU node; exit ; fi
+# a simple timer
+dt ()
+{
+    now=`date +"%s.%N"`;
+    if (( $# > 0 )); then
+        rtn=$(printf "%0.3f" `echo $now - $1 | bc`);
+    else
+        rtn=$(printf "%0.3f" `echo $now`);
+    fi;
+    echo $rtn
+}
+
+printenv > env-$SLURM_JOB_ID.out
+cat $0 > script-$SLURM_JOB_ID.out
+
+#runs script to put our restore function in our environment
+. /nopt/nrel/apps/env.sh
+module_restore
+
+#some possible values for gcc module
+#export MYGCC=gcc-native/12.1
+#export MYGCC=gcc-stdalone/10.1.0
+#export MYGCC=gcc-stdalone/12.3.0 
+#export MYGCC=gcc-stdalone/13.1.0
+
+if [ -z ${MYGCC+x} ]; then export MYGCC=gcc-native/12.1  ; else echo MYGCC already set ; fi
+echo MYGCC=$MYGCC
+
+if [ -z ${doits+x} ]; then 
+    doits=`find . -name doit | sort -t/ -k2,2`
+else 
+    echo doits already set 
+fi
+
+for x in $doits ; do
+    echo running example in `dirname $x`
+done
+
+startdir=`pwd`
+t1=`dt`
+for x in $doits ; do
+ dir=`dirname $x`
+ echo ++++++++ $dir >&2 
+ echo ++++++++
+ echo $dir
+ cd $dir
+ tbegin=`dt`
+ . doit | tee  $SLURM_JOB_ID
+ echo Runtime `dt $tbegin` $dir `dt $t1` total
+ cd $startdir
+done
+echo FINISHED `dt $t1`
+
+# post  (this is optional)
+mkdir -p /scratch/$USER/gputest/$SLURM_JOB_ID
+cp *out  /scratch/$USER/gputest/$SLURM_JOB_ID
+# . cleanup
+
+
+

cuda/cray#

+

Here we build and run a single GPU code stream.cu. This code is a standard benchmark that measures the floating point performance for a GPU.

+

In this case we are loading PrgEnv-nvhpc/8.4.0, which requires cray-libsci/23.05.1.4. We compile with the "wrapper" compiler CC, which in this case builds with NVIDIA's backend compiler. CC would "pull in" Cray's MPI if it were required.

+

We run on each GPU of each Node in our allocation.

+
+cuda/cray +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml PrgEnv-nvhpc/8.4.0
+ml cray-libsci/23.05.1.4
+ml binutils
+: << ++++ 
+ Compile our program
+ CC as well as cc, and ftn are wrapper compilers. Because
+ we have PrgEnv-nvidia loaded they map to Nvidia's compilers
+ but use would use Cray MPI if this was an MPI program.
+ Note we can also use nvcc since this is not an MPI program.
+++++
+
+rm -rf ./stream.sm_90
+CC -gpu=cc90  -cuda -target-accel=nvidia90  stream.cu  -o stream.sm_90
+# nvcc -std=c++11 -ccbin=g++ stream.cu -arch=sm_90 -o stream.sm_90
+
+: Run on all of our nodes
+nlist=`scontrol show hostnames | sort -u`
+for l in $nlist ; do   
+  echo $l
+  for GPU in 0 1 2 3 ; do
+: stream.cu will read the GPU on which to run from the command line
+      srun -n 1 --nodes=1 -w $l ./stream.sm_90 -g $GPU
+  done
+  echo
+done
+
+
+

cuda/gccalso#

+

Here we build and run a single-GPU code, stream.cu. This code is a standard benchmark that measures the floating point performance of a GPU. In this case we break the compile into two parts: compiling the "normal" C portions with gcc and the CUDA portions with the compilers enabled via the nvhpc-nompi module. This gives us NVIDIA's compilers without MPI.

+
+cuda/gccalso +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload  PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml nvhpc-nompi/24.1
+ml binutils
+
+ml 2>&1 | grep gcc-stdalone/13.1.0 ; if [ $? -eq 0 ]  ; then echo REPLACING gcc-stdalone/13.1.0 ; ml gcc-stdalone/12.3.0  ; fi
+
+: << ++++ 
+ Compile our program
+ The module nvhpc-nompi gives us access to Nvidia's compilers
+ nvc, nvc++, nvcc, nvfortran as well as the Portland Group 
+ compilers which are actually links to these.  We do not
+ have direct access to MPI with this set of modules loaded.
+ Here we compile routines that do not containe cuda with g++.
+++++
+
+
+g++ -c normal.c 
+nvcc -std=c++11 -arch=sm_90 cuda.cu normal.o -o stream.sm_90
+
+: Run on all of our nodes
+nlist=`scontrol show hostnames | sort -u`
+for l in $nlist ; do   
+  echo $l
+  for GPU in 0 1 2 3 ; do
+: stream.cu will read the GPU on which to run from the command line
+      srun -n 1 --nodes=1 -w $l ./stream.sm_90 -g $GPU
+  done
+  echo
+done
+
+
+

cuda/nvidia#

+

stream.cu runs a standard benchmark showing the computational speed of the GPU for simple math operations.

+

We use nvhpc-nompi, an NREL-written environment that builds CUDA programs without MPI, and run on each of the GPUs one at a time.

+
+cuda/nvidia +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload  PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+ml nvhpc-nompi/24.1
+: << ++++ 
+ Compile our program
+ The module nvhpc-nompi gives us access to Nvidia's compilers
+ nvc, nvc++, nvcc, nvfortran as well as the Portland Group 
+ compilers which are actually links to these.  We do not
+ have direct access to MPI with this set of modules loaded.
+++++
+
+
+nvcc -std=c++11 -arch=sm_90 stream.cu -o stream.sm_90
+
+: Run on all of our nodes
+nlist=`scontrol show hostnames | sort -u`
+for l in $nlist ; do   
+  echo $l
+  for GPU in 0 1 2 3 ; do
+: stream.cu will read the GPU on which to run from the command line
+      srun -n 1 --nodes=1 -w $l ./stream.sm_90 -g $GPU
+  done
+  echo
+done
+
+
+

mpi/normal/cray#

+

We are building MPI programs that do not contain CUDA. We unload nvhpc and load an older version to prevent compile issues. We need to load cuda because Cray's MPI expects it, even for non-CUDA programs. We compile with ftn and cc, which are "replacements" for the more traditional mpicc and mpifort; these will pull in MPI as needed and should be used even for codes that don't contain MPI. Parallel programs built with PrgEnv-* should be launched with srun, as shown here.

+
+mpi/normal/cray +
: Start from a known module state, the default
+module_restore
+
+
+: Load modules
+#module unload nvhpc/24.1
+ml PrgEnv-cray/8.4.0 
+
+ml cuda
+: << ++++ 
+ Compile our program.
+
+ Here we use cc and ftn.  These are wrappers
+ that point to Cray C (clang) Cray Fortran
+ and Cray MPI. cc and ftn are part of PrgEnv-cray
+ with is part of the default setup.
+++++
+
+cc helloc.c -o helloc
+ftn hellof.f90 -o hellof
+
+: We run with two tasks per nodes an two tasks on one node.
+for arg in "--tasks-per-node=2" "-n 2 --nodes=1" ; do 
+   echo running Fortran version
+   srun $arg hellof
+   echo
+   echo running C version
+   srun $arg helloc
+   echo
+done
+
+: With PrgEnv-intel we get the Intel backend compilers
+ml PrgEnv-intel
+ml cray-libsci/23.05.1.4
+#ml gcc-stdalone/13.1.0
+ml binutils
+
+cc helloc.c -o helloc.i
+ftn hellof.f90 -o hellof.i
+
+: We run with two tasks per nodes an two tasks on one node.
+for arg in "--tasks-per-node=2" "-n 2 --nodes=1" ; do 
+   echo running Fortran version with Intel backend
+   srun $arg hellof.i
+   echo
+   echo running C version with Intel backend
+   srun $arg helloc.i
+   echo
+done
+
+: With PrgEnv-gnu we get the gnu backend compilers
+: As of 04/04/24 the -march=znver3 flag is required
+: because the default version of gcc does not support the
+: current CPU on the GPU nodes.  Or you could
+: ml craype-x86-milan
+ml PrgEnv-gnu
+ml cray-libsci/23.05.1.4
+cc  -march=znver3 helloc.c -o helloc.g
+ftn -march=znver3 hellof.f90 -o hellof.g
+
+: We run with two tasks per nodes an two tasks on one node.
+for arg in "--tasks-per-node=2" "-n 2 --nodes=1" ; do 
+   echo running Fortran version with gnu backend
+   srun $arg hellof.g
+   echo
+   echo running C version with gnu backend
+   srun $arg helloc.g
+   echo
+done
+
+
+

mpi/normal/intel+abi#

+

In this case we are building hello world using Intel's compilers and Intel's MPI. We load the intel-oneapi-{mpi,compilers} modules and build with mpiicx and mpifc.

+

These hello world programs will report the version of the MPI library used. They report that they are using Intel MPI.

+

However, if we load the modules craype and cray-mpich-abi, the Intel MPI library gets replaced with Cray MPI at runtime. This is reported in the program output. The advantage is better performance for off-node communication. Cray-mpich-abi will not work if the program contains C++ MPI calls, but will work if C++ code calls the normal C MPI routines as dictated by the standard.

+
+mpi/normal/intel+abi +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml intel-oneapi-mpi
+ml intel-oneapi-compilers
+ml binutils
+
+: << ++++ 
+ Compile our program.
+
+ There are many ways to compile using Intel MPI.
+ Here we use the "Intel Suggested" way using mpiicx
+ and mpifc.  This gives us new Intel backend compilers
+ with Intel MPI. mpif90 and mpicc would give us gcc
+ and gfortan instead
+++++
+
+mpiicx helloc.c -o helloc
+mpifc hellof.f90 -o hellof
+
+: We run with two tasks per nodes an two tasks on one node.
+for arg in "--tasks-per-node=2" "-n 2 --nodes=1" ; do 
+   echo running Fortran version
+   srun $arg hellof
+   echo
+   echo running C version
+   srun $arg helloc
+   echo
+done
+
+: Finally we module load cray-mpich-abi.  With this module
+: loaded Intel MPI is replaced with Cray MPI without needing
+: to recompile. After the load we rerun and see Cray MPI
+: in the output
+
+ml craype
+ml cray-mpich-abi
+
+for arg in "--tasks-per-node=2" "-n 2 --nodes=1" ; do 
+   echo running Fortran version
+   srun $arg hellof
+   echo
+   echo running C version
+   srun $arg helloc
+   echo
+done
+
+
+

mpi/normal/nvidia/nrelopenmpi#

+

In this case we are building normal MPI programs but using an NREL-built OpenMPI and an NREL-installed version of NVIDIA's environment. This particular OpenMPI was built using NVIDIA's compilers and thus is more compatible with other NVIDIA packages. NREL's MPI versions are built with Slurm support, so these programs are launched with srun.

+
+mpi/normal/nvidia/nrelopenmpi +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml openmpi/4.1.6-nvhpc
+ml nvhpc-nompi/24.1
+ml binutils
+
+: << ++++ 
+ Compile our program
+ Here we use mpicc and mpif90.  There is support for Cuda
+ but we are not using it in this case.
+++++
+
+mpicc helloc.c -o helloc
+mpif90 hellof.f90 -o hellof
+
+: We run with two tasks per nodes an two tasks on one node.
+for arg in "--tasks-per-node=2" "-n 2 --nodes=1" ; do 
+   echo running Fortran version
+   srun $arg hellof
+   echo
+   echo running C version
+   srun $arg helloc
+   echo
+done
+
+
+

mpi/normal/nvidia/nvidiaopenmpi#

+

In this case we are building normal MPI programs but using nvhpc/24.1. This particular MPI was built using NVIDIA's compilers and thus is more compatible with other NVIDIA packages. This version of MPI does not support Slurm's srun command, so we launch with mpirun.

+
+mpi/normal/nvidia/nvidiaopenmpi +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml  nvhpc-stdalone/24.1
+
+: << ++++ 
+ Compile our program
+ Here we use mpicc and mpif90.  There is support for Cuda
+ but we are not using it in this case.
+++++
+
+mpicc helloc.c -o helloc
+mpif90 hellof.f90 -o hellof
+
+: This version of MPI does not support srun so we use mpirun
+: We run with two tasks per nodes an two tasks on one node.
+for arg in "-N 2" "-n 2" ; do 
+   echo running Fortran version
+   mpirun $arg hellof
+   echo
+   echo running C version
+   mpirun $arg helloc
+   echo
+done
+
+
+

mpi/withcuda/cray#

+

This example is an MPI ping-pong test where the data starts and ends up on a GPU but passes through CPU memory. Below are the CUDA copy routines and MPI routines. d_A is a GPU (device) array. It is copied to/from A, a CPU array, using cudaMemcpy. A is sent/received via the MPI calls.

+
for(int i=1; i<=loop_count; i++){
+    if(rank == 0){
+        cudaMemcpy(A, d_A, N*sizeof(double), cudaMemcpyDeviceToHost) ;
+        MPI_Send(A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
+        MPI_Recv(A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
+        cudaMemcpy(d_A, A, N*sizeof(double), cudaMemcpyHostToDevice) ;
+    }
+    else if(rank == 1){
+        MPI_Recv(A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
+        cudaMemcpy(d_A, A, N*sizeof(double), cudaMemcpyHostToDevice) ;
+        cudaMemcpy(A, d_A, N*sizeof(double), cudaMemcpyDeviceToHost) ;
+        MPI_Send(A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
+    }
+}
+
+

We are using PrgEnv-nvhpc, which combines Cray MPI and NVIDIA's backend compilers. As of this writing, this version of NVIDIA's compilers is not compatible with GCC 13, so we test to see if it is loaded and replace it as needed.

+

Here we use CC; if we were compiling Fortran we would use ftn instead of CC. These are wrappers that point to Cray MPI.

+

We also build and run a multi-GPU version of stream, which measures numerical performance of the GPU. Stream is run simultaneously on all GPUs. This is a test that can be run to check whether a GPU is working properly.

+

Since PrgEnv-* is compatible with Slurm, we launch using srun. We do an on-node and an off-node test.

+
+mpi/withcuda/cray +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+ml >&2
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+#######
+ml 2>&1 | grep gcc-native/12.1 ; if [ $? -eq 0 ]  ; then echo REPLACING gcc-native/12.1 ; ml gcc-stdalone/13.1.0 ; fi
+#######
+ml >&2
+
+ml PrgEnv-nvhpc
+ml cray-libsci/23.05.1.4
+ml binutils
+: << ++++ 
+ Compile our program.
+
+ Here we use CC. If we were compiling Fortran
+ then ftn instead of CC.  These are wrappers
+ that point to Cray MPI and with PrgEnv-nvhpc
+ we get Nvidia's back end compilers.  
+++++
+
+CC -gpu=cc90   ping_pong_cuda_staged.cu -o staged
+
+
+: We run with 2 tasks total. One 1 and two nodes
+echo running staged on node
+srun  --nodes=1 --tasks-per-node=2 ./staged
+
+echo running staged off node
+srun  --nodes=2 --tasks-per-node=1 ./staged
+
+echo running multi-gpu stream
+CC -gpu=cc90  -DNTIMES=1000  mstream.cu -o mstream
+export VSIZE=3300000000
+export VSIZE=330000000
+srun --tasks-per-node=4  ./mstream -n $VSIZE
+
+
+

mpi/withcuda/nvidia/nrelopenmpi#

+

This example is an MPI ping-pong test where the data starts and ends up on a GPU but passes through CPU memory. See the explanation in the previous example.

+

We are using ml openmpi/4.1.6-nvhpc and ml nvhpc-nompi/24.1. These supply an NREL-built version of OpenMPI with NVIDIA's backend compilers.

+

Here we use mpiCC; if we were compiling Fortran we would use mpif90 instead. These wrappers point to the NREL-built OpenMPI.

+

We also build and run a multi-GPU version of stream, which measures numerical performance of the GPU. Stream is run simultaneously on all GPUs. This is a test that can be run to check whether a GPU is working properly.

+

Since the NREL-built OpenMPI supports Slurm, we launch using srun. We do an on-node and an off-node test.

+
+mpi/withcuda/nvidia/nrelopenmpi +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml openmpi/4.1.6-nvhpc
+ml nvhpc-nompi/24.1
+ml binutils
+
+: << ++++ 
+ Compile our program
+ Here we use mpiCC which uses, in this case a NREL built  version
+ of MPI and Nvidia's backend compiler. 
+++++
+
+mpiCC ping_pong_cuda_staged.cu -o staged
+
+: We run with 2 tasks total.
+: This version of MPI does not support srun so we use mpirun
+
+echo Run on a single node
+srun --tasks-per-node=2 --nodes=1 ./staged
+
+echo Run on two nodes 
+srun --tasks-per-node=1 --nodes=2 ./staged
+
+echo running multi-gpu stream
+mpiCC -gpu=cc90  -DNTIMES=1000  mstream.cu -o mstream
+export VSIZE=3300000000
+export VSIZE=330000000
+srun --tasks-per-node=4  ./mstream -n $VSIZE
+
+
+

mpi/withcuda/nvidia/nvidiaopenmpi#

+

This example is an MPI ping-pong test where the data starts and ends up on a GPU but passes through CPU memory. See the explanation two examples earlier.

+

Here we use nvhpc/24.1. (Note we actually unload this module and then reload it. This is not actually necessary but is here for historical reasons. In this case we could have just left it loaded.)

+

We compile with mpiCC. Since NVIDIA's MPI does not support srun we launch with mpirun.

+
+mpi/withcuda/nvidia/nvidiaopenmpi +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml 2>&1 | grep gcc-stdalone/13.1.0 ; if [ $? -eq 0 ]  ; then echo REPLACING gcc-stdalone/13.1.0 ; ml gcc-stdalone/12.3.0 ; fi
+
+ml nvhpc-stdalone/24.1
+
+: << ++++ 
+ Compile our program
+ Here we use mpiCC which uses Nvidia's version of MPI and
+ their backend compiler. The "hpcx" has a few more optimizations.
+++++
+
+mpiCC ping_pong_cuda_staged.cu -o staged
+
+: We run with 2 tasks total.
+: This version of MPI does not support srun so we use mpirun
+
+echo Run on a single node
+mpirun -n 2 -N 2 ./staged
+
+echo Run on two nodes 
+mpirun -n 2 -N 1 ./staged
+
+
+echo running multi-gpu stream
+mpiCC -gpu=cc80  -DNTIMES=1000  mstream.cu -o mstream
+export VSIZE=3300000000
+export VSIZE=330000000
+mpirun -n 8 -N 4  ./mstream -n $VSIZE
+
+
+

mpi/cudaaware#

+

This example is a ping-pong test where the data starts and ends up on a GPU. Unlike the previous three examples, there is no explicit copy to/from the GPU and CPU; data is sent directly between GPUs. The array d_A is a device array and is not defined on the CPU. This is much faster than doing an explicit copy.

+

for(int i=1; i<=5; i++){
+    if(rank == 0){
+        MPI_Send(d_A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
+        MPI_Recv(d_A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
+    }
+    else if(rank == 1){
+        MPI_Recv(d_A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
+        MPI_Send(d_A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
+    }
+}
+
+Here we use PrgEnv-nvhpc and also need to load a specific version of cray-libsci, 23.05.1.4.

+

We need to set MPICH_GPU_SUPPORT_ENABLED=1 to make this work. Depending on the code, setting MPICH_OFI_NIC_POLICY=GPU may improve performance.

+
??? example "mpi/cudaaware"
+```bash
+: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload nvhpc/24.1
+#module unload PrgEnv-cray/8.5.0
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml PrgEnv-nvhpc
+ml cray-libsci/23.05.1.4  
+ml binutils
+
+: << ++++ 
+ Compile our program.
+
+ Here we use cc and CC.  These are wrappers
+ that point to Cray MPI but use Nvidia backend 
+ compilers.
+++++
+
+CC -gpu=cc90  -cuda -target-accel=nvidia90  -c ping_pong_cuda_aware.cu
+cc -gpu=cc90  -cuda -target-accel=nvidia90 -lcudart -lcuda ping_pong_cuda_aware.o -o pp_cuda_aware
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+export MPICH_OFI_NIC_POLICY=GPU
+srun -n 2 --nodes=1 ./pp_cuda_aware
+srun --tasks-per-node=1 --nodes=2 ./pp_cuda_aware
+unset MPICH_GPU_SUPPORT_ENABLED
+unset MPICH_OFI_NIC_POLICY
+
+```
+
+

Here is a plot comparing the bandwidth using staged and CUDA-aware MPI. [Figure: Bandwidth comparison between staged and CUDA-aware MPI]

+

openacc/cray#

+

Here we run one of NVIDIA's examples. It is a single-GPU n-body calculation. It runs the same calculation on the CPU and the GPU and reports the difference in performance.

+

We are using PrgEnv-nvhpc/8.5.0 which gives us access to NVIDIA's compilers and Cray's MPI. However, we don't use MPI.

+

We run on each GPU of each node in turn. The variable CUDA_VISIBLE_DEVICES sets the GPU number.

+

Since this is not an MPI program we don't actually need srun. However, we use it in this case with the -w option to select the node on which we will launch the application.

+
+openacc/cray +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+module load binutils
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml PrgEnv-nvhpc/8.5.0
+
+
+: << ++++ 
+ Compile our program
+ The module PrgEnv-nvhpc/8.5.0 gives us access to Nvidia's 
+ compilers nvc, nvc++, nvcc, nvfortran as well as the Portland 
+ Group compilers which are actually links to these.  Since we 
+ are not using MPI we could have also used nvhpc-nompi/24.1 or
+ even nvhpc-native/24.1.
+++++
+
+
+nvc -fast -Minline -Minfo -acc -DFP64 nbodyacc2.c -o nbody
+
+
+
+: Run on all of our nodes
+nlist=`scontrol show hostnames | sort -u`
+for l in $nlist ; do   
+  echo $l
+  for GPU in 0 1 2 3 ; do
+: This is one way to set the GPU on which an OpenACC program runs.
+      export CUDA_VISIBLE_DEVICES=$GPU
+      echo running on gpu $CUDA_VISIBLE_DEVICES
+: Since we are not running MPI we actually do not need srun here.
+      srun -n 1 --nodes=1 -w $l ./nbody
+  done
+  echo
+done
+
+unset CUDA_VISIBLE_DEVICES
+
+
+

openacc/nvidia#

+

Here we run one of NVIDIA's examples. It is a single-GPU n-body calculation. It runs the same calculation on the CPU and the GPU and reports the difference in performance.

+

Here we use nvhpc which gives us access to NVIDIA's compilers. We don't use MPI.

+

We run on each GPU of each node in turn. The variable CUDA_VISIBLE_DEVICES sets the GPU number.

+

Since this is not an MPI program we don't actually need srun. However, we use it in this case with the -w option to select the node on which we will run.

+
+openacc/nvidia +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml nvhpc-stdalone/24.1
+
+
+: << ++++ 
+ Compile our program
+ The module nvhpc-stdalone gives us access to Nvidia's compilers
+ nvc, nvc++, nvcc, nvfortran as well as the Portland Group 
+ compilers which are actually links to these.  Since we are
+ not using MPI we could have also used nvhpc-nompi/24.1 or
+ even PrgEnv-nvhpc/8.5.0.
+++++
+
+
+nvc -fast -Minline -Minfo -acc -DFP64 nbodyacc2.c -o nbody
+
+
+: Run on all of our nodes
+nlist=`scontrol show hostnames | sort -u`
+for l in $nlist ; do   
+  echo $l
+  for GPU in 0 1 2 3 ; do
+: This is one way to set the GPU on which an OpenACC program runs.
+      export CUDA_VISIBLE_DEVICES=$GPU
+      echo running on gpu $CUDA_VISIBLE_DEVICES
+: Since we are not running MPI we actually do not need srun here.
+      srun -n 1 --nodes=1 -w $l ./nbody
+  done
+  echo
+done
+
+unset CUDA_VISIBLE_DEVICES
+
+
+

mpi/openacc/cray#

+

This is a somewhat contrived example. It does, in fact, combine MPI and OpenACC, but the MPI does almost nothing. At the MPI level it is embarrassingly parallel: each MPI task does the same calculation, which is accelerated via OpenACC. MPI starts the tasks and reports a summary of timings. However, MPI combined with OpenACC is an important paradigm; the GPU version of VASP can combine MPI and OpenACC.

+

Here we load PrgEnv-nvhpc, which requires cray-libsci/23.05.1.4, giving us Cray MPI and NVIDIA backend compilers. Again, recall that cc and ftn are wrappers that will build both MPI and non-MPI programs.

+

We launch with srun since PrgEnv-* supports the slurm scheduler.

+
+mpi/openacc/cray +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml PrgEnv-nvhpc
+ml cray-libsci/23.05.1.4
+ml binutils
+
+: << ++++ 
+ Compile our program.
+
+ Here we use cc and ftn.  These are compiler wrappers
+ from the craype module.  With PrgEnv-nvhpc loaded
+ they point to Nvidia's backend compilers and
+ Cray MPI.
+++++
+
+cc -acc -Minfo=accel -fast acc_c3.c  -o jacobi
+
+: We run with 4 tasks per nodes.
+srun --tasks-per-node=4 ./jacobi 46000 46000 5 nvidia
+
+
+

mpi/openacc/nvidia/nrelopenmpi#

+

As discussed above, this is a somewhat contrived example. It does, in fact, combine MPI and OpenACC, but the MPI does almost nothing. At the MPI level it is embarrassingly parallel: each MPI task does the same calculation, which is accelerated via OpenACC. MPI starts the tasks and reports a summary of timings. However, MPI combined with OpenACC is an important paradigm; the GPU version of VASP can combine MPI and OpenACC.

+

Here we load openmpi/4.1.6-nvhpc and nvhpc-nompi/24.1, which together give us a CUDA-aware MPI and NVIDIA's OpenACC compiler capability.

+

We launch with srun since NREL's OpenMPI supports the slurm scheduler.

+
+mpi/openacc/nvidia/nrelopenmpi +
+: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml openmpi/4.1.6-nvhpc
+ml nvhpc-nompi/24.1
+ml binutils
+
+: << ++++ 
+ Compile our program
+ Here we use mpicc and mpif90.  There is support for Cuda
+ but we are not directly using it in this case, just openacc.
+++++
+
+mpicc -acc -Minfo=accel -fast acc_c3.c -o jacobi
+
+: We run with 4 tasks per nodes.
+srun --tasks-per-node=4 ./jacobi 46000 46000 5 nvidia
+
+
+

mpi/openacc/nvidia/nvidiaopenmpi#

+

As discussed above, this is a somewhat contrived example. It does, in fact, combine MPI and OpenACC, but the MPI does almost nothing. At the MPI level it is embarrassingly parallel: each MPI task does the same calculation, which is accelerated via OpenACC. MPI starts the tasks and reports a summary of timings. However, MPI combined with OpenACC is an important paradigm; the GPU version of VASP can combine MPI and OpenACC.

+

Here we load nvhpc-stdalone/24.1 rather than nvhpc-nompi/24.1, so we have NVIDIA's version of MPI in our path.

+

We launch with mpirun since NVIDIA's MPI lacks support for the slurm scheduler.

+
+mpi/openacc/nvidia/nvidiaopenmpi +
: Start from a known module state, the default
+module_restore
+
+: Load modules
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml nvhpc-stdalone/24.1
+
+
+: << ++++ 
+ Compile our program
+ Here we use mpicc and mpif90.  There is support for Cuda
+ but we are not using it in this case; we are using
+ openacc.
+++++
+
+mpicc -acc -Minfo=accel -fast acc_c3.c -o jacobi
+
+: We run with 4 tasks per nodes.
+: This version of MPI does not support srun so we use mpirun
+mpirun -N 4 ./jacobi 46000 46000 5 nvidia
+
+
+

cudalib/factor#

+

We have two programs here. Both do a linear solve, actually a factorization. cpu.C is a CPU program and it does its solve using the LAPACK routine dgetrs. This program is built and run against Cray's libsci and Intel's MKL. The program cusolver_getrf_example.cu calls an NVIDIA CUDA linear solver, cusolverDnDgetrf.

+

We first build cpu.C using PrgEnv-gnu. Note that we need to load cuda even though we don't use it because PrgEnv-gnu expects it.

+

We next build the GPU version using nvhpc-stdalone which gives us access to Cuda and the libraries. For our compile/link line we need to provide the path to the include files and library.

+

Next we run these two examples. For the CPU version we run using 32 threads. For the GPU version we run on each of the GPUs in turn.

+

For the Intel version we load

+
ml intel-oneapi-mkl
+ml intel-oneapi-compilers
+
+

and then build and run.

+
+cudalib/factor +
+: Size of our matrix to solve
+export MSIZE=4500
+
+: Start from a known module state, the default
+: We are going to build the Cray libsci version with the GPU
+: environment even though it does not use GPUs
+: Start from a known module state, the default
+module_restore
+
+: Load modules
+#module unload PrgEnv-cray/8.5.0
+#module unload nvhpc/24.1
+
+ml PrgEnv-gnu/8.4.0 
+ml cuda
+
+# Here we build the CPU version with libsci. We don't actually use Cuda but the compiler wants it
+CC  -DMINE=$MSIZE  -fopenmp -march=native cpu.C -o invert.libsci
+
+: << ++++
+ Compile our GPU programs.
+ The module nvhpc-native gives us access to Nvidia's compilers
+ nvc, nvc++, nvcc, nvfortran as well as the Portland Group 
+ compilers which are actually links to these.
+++++
+#ml nvhpc-native
+ml nvhpc-stdalone
+: GPU version with libcusolver
+export L1=$NVHPC_ROOT/math_libs/lib64
+export L3=$NVHPC_ROOT/REDIST/cuda/12.3/targets/x86_64-linux/lib
+nvcc  -DMINE=$MSIZE -L$L1 -lcusolver -L$L3 -lnvJitLink cusolver_getrf_example.cu -o invert.gpu
+
+
+export OMP_NUM_THREADS=32
+echo 
+echo 
+echo ++++++++++++++++++++++
+echo running libsci version 
+echo ++++++++++++++++++++++
+./invert.libsci
+
+for GPU in 0 1 2 3 ; do
+echo 
+echo 
+echo ++++++++++++++++++++++
+echo running gpu version on GPU $GPU
+echo ++++++++++++++++++++++
+: invert.gpu will read the GPU on which to run from the command line
+./invert.gpu $GPU
+done
+
+: We are going to compile the Intel version using 
+: the CPU environment
+module_restore
+ml intel-oneapi-mkl
+ml intel-oneapi-compilers
+icpx  -DMINE=$MSIZE -qopenmp -D__INTEL__ -march=native cpu.C -mkl -lmkl_rt -o invert.mkl
+
+echo 
+echo 
+echo ++++++++++++++++++++++
+echo running MKL version
+echo ++++++++++++++++++++++
+
+./invert.mkl
+
+module unload  intel-oneapi-compilers
+module unload intel-oneapi-mkl
+
+unset L1
+unset L3
+unset OMP_NUM_THREADS
+unset MSIZE
+
+
+

cudalib/fft#

+

We are doing FFTs on a cube. There are two versions. 3d_mgpu_c2c_example.cpp is a multi-GPU program that will also run on a single GPU. fftw3d.c calls fftw; in this case Cray's version. fftw3d.c can also be compiled for 1d and 2d; see the source.

+

For the GPU version we use nvhpc-stdalone which gives us access to NVIDIA's compilers and libraries. We compile with nvcc and also link with nvcc specifying the path to the Cuda fft library.

+

The first command line argument is the size of the cube.

+

In a single invocation the program will run on both 1 and then 4 GPUs. If the second command line argument is 1, it will run the single-GPU version first and then on all 4 GPUs. When it is 2, it will run the 4-GPU version first.

+

We actually run the code 4 times and see different runtimes.

+

For the FFTW version we load PrgEnv-cray/8.4.0, cray-fftw, and cuda. This program does not use Cuda, but the MPI compiler requires it for a proper link.

+

Again we run on a cube of size 512.

+
+cudalib/fft +
: Start from a known module state, the default
+module_restore
+
+
+
+: Load modules
+#module unload nvhpc/24.1
+#module unload PrgEnv-cray/8.5.0
+
+if [ -z ${MYGCC+x} ]; then module load gcc ; else module load $MYGCC ; fi
+ml nvhpc-stdalone
+ml binutils
+
+ml 2>&1 | grep gcc-stdalone/13.1.0 ; if [ $? -eq 0 ]  ; then echo REPLACING gcc-stdalone/13.1.0 ; ml gcc-stdalone/12.3.0 ; fi
+
+: << ++++ 
+ Compile our GPU programs.
+ The module nvhpc-stdalone gives us access to Nvidia's compilers
+ nvc, nvc++, nvcc, nvfortran as well as the Portland Group 
+ compilers which are actually links to these.
+++++
+
+nvcc -O3 -forward-unknown-to-host-compiler  --generate-code=arch=compute_90,code=[compute_90,sm_90] -std=c++11 -x cu 3d_mgpu_c2c_example.cpp -c
+export L1=$NVHPC_ROOT/REDIST/math_libs/12.3/targets/x86_64-linux/lib
+nvcc  -o 3dfft 3d_mgpu_c2c_example.o -L$L1 -lcufft
+
+: Run our program on a cube. The first parameter gives our cube size.
+: 2048 should work on the H100s.
+: Second parameter determines which algorithm runs first 1 GPU version or 4 GPU version
+echo
+echo
+for DOIT in `seq 1 4` ; do
+  echo set $DOIT
+  echo ++++++++++++++
+  echo RUN SINGLE GPU VERSION FIRST
+  ./3dfft 512 1
+  echo
+  echo
+  echo ++++++++++++++
+  echo RUN FOUR GPU VERSION FIRST
+  ./3dfft 512 2
+  echo
+  echo
+done
+
+: Build and run a fftw version
+module_restore
+#module unload nvhpc/24.1
+#module unload PrgEnv-cray/8.5.0
+ml  PrgEnv-cray/8.4.0 
+
+ml cray-fftw
+ml cuda
+cc -O3 fftw3d.c -o fftw3.exe
+
+echo
+echo
+echo ++++++++++++++
+echo run fftw libsci version
+./fftw3.exe 512
+
+
+

Source credits#

+
    +
  1. stream.cu - https://github.com/bcumming/cuda-stream with mods for MPI
  2. nbodyacc2.c - Nvidia, part of the nvhpc distribution
  3. acc_c3.c - Nvidia, part of the nvhpc distribution
  4. helloc.c, hellof.f90 - Tim Kaiser tkaiser2@nrel.gov
  5. ping_pong_cuda_aware.cu, ping_pong_cuda_staged.cu - https://github.com/olcf-tutorials/MPI_ping_pong
  6. cpu.C - Multiple sources with significant mods
  7. cusolver_getrf_example.cu - https://github.com/NVIDIA/CUDALibrarySamples.git with significant mods
  8. 3d_mgpu_c2c_example.cpp - https://github.com/NVIDIA/CUDALibrarySamples.git
  9. fftw3d.c - Tim Kaiser tkaiser2@nrel.gov
+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Environments/index.html b/Documentation/Systems/Kestrel/Environments/index.html new file mode 100644 index 000000000..67ead92a2 --- /dev/null +++ b/Documentation/Systems/Kestrel/Environments/index.html @@ -0,0 +1,5300 @@ + + + + + + + + + + + + + + + + + + + + + + + Kestrel Environments Overview - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Kestrel Programming Environments Overview#

+

Definitions#

+

Toolchain: a combination of a compiler and an MPI library. Sometimes associated scientific libraries (scalapack, blas, etc.) or bundles of scientific libraries (MKL, libsci, etc.) are considered part of the toolchain.

+

Environment: a set of modules, including a toolchain. A "build environment" refers to the set of modules (including compiler and MPI library) used to compile a code. A "run-time environment" is the set of modules used to execute a code. The two typically, but not always, match.

+

Environments#

+

There are three types of module-based Toolchains available on Kestrel:

+
    +
  1. "PrgEnv-" Environments, shipped with Kestrel
  2. +
  3. NREL-built Environments
  4. +
  5. NREL-built Environments with cray-mpich-abi
  6. +
+

The "PrgEnv-" environments are new on Kestrel. PrgEnv stands for "programming environment," and Kestrel ships with several of these. There are advantages to using a PrgEnv environment, as these environments are tailored for some of the Cray-specific features of Kestrel. For example, Cray MPICH utilizes Kestrel's Cray Slingshot network more effectively than OpenMPI or Intel MPI, so it runs noticeably faster than the other two for jobs that require two or more nodes. All PrgEnv- environments utilize Cray MPICH by default.

+

The NREL-built environments function similarly to those on Eagle, and it is up to the user to load all necessary modules to build and run their applications.

+

NREL-built environments can make use of Cray MPICH via the cray-mpich-abi. As long as a program is compiled with an MPICH-based MPI (e.g., Intel MPI but not Open MPI), the cray-mpich-abi can be loaded at runtime, which causes the program to use Cray MPICH for dynamically built binaries.

+

A note on OpenMPI#

+

Currently, OpenMPI does not run performantly or stably on Kestrel. You should do your best to avoid using OpenMPI. Please reach out to hpc-help@nrel.gov if you need help working around OpenMPI.

+

Summary of available compiler environments#

+
    +
  • (Cray) denotes that the module belongs to the default Cray module set.
  • +
  • (NREL) denotes that the module belongs to the NREL-built module set. If a compiler module is denoted (NREL), then the corresponding MPI module is also (NREL).
  • +
+

GNU#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PrgEnvCompiler ModuleMPI moduleLanguageWrapperCompilerMPI
gnugcc (Cray)cray-mpichFortranftngfortranCray MPICH
gnugcc (Cray)cray-mpichCccgccCray MPICH
gnugcc (Cray)cray-mpichC++CCg++Cray MPICH
n/agcc (NREL)openmpi/4.1.5-gccFortranmpifortgfortranOpen MPI
n/agcc (NREL)openmpi/4.1.5-gccCmpiccgccOpen MPI
n/agcc (NREL)openmpi/4.1.5-gccC++mpic++g++Open MPI
+

Cray#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PrgEnvCompiler ModuleMPI moduleLanguageWrapperCompilerMPI
craycce (Cray)cray-mpichFortranftncrayftnCray MPICH
craycce (Cray)cray-mpichCcccrayccCray MPICH
craycce (Cray)cray-mpichC++CCcrayCCCray MPICH
+

Intel#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PrgEnvCompiler ModuleMPI ModuleLanguageWrapperCompilerMPI
intelintel (Cray)cray-mpichFortranftnifortCray MPICH
intelintel (Cray)cray-mpichCcciccCray MPICH
intelintel (Cray)cray-mpichC++CCicpcCray MPICH
n/aintel-oneapi (NREL)intel-oneapi-mpiFortranmpiifortifortintel MPI
n/aintel-oneapi (NREL)intel-oneapi-mpiCmpiicciccintel MPI
n/aintel-oneapi (NREL)intel-oneapi-mpiC++mpiicpcicpcintel MPI
+

Note:

+

The Cray MPICH used by each PrgEnv- points to a different instance of MPICH. E.g., for PrgEnv-intel the MPICH used is located under /opt/cray/pe/mpich/8.1.21/ofi/intel/19.0, and for PrgEnv-cray the MPICH used is located under /opt/cray/pe/mpich/8.1.20/ofi/crayclang/10.0.

+

PrgEnv- Programming Environments#

+
+

Module Known Issues

+

As of July 30th, 2024, there are some modules that do not work correctly. The following points describe the issues and workarounds. We are working on permanent fixes for these issues.
+* If using PrgEnv-intel, cray-libsci/23.12 is loaded by default. Load cray-libsci/22.12 instead.
+* If using PrgEnv-gnu with gcc version 10, load cray-libsci/22.12 instead of the default cray-libsci version.
+* On the GPU nodes, if using PrgEnv-nvhpc, you need to load nvhpc/23.9. nvhpc/24.1 is loaded by default and will not work.
+* The module restore command could cause an "Unable to find cray-mpich" libraries error when used with PrgEnv-intel. If this happens, remove module restore from the list of commands.

+
+

Introduction#

+

These environments come packaged with:

+
    +
  1. A compiler, which corresponds to the name of the environment. E.g., PrgEnv-intel uses intel compilers
  2. +
  3. Cray MPICH
  4. +
  5. Cray LibSci, which can be used in place of MKL
  6. +
  7. Additional communication and network libraries
  8. +
+

Upon logging into the machine, the PrgEnv-gnu is loaded by default on both the CPU and GPU login nodes. If we module list, we can see the modules associated with PrgEnv-gnu.

+

We can swap between programming environments using the module swap command. For example, if PrgEnv-gnu is loaded but we want to use PrgEnv-cray instead, we can module swap PrgEnv-gnu PrgEnv-cray.
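For example, a minimal sketch of checking the default environment and switching to PrgEnv-cray (module versions are illustrative):

```bash
module list                          # PrgEnv-gnu should appear in the list
module swap PrgEnv-gnu PrgEnv-cray   # switch programming environments
module list                          # PrgEnv-cray should now be loaded instead
```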

+

What is a PrgEnv module doing?#

+

PrgEnv modules can seem a bit mysterious. We can check out the inner workings of a PrgEnv module with the module show command. For example, for PrgEnv-gnu we can:

+

module show PrgEnv-gnu

+

Which outputs:

+
/opt/cray/pe/modulefiles/PrgEnv-gnu/8.3.3:
+
+conflict     PrgEnv-amd 
+conflict     PrgEnv-aocc 
+conflict     PrgEnv-cray 
+conflict     PrgEnv-gnu 
+conflict     PrgEnv-intel 
+conflict     PrgEnv-nvidia 
+setenv       PE_ENV GNU 
+setenv       gcc_already_loaded 1 
+module       swap gcc/12.1.0 
+module       switch cray-libsci cray-libsci/22.10.1.2 
+module       switch cray-mpich cray-mpich/8.1.20 
+module       load craype 
+module       load cray-dsmml 
+module       load craype-network-ofi 
+module       load cray-mpich 
+module       load cray-libsci 
+setenv       CRAY_PRGENVGNU loaded 
+
+

This tells us that PrgEnv-gnu conflicts with all other PrgEnvs. The modulefile sets some environment variables (the setenv lines), and loads the modules associated with the programming environment.

+

We can use the module whatis command to give us a brief summary of a module. For example, the command:

+

module whatis craype

+

outputs:

+

craype : Setup for Cray PE driver set and targeting modules

+

We mentioned previously that the different PrgEnvs use different locations for Cray-MPICH. We can see this by using module show cray-mpich in each different PrgEnv, and examining (for example) the CRAY_LD_LIBRARY_PATH environment variable.
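A quick sketch of that comparison (the exact paths and versions on the system will vary):

```bash
module swap PrgEnv-gnu PrgEnv-intel
module show cray-mpich 2>&1 | grep CRAY_LD_LIBRARY_PATH

module swap PrgEnv-intel PrgEnv-cray
module show cray-mpich 2>&1 | grep CRAY_LD_LIBRARY_PATH
```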

+

Compiling inside a PrgEnv: ftn, cc, and CC#

+

ftn, cc, and CC are the Cray compiler wrappers for Fortran, C, and C++, respectively, which are part of the craype module. When a particular PrgEnv- programming environment is loaded, these wrappers will make use of the corresponding compiler. For example, if we load PrgEnv-gnu with:

+
module swap PrgEnv-cray PrgEnv-gnu
+
+

we would expect ftn to wrap around gfortran, the GNU fortran compiler. We can test this with:

+

ftn --version

+

Which outputs:

+
GNU Fortran (GCC) 12.1.0 20220506 (HPE)
+Copyright (C) 2022 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+

As expected. We can also run which ftn:

/opt/cray/pe/craype/2.7.17/bin/ftn
+
+Note 1: In contrast with mpich, the location of the wrappers cc, CC, and ftn is always the same (/opt/cray/pe/craype/2.7.17/bin/ftn) and does NOT depend on the loaded PrgEnv.

+

Note 2: cc, CC, and ftn are also wrappers around their MPI counterparts. For MPI codes, the wrappers call the necessary MPI compilers depending on which PrgEnv is loaded.

+

Note 3: When changing between PrgEnvs, it is better to use module swap [current prgenv] [new prgenv] instead of module purge; module load [new prgenv] due to the way the environments set some environment variables.

+

ftn is part of the craype module. If we module unload craype and then type which ftn we find: +

/usr/bin/which: no ftn in (/opt/cray/pe/mpich/8.1.20/ofi/gnu/9.1/bin:/opt/cray/pe/mpich/8.1.20/bin:/opt/cray/libfabric/1.15.2.0/bin:/opt/cray/pe/gcc/12.1.0/bin:/home/ohull/.local/bin:/home/ohull/bin:/opt/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/nopt/xalt/xalt/bin:/nopt/nrel/utils/bin:/nopt/slurm/current/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/sbin:/bin)
+

+

I.e., it can't find the path to ftn, because craype is not loaded into the environment.

+

What happens if we module swap PrgEnv-gnu PrgEnv-cray, so that we're now using PrgEnv-cray, and then check ftn?

+
[ohull@eyas1 ~]$ ftn --version
+Cray Fortran : Version 14.0.4
+
+

ftn is now using Cray Fortran under the hood.

+

Note: you can still directly access the underlying compiler. For example, if we're using PrgEnv-gnu (so our compilers are the GCC compilers), we can use ftn, or we can use gfortran or mpifort directly. It is considered best practice to use the Cray wrappers (ftn, cc, CC) on a Cray machine like Kestrel.

+

In fact, the use of mpifort can be quite confusing. Inside the PrgEnv-gnu environment, we might assume that mpifort is a wrapper around OpenMPI. This is not correct, as mpifort wraps around Cray MPICH inside PrgEnv-gnu. If we module unload PrgEnv-gnu and then module load openmpi, then mpifort will wrap around OpenMPI. Using the Cray wrappers (ftn, cc, CC) helps avoid this confusion.

+

Submitting a job within a PrgEnv#

+

Submitting a Slurm job using a PrgEnv environment is no different than how you would normally submit a job. In your slurm script, below the #SBATCH directives, include:

+
module swap PrgEnv-cray [new PrgEnv]
+
+

We swap from PrgEnv-cray because this is the default PrgEnv that is loaded when logging onto Kestrel.

+

[new PrgEnv] can be PrgEnv-gnu or PrgEnv-intel.

+

Depending on the software you're trying to run, you may need to load additional modules like cray-hdf5 or cray-fftw.
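Putting this together, a minimal sketch of a Slurm script using a PrgEnv might look like the following (the account, executable, and extra modules are placeholders):

```bash
#!/bin/bash
#SBATCH --time=00:30:00
#SBATCH --nodes=2
#SBATCH --tasks-per-node=104
#SBATCH --account=<your_account>

module swap PrgEnv-cray PrgEnv-gnu   # swap to the PrgEnv used to build the code
# module load cray-hdf5 cray-fftw    # load additional dependencies if needed

srun ./my_executable
```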

+

NREL-built environments#

+

The NREL-built modules are similar to those on Eagle: the modules are separate, and no dependency is created between modules.

+

The modules are grouped by type (compilers_mpis, utilities_libraries, and applications), and a module can be loaded using module load $module_name.

+

The modules are optimized for the Kestrel architecture and will be updated/upgraded every 6-12 months or upon request. If there is a module you need that is not available, email hpc-help@nrel.gov.

+

NREL-built environments with cray-mpich-abi#

+

For binaries dynamically built with an MPICH-based MPI such as Intel MPI, the user can choose to use cray-mpich-abi at runtime to leverage its optimization for Kestrel. To check whether your executable was dynamically built with Intel MPI, you can run ldd [your program name] | grep mpi.

+

The module cray-mpich-abi will cause the program to run with Cray MPICH at runtime instead of Intel MPI. In your Slurm submit script, you must include the two lines:

+

module load craype
module load cray-mpich-abi

+

in order for the Cray MPICH ABI to work properly.

+

Note: If your code depends on libmpicxx, the Cray MPICH ABI is unlikely to work. You can check this by running ldd [your program name] | grep mpicxx.
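A minimal sketch of the whole workflow, assuming a program that was dynamically built with Intel MPI (my_program is a placeholder):

```bash
# Confirm the binary is dynamically linked against an MPICH-based MPI
ldd ./my_program | grep mpi          # should list an MPICH-style libmpi.so.12
ldd ./my_program | grep mpicxx       # if this matches, the ABI is unlikely to work

# In the Slurm script, load the ABI before running
module load craype
module load cray-mpich-abi
srun ./my_program
```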

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Environments/tutorial/index.html b/Documentation/Systems/Kestrel/Environments/tutorial/index.html new file mode 100644 index 000000000..9e5cba9fc --- /dev/null +++ b/Documentation/Systems/Kestrel/Environments/tutorial/index.html @@ -0,0 +1,5327 @@ + + + + + + + + + + + + + + + + + + + + + + + Environments Tutorial - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Environments tutorial#

+

In this tutorial, we will walk through how to build and run a basic MPI code using the four principal toolchains/software stacks on Kestrel. We will discuss common pitfalls in building and running within each of these toolchains, too.

+

We summarize these toolchains in the below table:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PrgEnv-*CompilerMPI
craycray cceCray MPICH
intelintelCray MPICH
n/aintelIntel MPI
n/agccOpen MPI
+

Note: There is an option to compile with MPICH-based MPI (e.g., Intel MPI but not Open MPI) and then use the module cray-mpich-abi at run-time, which causes the code to use Cray MPICH instead of the MPI it was built with. More information on how to use this feature will be added soon.

+

Introduction#

+

Kestrel is a Cray machine whose nodes are connected by "Cray Slingshot" (contrast this to Eagle, which uses infiniband). We've found that packages that make use of Cray tools like Cray MPICH perform faster than when the same package is built and run without Cray tools (e.g. compiling and running with intel MPI), in part because these Cray tools are optimized to work well with Cray Slingshot.

+

Most of us coming from Eagle are probably used to running our codes with Intel MPI or Open MPI, but not Cray MPICH.

+

Using the Cray-designed programming environments ("PrgEnvs") requires using special Cray compiler wrappers cc and ftn. These wrappers replace the MPI compiler wrappers you're used to, like mpicc, mpiicc, mpiifort, etc.

+

This guide will walk through how to utilize the Cray PrgEnv- environments with Cray MPICH, how to use "NREL-built" environments, and how to make sure your build is using the dependencies you expect.

+

What is "PrgEnv-"?#

+

Kestrel comes pre-packaged with several "programming environments." You can see which programming environments are available by typing module avail PrgEnv. For CPU codes, we focus on PrgEnv-cray and PrgEnv-intel. These environments provide compilers (accessible with the cc, CC, and ftn wrappers), Cray MPICH, and some other necessary lower-level libraries.

+

The Tutorial#

+

We're going to walk through building and running an MPI benchmarking code called IMB. This is a simple code that only requires a compiler and an MPI as dependencies (no scientific libraries, etc. are needed).

+

First, log onto Kestrel with +ssh [your username]@kestrel.hpc.nrel.gov

+

Let's grab an interactive node session:

+

salloc -N 1 -n 104 --time=01:00:00 --account=<your allocation handle>

+

Environment 1: PrgEnv-cray#

+

Make a new directory +

mkdir IMB-tutorial
+cd IMB-tutorial
+mkdir PrgEnv-cray
+cd PrgEnv-cray
+

+

Then download the code: +

git clone https://github.com/intel/mpi-benchmarks.git
+cd mpi-benchmarks
+

+

PrgEnv-cray was formerly the default environment on Kestrel. Now, PrgEnv-gnu is the default. So, first we want to run the command module swap PrgEnv-gnu PrgEnv-cray to load the environment. To check, type module list and make sure you see PrgEnv-cray somewhere in the module list.

Now, we can build the code. Run the command:

+

CC=cc CXX=CC CXXFLAGS="-std=c++11" make IMB-MPI1

+

What does this do?

+

CC=cc : set the c compiler to be cc. Recall that cc is the Cray wrapper around a c-compiler. Because we're in PrgEnv-cray, we expect the c compiler to be Cray's. We can test this by typing cc --version, which outputs: +

[ohull@kl1 imb]$ cc --version
+No supported cpu target is set, CRAY_CPU_TARGET=x86-64 will be used.
+Load a valid targeting module or set CRAY_CPU_TARGET
+Cray clang version 14.0.4  (3d8a48c51d4c92570b90f8f94df80601b08918b8)
+Target: x86_64-unknown-linux-gnu
+Thread model: posix
+InstalledDir: /opt/cray/pe/cce/14.0.4/cce-clang/x86_64/share/../bin
+

+

As expected, we are using Cray's C compiler.

+

CXX=CC: This sets the C++ compiler to be CC, in the same way as CC=cc for the C compiler above.

+

CXXFLAGS="-std=c++11" tells the compiler to use the C++11 standard for compiling the C++ code, which is necessary because IMB has some code that is deprecated in C++17, which is the standard that Cray's C++ compiler defaults to.

+

Finally,

+

make IMB-MPI1 builds IMB-MPI1, the IMB executable that we want.

+

Let's see what libraries we dynamically linked to in this build. Once the code is done building, type: +ldd ./IMB-MPI1

+

This will show all libraries required by the program (on the lefthand side) and the specific implementation of those libraries that the build is currently pointing to (on the righthand side).

+

Let's focus on MPI. Run:

+

ldd ./IMB-MPI1 | grep mpi

+

This should output something like:

+
[ohull@kl1 PrgEnv-cray]$ ldd IMB-MPI1 | grep mpi
+    libmpi_cray.so.12 => /opt/cray/pe/lib64/libmpi_cray.so.12 (0x00007fddee9ea000)
+
+

So, the MPI library we're using is Cray's MPI (Cray MPICH)

+

Let's run the code:

+

srun -N 1 -n 104 ./IMB-MPI1 AllReduce > out

+

When it completes, take a look at the out file:

+

cat out

+

IMB swept from 1 MPI task to 104 MPI tasks, performing a number of MPI_ALLREDUCE calls between the MPI tasks with message sizes ranging from 0 bytes to 4194304 bytes.

+

Note -- very important: when you run IMB-MPI1, you MUST specify IMB-MPI1 as ./IMB-MPI1 or otherwise give a direct path to this specific version of IMB-MPI1. When we move to the NREL-built intel environment in this tutorial, we will have an IMB-MPI1 already loaded into the path by default, and the command srun IMB-MPI1 will execute the default IMB-MPI1, not the one you just built.
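One way to check which executable will actually run (a quick sketch; paths depend on where you built the code):

```bash
which IMB-MPI1        # may resolve to a system-provided copy elsewhere in $PATH (or nothing in this environment)
ls -l ./IMB-MPI1      # the executable you just built in this directory
srun -N 1 -n 104 ./IMB-MPI1 Allreduce > out   # the leading ./ forces the local build
```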

+

If you'd like, you can also submit this as a slurm job. Make a file submit-IMB.in, and paste the following contents:

+
#!/bin/bash
+#SBATCH --time=00:40:00
+#SBATCH --nodes=1
+#SBATCH --tasks-per-node=104
+
+#!/bin/bash
+
+srun -N 1 --tasks-per-node=104 your/path/to/IMB-tutorial/PrgEnv-cray/mpi-benchmarks/IMB-MPI1 Allreduce > out
+
+

Don't forget to update your/path/to/IMB-tutorial/PrgEnv-cray/mpi-benchmarks/IMB-MPI1 to the actual path to your IMB-MPI1 executable.

+

Then, sbatch submit-IMB.in

+

Environment 2: PrgEnv-intel#

+

We'll now repeat all the above steps, except now with PrgEnv-intel. Return to your IMB-tutorial directory, and mkdir PrgEnv-intel

+

Now, load the PrgEnv-intel environment:

+
module swap PrgEnv-cray PrgEnv-intel
+module load gcc-stdalone/12.3.0
+
+

Note that where possible, we want to avoid using module purge because it can unset some environment variables that we generally want to keep. We unload the cray-libsci package for the sake of simplicity (we are working through resolving a default versioning conflict between cray-libsci and PrgEnv-intel. If you need to use cray-libsci within PrgEnv-intel, please reach out to hpc-help@nrel.gov).

+

Again, we can test which C compiler we're using with: +cc --version +Now, this should output something like: +

[ohull@x1000c0s0b0n0 mpi-benchmarks]$ cc --version
+Intel(R) oneAPI DPC++/C++ Compiler 2023.2.0 (2023.2.0.20230622)
+Target: x86_64-unknown-linux-gnu
+Thread model: posix
+InstalledDir: /nopt/nrel/apps/cpu_stack/compilers/02-24/spack/opt/spack/linux-rhel8-sapphirerapids/gcc-12.2.1/intel-oneapi-compilers-2023.2.0-hwdq5hei2obxznfjhtlav4mi5h5jd4zw/compiler/2023.2.0/linux/bin-llvm
+Configuration file: /nopt/nrel/apps/cpu_stack/compilers/02-24/spack/opt/spack/linux-rhel8-sapphirerapids/gcc-12.2.1/intel-oneapi-compilers-2023.2.0-hwdq5hei2obxznfjhtlav4mi5h5jd4zw/compiler/2023.2.0/linux/bin-llvm/../bin/icx.cfg
+

+

Contrast this to when we ran cc --version in the PrgEnv-cray section. We're now using a different compiler (Intel oneAPI) under the hood.

+

We can now repeat the steps we took in the PrgEnv-cray section. Move up two directories and re-download the code:

+
cd ../../
+mkdir PrgEnv-intel
+cd PrgEnv-intel
+git clone https://github.com/intel/mpi-benchmarks.git
+cd mpi-benchmarks
+
+

and build it:

+

CC=cc CXX=CC CXXFLAGS="-std=c++11" make IMB-MPI1

+

Note that we specify the same compiler wrapper, cc, to be the C compiler (the CC=cc part of the line above), as we did in the PrgEnv-cray section. But, cc now wraps around the intel-oneapi C compiler, instead of the Cray C compiler. So, we will be building with a different compiler, even though the build command is identical!

+

Again, we can run with:

+

srun -N 1 -n 104 ./IMB-MPI1 AllReduce > out

+

Or check which libraries are dynamically linked:

+

ldd ./IMB-MPI1

+

Or, for MPI specifically:

+
[ohull@kl1 PrgEnv-intel]$ ldd ./IMB-MPI1 | grep mpi
+    libmpi_intel.so.12 => /opt/cray/pe/lib64/libmpi_intel.so.12 (0x00007f13f8f8f000)
+
+

Note that this MPI library is indeed still Cray MPICH, the name is different than in the PrgEnv-cray section because it is specifically Cray MPICH built to be compatible with intel compilers, not cray compilers, as in the last example.

+

You can also submit this inside a Slurm submit script:

+
#!/bin/bash
+#SBATCH --time=00:40:00
+#SBATCH --nodes=1
+#SBATCH --tasks-per-node=104
+#SBATCH --account=<your allocation handle>
+
+#!/bin/bash
+
+module restore
+module swap PrgEnv-cray PrgEnv-intel
+module unload cray-libsci
+
+srun -N 1 --tasks-per-node=104 your/path/to/IMB-tutorial/PrgEnv-intel/mpi-benchmarks/IMB-MPI1 Allreduce > out
+
+

Note that the main difference between this submit script and the one for Environment 1 is that we exchange PrgEnv-cray for PrgEnv-intel.

+

Environment 3: Intel Compilers and Intel MPI#

+

We've now seen two examples using Cray's environments, PrgEnv-cray and PrgEnv-intel. Let's build IMB using one of NREL's environments, which are separate from Cray's.

+

First, go back to your IMB-tutorial directory and re-clone the code:

+
cd ../../
+mkdir intel-intelMPI
+cd intel-intelMPI
+git clone https://github.com/intel/mpi-benchmarks.git
+cd mpi-benchmarks 
+
+

Then, load the NREL environment. To do this, first run: +

module unload PrgEnv-intel
+

+

Again, we want to avoid module purge where possible, so we unload the previous environment (PrgEnv-intel) in order to retain underlying environment variables.

+

Let's check out our options for Intel compilers now:

+

module avail intel

+

We should see a number of modules. Some correspond to applications built with an intel toolchain (e.g. amr-wind/main-intel-oneapi-mpi-intel, whose name implies that amr-wind was built with the intel oneapi MPI and intel compilers). Others correspond to the MPI (e.g. intel-oneapi-mpi/2021.8.0-intel) or the compilers themselves (e.g. intel-oneapi-compilers/2022.1.0)

+

Let's load Intel MPI and Intel compilers:

+
module load intel-oneapi
+module load intel-oneapi-compilers
+module load intel-oneapi-mpi
+
+

Note that if we look back at module avail intel and look at the header above, e.g., intel-oneapi, we can see that these intel modules live in /nopt/nrel/apps/cpu_stack/modules/default/compilers_mpi -- this is different than the PrgEnvs, which can be found in /opt/cray/pe/lmod/modulefiles/core. This is one way to tell that you are using NREL's set of modules and not Cray's set of modules.
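If you are unsure which module set a loaded module comes from, module show prints the path of the modulefile near the top of its output; a quick sketch:

```bash
module show intel-oneapi-mpi 2>&1 | head -n 2   # the first lines include the modulefile path
```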

+

Now, we can build IMB with the intel compilers and Intel MPI:

+

CC=mpiicc CXX=mpiicpc CXXFLAGS="-std=c++11" make IMB-MPI1

+

Note that this command is different than the make commands we saw in the PrgEnv-cray and PrgEnv-intel sections.

+

Instead of CC=cc and CXX=CC we have CC=mpiicc and CXX=mpiicpc. mpiicc is the Intel MPI wrapper around the Intel C compiler, and mpiicpc is the same but for C++.

+

Remember that warning about IMB-MPI1 being in the default path? This is now true, so be careful that when you run the package, you're running the version you just built, NOT the default path version.

+

If you're still inside your/path/to/IMB-tutorial/intel-intelMPI/mpi-benchmarks then we can run the command:

+

ldd ./IMB-MPI1 | grep mpi

+

This outputs something like:

+
[ohull@kl1 intel-intelMPI]$ ldd ./IMB-MPI1 | grep mpi
+    libmpicxx.so.12 => /nopt/nrel/apps/mpi/07-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mpi-2021.8.0-6pnag4mmmx6lvoczign5a4fslwvbgebb/mpi/2021.8.0/lib/libmpicxx.so.12 (0x00007f94e5e09000)
+    libmpifort.so.12 => /nopt/nrel/apps/mpi/07-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mpi-2021.8.0-6pnag4mmmx6lvoczign5a4fslwvbgebb/mpi/2021.8.0/lib/libmpifort.so.12 (0x00007f94e5a55000)
+    libmpi.so.12 => /nopt/nrel/apps/mpi/07-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mpi-2021.8.0-6pnag4mmmx6lvoczign5a4fslwvbgebb/mpi/2021.8.0/lib/release/libmpi.so.12 (0x00007f94e4138000)
+
+

We see a few more libraries than we saw with the PrgEnvs. For example, we now have libmpicxx, libmpifort, and libmpi, instead of just libmpi_intel or libmpi_cray, as was the case with the two PrgEnvs. We can see that our three MPI library dependencies are pointing to the corresponding libraries in the NREL-built environments.

+

We can submit an IMB job with the following slurm script:

+
#!/bin/bash
+#SBATCH --time=00:40:00
+#SBATCH --nodes=1
+#SBATCH --tasks-per-node=104
+
+module restore
+module unload PrgEnv-cray
+
+module load intel-oneapi
+module load intel-oneapi-compilers
+module load intel-oneapi-mpi
+
+srun -N 1 --tasks-per-node=104  /your/path/to/IMB-tutorial/intel-intelMPI/mpi-benchmarks/IMB-MPI1 Allreduce > out
+
+

don't forget to replace /your/path/to/IMB-tutorial/intel-intelMPI/mpi-benchmarks/IMB-MPI1 with your actual path.

+

Environment 4: GCC and OpenMPI#

+

Environment 4 works similarly to Environment 3, except instead of using the NREL-built Intel modules, we'll use GCC and OpenMPI. Note that OpenMPI is never recommended for multi-node use because it is unstable on Cray Slingshot networks. You should only use OpenMPI for single-node jobs.

+

Return to your IMB-tutorial directory and set up for gcc-openMPI:

+
cd ../../
+mkdir gcc-openMPI
+cd gcc-openMPI
+git clone https://github.com/intel/mpi-benchmarks.git
+cd mpi-benchmarks 
+
+

Run:

+
module unload intel-oneapi intel-oneapi-mpi intel-oneapi-compilers
+module load binutils/2.41
+module load gcc-stdalone/12.3.0
+module load openmpi/5.0.3-gcc
+
+

We unload the intel environment we set up in the previous step, and load gcc and openmpi5 instead. Note that there are a number of versions of gcc available. The -stdalone tag denotes that it will not cause a forced unloading of other environment modules, unlike gcc with no -stdalone tag, which can force-switch the environment to PrgEnv-gnu.

+

Now, we can module avail openmpi to find openmpi-related modules. Note the version of openmpi we use:

+

module load openmpi/5.0.3-gcc

+

OpenMPI5 is more compatible with Kestrel's Cray Slingshot network than older versions of OpenMPI. While we do not generally recommend using OpenMPI, if you must use it, it is best to use OpenMPI5.

+

Now, we can build the code. Run the command:

+

CC=mpicc CXX=mpic++ CXXFLAGS="-std=c++11" make IMB-MPI1

+

Similar to using mpiicc and mpiicpc in the intel section, now we use mpicc and mpic++, because these are the Open MPI wrappers around the GCC C and C++ compilers (respectively). We are not using the cc and CC wrappers now because we are not using a PrgEnv.

+

Once the executable is built, check the mpi library it's using with ldd:

+

ldd ./IMB-MPI1 | grep libmpi

+

This command should return something like:

+
[ohull@x1007c7s7b0n0 mpi-benchmarks]$ ldd ./IMB-MPI1 | grep libmpi
+    libmpi.so.40 => /nopt/nrel/apps/mpi/07-23/spack/opt/spack/linux-rhel8-icelake/gcc-10.1.0/openmpi-4.1.5-s5tpzjd3y4scuw76cngwz44nuup6knjt/lib/libmpi.so.40 (0x00007f5e0c823000)
+
+

We see that libmpi is indeed pointing where we want it to: to the openmpi version of libmpi built with gcc-10.1.0.

+

Finally, we can submit an IMB job with the following slurm script:

+
#!/bin/bash
+#SBATCH --time=00:40:00
+#SBATCH --nodes=1
+#SBATCH --tasks-per-node=104
+
+module restore
+module unload PrgEnv-cray
+module unload cce
+
+module load openmpi/4.1.5-gcc
+module load gcc/10.1.0
+
+srun -N 1 --tasks-per-node=104 /your/path/to/IMB-tutorial/gcc-openMPI/mpi-benchmarks/IMB-MPI1 Allreduce > out
+
+

don't forget to replace /your/path/to/IMB-tutorial/gcc-openMPI/mpi-benchmarks/IMB-MPI1 with your actual path.

+

Final Words#

+

With all four environments built, you could now run a few benchmarks comparing how MPI performs between them. Try this using 1 node and using 2 nodes, and compare the results for each environment. You should see that performance between all four environments is competitive on 1 node, but the two PrgEnv builds run a bit faster for large message sizes on 2 nodes, and the gcc/openmpi build is liable to randomly fail in the 2 node case.
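As a sketch of that comparison (run from within each environment's mpi-benchmarks directory; node and task counts are illustrative):

```bash
srun -N 1 --tasks-per-node=104 ./IMB-MPI1 Allreduce > out.1node
srun -N 2 --tasks-per-node=104 ./IMB-MPI1 Allreduce > out.2node
```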

+

Keeping track of the environments on Kestrel can be tricky at first. The key point to remember is that there are two separate "realms" of environments: the Cray PrgEnvs, which use Cray MPICH and for which best practice is to use the cc, CC, and ftn compiler wrappers for C, C++, and Fortran, respectively; and the NREL-built environments, which function much like the environments on Eagle and use the more familiar compiler wrappers such as mpiicc (for compiling C code with intel/Intel MPI) or mpicc (for compiling C code with gcc/Open MPI).

+

Earlier in the article, we mentioned the existence of the cray-mpich-abi, which allows you to compile your code with a non-Cray MPICH-based MPI, like Intel MPI, and then run the code with Cray MPICH via use of the cray-mpich-abi module. We will include instructions for how to use this in an updated version of the tutorial.

+ + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Filesystems/index.html b/Documentation/Systems/Kestrel/Filesystems/index.html new file mode 100644 index 000000000..e7aba96c8 --- /dev/null +++ b/Documentation/Systems/Kestrel/Filesystems/index.html @@ -0,0 +1,4988 @@ + + + + + + + + + + + + + + + + + + + + + + + Filesystems - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Kestrel Filesystems#

+

Home File System#

+

The Home File System (HFS) on Kestrel is part of the ClusterStor used for the Parallel File System (PFS), providing highly reliable storage for user home directories and NREL-specific software. HFS has 1.2 petabytes (PB) of capacity. Snapshots of files on the HFS are available up to 30 days after change/deletion.

+

/home

+

The /home directory on Kestrel is intended to hold small files. These include shell startup files, scripts, source code, executables, and data files. Each user has a quota of 50 GB.

+
+To check your home quota usage, run the following command: +
[user@kl1 ~]$ lfs quota -uh $USER /home/$USER
+
+
+

/nopt

+

The /nopt directory on Kestrel resides on HFS and is where NREL-specific software, module files, licenses, and licensed software are kept.

+

Parallel File System#

+

The Parallel File System (PFS) on Kestrel, consisting of ProjectFS and ScratchFS, is a ClusterStor Lustre file system intended for high-performance I/O.

+
+

Warning

+

There are no backups of PFS data. Users are responsible for ensuring that critical data is copied to Mass Storage or other alternate data storage location.

+
+

ProjectFS#

+

ProjectFS provides 68 PB of capacity with 200 GB/s of IOR bandwidth. It is intended for longer term data storage.

+

/projects

+

Each project/allocation has a directory in /projects intended to host data, configuration, and applications shared by the project.

+

Directories in /projects have a quota assigned based on the project resource allocation for that fiscal year.

+
+To check your quota usage, run the following commands: +
# To determine your Project ID run:
+
+[user@kl1 ~]$ lfs project -d /projects/csc000
+110255 P /projects/csc000
+
+# In this case, 110255 is the Project ID for project csc000.
+
+# To see usage towards your quota, run:
+
+[user@kl1 ~]$ lfs quota -hp 110255 /projects/csc000
+
+Disk quotas for prj 110255 (pid 110255):
+    Filesystem    used   quota   limit   grace   files   quota   limit   grace 
+/projects/csc000    
+                617.5G    100T    100T       -  636875       0       0       -
+# An asterisk(*) by the used value indicates the project has exceeded its quota of storage, and writes to the directory are not allowed.
+
+
+

/kfs2/shared-projects

+

Projects may request a shared project directory to host data, configuration, and applications shared by multiple projects/allocations.

+

To request a /shared-projects directory, please contact hpc-help@nrel.gov and include the following information: +

1. The name of the primary/"host" allocation that the /shared-projects directory will belong to. 
+2. The name/email of a person who will authorize changes to the /shared-projects directory. 
+3. How much space you would like to request (in TB). 
+4. A list of other allocations that should have access to the /shared-projects directory. 
+5. What you would like to call the directory. For example, "/shared-projects/myproject-shared" or other similar descriptive name, ideally between about 4-15 characters in length. 
+6. A group name for the UNIX group ownership of the directory, the same or similar to the directory name provided in Step 5. 
+

+

/kfs2/datasets

+

The /kfs2/datasets directory on Kestrel hosts widely used data sets.

+

There are multiple big data sets that are commonly used across various projects for computation and analysis on NREL's HPC Systems. We provide a common location on Kestrel's filesystem at /kfs2/datasets, where these data sets are available for global reading by all compute nodes. Each data set contains a readme file that covers background, references, explanation of the data structure, and Python examples.

+

These datasets include:

+

/kfs2/datasets/NSRDB

+

The National Solar Radiation Database (NSRDB) is a serially complete collection of meteorological and solar irradiance data sets for the United States and a growing list of international locations for 1998-2017. The NSRDB provides foundational information to support U.S. Department of Energy programs, research, and the general public.

+

/kfs2/datasets/WIND

+

The Wind Integration National Data Set (WIND) Toolkit consists of wind resource data for North America and was produced using the Weather Research and Forecasting Model (WRF).

+

ScratchFS#

+

ScratchFS is a Lustre file system in a hybrid flash-disk configuration providing a total of 27 petabytes (PB) of capacity with 354 gigabytes (GB)/s of IOR bandwidth. It is intended to support intensive I/O and we recommend running jobs out of ScratchFS for the best performance.

+

/scratch

+

Each user has their own directory in /scratch.

+
+

Warning

+

Data in /scratch is subject to deletion after 28 days of inactivity. It is recommended to store your important data, libraries, and programs on ProjectFS.

+
+

Node File System#

+

Some Kestrel CPU compute nodes have an NVMe local solid-state drive (SSD) for use by compute jobs. They vary in size; 1.7TB on 256 of the standard compute nodes and 5.6TB on the bigmem nodes. The GPU nodes also all have local NVMe drives, with 3.4TB available per node. There are several possible scenarios in which a local disk may make your job run faster. For instance, you may have a job accessing or creating many small (temporary) files, you may have many parallel tasks accessing the same file, or your job may do many random reads/writes or memory mapping.

+

/tmp/scratch

+

The local disk on nodes that have one is mounted at /tmp/scratch. To write to the local disk, use the $TMPDIR environment variable, which is set to /tmp/scratch/<JOBID>. A node will not have read or write access to any other node's local scratch, only its own. Also, this directory will be cleaned once the job ends. You will need to transfer any files to be saved to another file system. Note that writing to $TMPDIR on a node without a real local disk will use RAM.

+

To request nodes with local disk, use the --tmp option in your job submission script. (e.g. --tmp=1600000). For more information about requesting this feature, please see the Running on Kestrel page.
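As a rough sketch of a job that uses the local disk (the account, sizes, and file names are placeholders):

```bash
#!/bin/bash
#SBATCH --account=<project_handle>
#SBATCH --time=01:00:00
#SBATCH --nodes=1
#SBATCH --tmp=1600000                   # request a node with ~1.6 TB of local disk

cd $TMPDIR                              # /tmp/scratch/<JOBID> on the allocated node
/projects/<project_handle>/my_program   # write temporary files here for fast local I/O

# Copy anything worth keeping before the job ends; $TMPDIR is cleaned afterwards
cp results.out /projects/<project_handle>/
```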

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Filesystems/lustre/index.html b/Documentation/Systems/Kestrel/Filesystems/lustre/index.html new file mode 100644 index 000000000..93f16a99b --- /dev/null +++ b/Documentation/Systems/Kestrel/Filesystems/lustre/index.html @@ -0,0 +1,5058 @@ + + + + + + + + + + + + + + + + + + + + + + + Lustre Best Practices - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Lustre Best Practices#

+

In some cases special care must be taken while using Lustre so as not to affect the performance of the filesystem for yourself and other users. The below Do's and Don'ts are provided as guidance.

+

Do#

+
    +
  • Use the lfs find command
      +
    • e.g. +
      lfs find /scratch/username -type f -name "*.py"
      +
    • +
    +
  • +
  • Break up directories with many files into more directories if possible
  • +
  • Store small files and directories of small files on a single OST (Object Storage Target)
  • +
  • Limit the number of processes accessing a file. It may be better to read in a file once and then broadcast necessary information to other processes
  • +
  • Change your stripecount based on the filesize
  • +
  • Write many files to the node filesystem $TMPDIR: some compute nodes have local storage available, and it is not a part of the Lustre filesystem. Once your work is complete, the files can then be added to a tar archive and transferred to /projects/<project_name> for later use, or deleted from $TMPDIR if no longer needed (see the sketch after this list).
  • +
  • Store data in /projects and run jobs from /scratch/$USER
  • +
  • Store your conda environments in /projects or /scratch
  • +
+
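A rough sketch of the $TMPDIR workflow mentioned in the list above (the program and paths are placeholders):

```bash
cd $TMPDIR                                           # node-local storage, not Lustre
./my_program                                         # generates many small temporary files
tar -czf run01_results.tar.gz results/               # bundle them into one archive
cp run01_results.tar.gz /projects/<project_name>/    # move the archive to project storage
```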

Do Not#

+
    +
  • Use ls -l
  • +
  • Have a file accessed by multiple processes
  • +
  • In Python, avoid using os.walk or os.scandir
  • +
  • List files instead of using wildcards
      +
    • e.g. don't use cp * dir/
    • +
    • If you need to tar/rm/cp a large number of files use xargs or similar: +
      lfs find /scratch/username/old_data/ -t f -print0 | xargs -0 rm
      +
    • +
    +
  • +
  • Have many small files in a single directory
  • +
  • Store important files in /scratch
      +
    • e.g. don't keep data, libraries or programs in /scratch/username, as /scratch directories are subject to automated purging based on the Data Retention Policy
    • +
    +
  • +
+

Useful Lustre commands#

+
    +
  • Check your storage usage:
      +
    • lfs quota -h -u <username> /scratch
    • +
    +
  • +
  • See which MDT a directory is located on
      +
    • lfs getstripe --mdt-index /scratch/<username>
    • +
    • This will return an index 0-2 indicating the MDT
    • +
    +
  • +
  • Create a folder on a specific MDT (admin only)
      +
    • lfs mkdir –i <mdt_index> /dir_path
    • +
    +
  • +
+

Striping#

+

Lustre provides a way to stripe files; this spreads them across multiple OSTs. Striping a large file being accessed by many processes can greatly improve performance. See Lustre file striping for more details.

+

lfs setstripe <file> -c <count> -s <size>
+
+* The stripe count determines how many OSTs the data is spread across
+* The stripe size is how large each stripe is, in KB, MB, or GB
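For example, to stripe a large shared output file (the values are illustrative; note that newer Lustre releases spell the stripe-size option -S rather than -s):

```bash
lfs setstripe -c 8 -S 16M /scratch/$USER/run01/big_output.dat   # 8 OSTs, 16 MB stripes
lfs getstripe /scratch/$USER/run01/big_output.dat               # verify the layout
```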

+

References#

+ + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/Systems/Kestrel/Running/debugging_at_scale/index.html b/Documentation/Systems/Kestrel/Running/debugging_at_scale/index.html new file mode 100644 index 000000000..4767ac8c6 --- /dev/null +++ b/Documentation/Systems/Kestrel/Running/debugging_at_scale/index.html @@ -0,0 +1,4844 @@ + + + + + + + + + + + + + + + + + + + Approaches to Debugging at Scale - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Approaches to Debugging at Scale#

+

On an HPC system, occasionally there is the need to debug programs at relatively large scale, on a larger number of nodes than what is available via the short or debug queues. Because many jobs run for several days, it may take a long time to acquire a large number of nodes.

+

To debug applications that use many nodes, there are three possible approaches.

+
+Approach 1: Run an Interactive Job +

Submit an interactive job asking for the number of tasks you will need. For example:

+

srun -n 3600 -t 1-00 -A <handle> --pty $SHELL

This asks for 3600 cores for 1 day. When the nodes are available for your job, you "land" in an interactive session (shell) on one of the compute nodes. From there you may run scripts, execute parallel programs across any of the nodes, or use an interactive debugger such as ARM DDT.

+

When you are done working, exit the interactive session.

+

Rarely will a request of this size and duration start right away, so running it within a screen session allows you to wait for your session to start without needing to stay connected to the HPC system. With this method, users must periodically check whether their session has started by reconnecting to their screen session.

+

Using screen sessions:

+
    +
  1. +

    On a login node, type "screen"

    +
  2. +
  3. +

    Check to see whether your environment is correct within the screen session. If needed, purge modules and reload: +

    [user@login2 ~]$ screen
    +
    +[user@login2 ~]$ module purge
    +[user@login2 ~]$ module load PrgEnv-intel
    +

    +
  4. +
  5. Request an interactive job:
  6. +
+

$ srun -n 3600 -t 1-00 -A <handle> --pty $SHELL

When you want to disconnect from the session, type control-a then control-d. The interactive job continues to run on the HPC system.

+

Later, to continue working in the interactive job session, reconnect to this screen session. To reconnect, if you have logged out of the system, first log in to the same login node. Then type screen -r to reattach to the screen session. If your interactive job has started, you will land on the compute node that you were given by the system.

+

When you are done with your work, type exit to end the interactive job, and then type exit again to end the screen session.

+
+
+Approach 2: Request a Reservation +

A more convenient approach may be to request a reservation for the number of nodes you need. A reservation may be shared by multiple users, and it starts and ends at specific times.

+

To request a reservation for a debugging session, please contact us and include:

+
* Project handle 
+* Number of nodes 
+* Time of the request
+
+

When the work is complete, please inform the HPC Operations team, so the reservation can be released. The project allocation will be charged for the reserved time, up until the reservation is released, whether that time is used or not.

+

When your reserved time starts you may run either interactive jobs or regular batch jobs on the nodes in the reservation.

+
+
+Approach 3: Offline Debugging +

It might be difficult to debug a large parallel job on an HPC system interactively. An alternative is to debug the problem by submitting a job for offline debugging.

+

The problem should be scaled down so that it can easily get access to an interactive queue (around 2 nodes). Create an interactive session and open the ARM DDT debugger (GUI). Run the program and set evaluations, tracepoints, watchpoints, etc. in the DDT session. Save the session file.

+

You can then submit a larger job with ARM DDT in offline mode pointing to the session file created in the previous step. At the end of the run, you can view the generated debugging report in html or text mode.

+
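A hypothetical sketch of such an offline run inside the larger job's batch script; the exact flags (in particular --session) vary by DDT/Forge version, so check ddt --help on the system:

# Run the full-scale job under DDT in offline mode, reusing the evaluations,
# tracepoints, and watchpoints saved from the small interactive session
ddt --offline --session=<saved_session_file> -o ddt_report.html srun -n 3600 ./my_program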
diff --git a/Documentation/Systems/Kestrel/Running/example_sbatch/index.html b/Documentation/Systems/Kestrel/Running/example_sbatch/index.html
new file mode 100644

Sample Batch Scripts for Running Jobs on the Kestrel System#

+

For a walkthrough of the elements of an sbatch script, please see Submitting Batch Jobs. For application specific recommendations and examples, please check the Application pages.

+
+Sample batch script for a CPU job in the debug queue +
#!/bin/bash 
+#SBATCH --account=<allocation handle>   # Required
+#SBATCH --ntasks=104                    # Tasks to be run 
+#SBATCH --nodes=1                       # Run the tasks on the same node 
+#SBATCH --time=5                        # Required, maximum job duration 
+#SBATCH --partition=debug 
+
+cd /scratch/$USER 
+
+srun ./my_program.sh
+
+
+
+Sample batch script with memory request +

Standard Kestrel CPU nodes have about 250G of usable RAM. There are 10 bigmem nodes with 2TB of RAM.

#!/bin/bash 
+#SBATCH --account=<allocation handle>   # Required 
+#SBATCH --ntasks=104                    # CPU cores requested for job 
+#SBATCH --time=01-00                    # Required, maximum job duration
+#SBATCH --mem=500G                      # Memory request
+
+
+cd /scratch/$USER 
+srun ./my_program.sh
+

+
+
+Sample batch script for a job in the shared partition +

If your job doesn't need a full CPU node (104 cores), you can run your job in the shared partition. When running on a shared node, the default memory per CPU is 1G. To change this amount, use the --mem-per-cpu=<MEM_REQUEST> flag.

+
#!/bin/bash
+#SBATCH --nodes=1 
+#SBATCH --partition=shared         
+#SBATCH --time=2:00:00                  # Required, maximum job duration
+#SBATCH --ntasks=26                     # CPUs requested for job 
+#SBATCH --mem-per-cpu=2000              # Requesting 2G per core.
+#SBATCH --account=<allocation handle>   # Required 
+
+cd /scratch/$USER 
+srun ./my_program # Use your application's commands here  
+
+
+
+Sample batch script to utilize local disk +

On Kestrel, 256 of the standard compute nodes have 1.7TB of NVMe node local storage. Use the #SBATCH --tmp=<LOCAL_DISK_REQUEST> directive to request a node with local disk space. The storage may then be accessed inside the job by using the $TMPDIR environment variable.

+
#!/bin/bash 
+#SBATCH --account=<allocation handle>      # Required 
+#SBATCH --ntasks=104                       # CPU cores requested for job 
+#SBATCH --nodes=1                  
+#SBATCH --time=01-00                       # Required, maximum job duration
+#SBATCH --tmp=1700000                      # Request local disk space
+
+# Copy files into $TMPDIR 
+cp /scratch/<userid>/myfiles* $TMPDIR 
+
+srun ./my_parallel_readwrite_program -input-options $TMPDIR/myfiles  # use your application's commands  
+
+
+
+Sample batch script for high-priority job +

A job may request high priority using --qos=high, which will give a small priority bump in the queue. This will charge your allocation at 2x the normal rate.

+
#!/bin/bash
+#SBATCH --job-name=job_monitor
+#SBATCH --account=<allocation handle>      # Required     
+#SBATCH --time=00:05:00                    # Required, maximum job duration
+#SBATCH --qos=high                         # Request high priority
+#SBATCH --ntasks=104
+#SBATCH -N 2 
+#SBATCH --output=job_monitor.out 
+
+cd /scratch/$USER 
+srun ./my_program.sh
+
+
+
+Sample batch script for a GPU job in the debug queue +

All GPU nodes in the debug queue are shared. You are limited to two GPUs per job, across 1 or 2 nodes. +

#!/bin/bash 
+#SBATCH --account=<allocation handle>   # Required
+#SBATCH --nodes=2  
+#SBATCH --gpus-per-node=1
+#SBATCH --mem=50G                       # Request CPU memory per node 
+#SBATCH --ntasks-per-node=2             # Request CPU cores per node
+#SBATCH --time=01:00:00                 # Required, maximum job duration 
+#SBATCH --partition=debug 
+
+cd /scratch/$USER 
+
+srun ./my_program.sh
+

+
+
+Sample batch script for a full GPU node +

Kestrel GPU nodes have 4 H100 GPUs. To run jobs on GPUs, your script should contain the --gpus=<NUM_GPUS> flag in the SBATCH directives. Submit GPU jobs from the GPU login nodes.

+
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --account=<allocation handle>   # Required 
+#SBATCH --time=02:00:00                 # Required, maximum job duration
+#SBATCH --ntasks-per-node=128           # Maximum CPU cores for job 
+#SBATCH --gpus=4                        # GPU request 
+#SBATCH --exclusive                     # Request exclusive access to node. Allocates all CPU cores and GPUs by default.  
+#SBATCH --mem=0                         # Request all of the RAM available on node
+
+
+# Load modules
+module load vasp
+
+# Run program
+cd /scratch/$USER 
+srun my_graphics_intensive_scripting 
+
+
+
+Sample batch script for a partial GPU node +

GPU nodes can be shared so you may request fewer than all 4 GPUs on a node. When doing so, you must also request appropriate CPU cores and memory with the --ntasks-per-node=<NUM_CPUS> and --mem=<MEMORY_REQUEST> flags, respectively. Submit GPU jobs from the GPU login nodes.

+
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --account=<allocation handle>   # Required 
+#SBATCH --time=2:00:00                  # Required, maximum job duration
+#SBATCH --ntasks-per-node=20            # Request CPU cores 
+#SBATCH --gpus=2                        # GPU request 
+#SBATCH --mem=170G                      # Request CPU memory
+
+# Load modules
+
+# Run program
+cd /scratch/$USER 
+srun my_graphics_intensive_scripting 
+
+
diff --git a/Documentation/Systems/Kestrel/Running/index.html b/Documentation/Systems/Kestrel/Running/index.html
new file mode 100644

Kestrel Job Partitions and Scheduling Policies#

+

Learn about job partitions and policies for scheduling jobs on Kestrel.

+

Kestrel Compute Nodes#

+

There are two general types of compute nodes on Kestrel: CPU nodes and GPU nodes. These nodes can be further broken down into four categories, listed on the Kestrel System Configuration Page.

+

CPU Nodes#

+

Standard CPU-based compute nodes on Kestrel have 104 cores and 240G of usable RAM. 256 of those nodes have a 1.7TB NVMe local disk. There are also 10 bigmem nodes with 2TB of RAM and 5.6TB NVMe local disk.

+

GPU Nodes#

+

Kestrel has 132 GPU nodes with 4 NVIDIA H100 GPUs, each with 80GB memory. These have Dual socket AMD Genoa 64-core processors (128 cores total) with about 350G of usable RAM. The GPU nodes also have 3.4TB of NVMe local disk.

+
+

Warning

+

You should use a login node that matches the architecture of the compute nodes that your jobs will be running on for compiling software and submitting jobs.

+
+

Using Node Local Storage#

+

The majority of CPU nodes do not have local disk storage, but there are 256 nodes with fast local NVMe drives for temporary storage by jobs with high disk I/O requirements. To request nodes with local disk, use the --tmp option in your job submission script (e.g. --tmp=1600000). When your job is allocated nodes with local disk, the storage may then be accessed inside the job by using the $TMPDIR environment variable as the path. Be aware that on nodes without local disk, writing to $TMPDIR will consume RAM, reducing the available memory for running processes.

+

Note that all of the Bigmem and H100 GPU nodes have real local disk.

+
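A minimal sketch of this workflow in a job script (paths and sizes are illustrative):

#SBATCH --tmp=1600000                              # request a node with local NVMe disk

cp /scratch/$USER/input_data $TMPDIR/              # stage input onto the fast local disk
srun ./my_io_heavy_program $TMPDIR/input_data
cp -r $TMPDIR/output /projects/<project_name>/     # copy results back before the job ends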

Partitions#

+

Kestrel nodes are associated with one or more partitions. Each partition is associated with one or more job characteristics, which include run time, per-node memory requirements, and per-node local scratch disk requirements.

+

Excluding the shared and debug partitions, jobs will be automatically routed to the appropriate partitions by Slurm based on node quantity, walltime, hardware features, and other aspects specified in the submission. Jobs will have access to the largest number of nodes, and thus the shortest wait, if the partition is not specified during job submission.

+

The following table summarizes the partitions on Kestrel:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Partition NameDescriptionLimitsPlacement Condition
debugNodes dedicated to developing and
troubleshooting jobs. Debug nodes with each of the non-standard
hardware configurations are available.
The node-type distribution is:
- 2 bigmem nodes
- 2 nodes with 1.7 TB NVMe
- 4 standard nodes
- 2 GPU nodes (shared)
10 total nodes
- 1 job with a max of 2 nodes per user.
- 2 GPUs per user.
- 1/2 GPU node resources per user (Across 1-2 nodes).
- 01:00:00 max walltime.
-p debug
or
--partition=debug
shortNodes that prefer jobs with walltimes
<= 4 hours.
2016 nodes total.
No limit per user.
--time <= 4:00:00
--mem <= 246064
--tmp <= 1700000 (256 nodes)
standardNodes that prefer jobs with walltimes
<= 2 days.
2106 nodes total.
1050 nodes per user.
--mem <= 246064
--tmp <= 1700000
longNodes that prefer jobs with walltimes > 2 days.
Maximum walltime of any job is 10 days
525 nodes total.
262 nodes per user.
--time <= 10-00
--mem <= 246064
--tmp <= 1700000 (256 nodes)
bigmemNodes that have 2 TB of RAM and 5.6 TB NVMe local disk.8 nodes total.
4 nodes per user.
--mem > 246064
--time <= 2-00
--tmp > 1700000
bigmemlBigmem nodes that prefer jobs with walltimes > 2 days.
Maximum walltime of any job is 10 days.
4 nodes total.
3 nodes per user.
--mem > 246064
--time > 2-00
--tmp > 1700000
sharedNodes that can be shared by multiple users and jobs.64 nodes total.
Half of partition per user.
2 days max walltime.
-p shared
or
--partition=shared
sharedlNodes that can be shared by multiple users and prefer jobs with walltimes > 2 days.16 nodes total.
8 nodes per user.
-p sharedl
or
--partition=sharedl
gpu-h100Shareable GPU nodes with 4 NVIDIA H100 SXM 80GB Computational Accelerators.130 nodes total.
65 nodes per user.
1 <= --gpus <= 4
--time <= 2-00
gpu-h100sShareable GPU nodes that prefer jobs with walltimes <= 4 hours.130 nodes total.
65 nodes per user.
1 <= --gpus <= 4
--time <= 4:00:00
gpu-h100lShareable GPU nodes that prefer jobs with walltimes > 2 days.26 GPU nodes total.
13 GPU nodes per user.
1 <= --gpus <= 4
--time > 2-00
+

Use the option listed above on the srun, sbatch, or salloc command or in your job script to specify what resources your job requires.

+

For more information on running jobs and Slurm job scheduling, please see the Slurm documentation section.

+

Shared Node Partition#

+

Nodes in the shared partition can be shared by multiple users or jobs. This partition is intended for jobs that do not require a whole node.

+
+

Tip

+

Testing at NREL has been done to evaluate the performance of VASP using shared nodes. Please see the VASP page for specific recommendations.

+
+

Usage#

+

Currently, there are 64 standard compute nodes available in the shared partition. These nodes have about 240G of usable RAM and 104 cores. By default, your job will be allocated about 1G of RAM per core requested. To change this amount, you can use the --mem or --mem-per-cpu flag in your job submission. To allocate all of the memory available on a node, use the --mem=0 flag.

+
+Sample batch script for a job in the shared partition +
#!/bin/bash
+#SBATCH --nodes=1 
+#SBATCH --partition=shared         
+#SBATCH --time=2:00:00    
+#SBATCH --ntasks=26 # CPUs requested for job 
+#SBATCH --mem-per-cpu=2000 # Request 2GB per core.
+#SBATCH --account=<allocation handle>
+
+cd /scratch/$USER 
+srun ./my_program # Use your application's commands here  
+
+
+

GPU Jobs#

+

Each GPU node has 4 NVIDIA H100 GPUs (80 GB), 128 CPU cores, and 350GB of usable RAM. All of the GPU nodes are shared. We highly recommend considering the use of partial GPU nodes if possible in order to efficiently use the GPU nodes and your AUs.

+

To request use of a GPU, use the flag --gpus=<quantity> with sbatch, srun, or salloc, or add it as an #SBATCH directive in your sbatch submit script, where <quantity> is a number from 1 to 4. All of the GPU memory for each GPU allocated will be available to the job (80 GB per GPU).

+

If your job will require more than the default 1 CPU core and 1G of CPU RAM per core allocated, you must request the quantity of cores and/or RAM that you will need, by using additional flags such as --ntasks= or --mem=. To request all of the memory available on the GPU node, use --mem=0.

+
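For example, a partial GPU node could be requested interactively with (the core, memory, and time values are illustrative):

salloc --account=<handle> --time=1:00:00 --gpus=2 --ntasks=32 --mem=100G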

The GPU nodes also have 3.4 TB of local disk space. Note that other jobs running on the same GPU node could also be using this space. Slurm is unable to divide this space to separate jobs on the same node like it does for memory or CPUs. If you need to ensure that your job has exclusive access to all of the disk space, you'll need to use the --exclusive flag to prevent the node from being shared with other jobs.

+
+

Warning

+

A job with the --exclusive flag will be allocated all of the CPUs and GPUs on a node, but is only allocated as much memory as requested. Use the flag --mem=0 to request all of the CPU RAM on the node.

+
+

GPU Debug Jobs#

+

There are two shared GPU nodes available for debugging. To use them, specify --partition=debug in your job script. In addition to the debug partition limits (1 job per user, up to 2 nodes per user, and up to 1 hour of walltime), a single GPU debug job is also limited to half of a total GPU node's resources. This is equivalent to 64 CPU cores, 2 GPUs, and 180G of RAM, which can be spread across 1 or 2 nodes. Unlike the other GPU nodes, the GPU debug nodes can't be used exclusively, so the --exclusive flag can't be used for debug GPU jobs.

+

Allocation Unit (AU) Charges#

+

The equation for calculating the AU cost of a job is:

+

AU cost = (Walltime in hours * Number of Nodes * QoS Factor * Charge Factor)

+

The CPU node charge factor is 10, and the GPU node charge factor is 100.

+

On shared nodes (nodes in the shared partition and GPU nodes), the value for Number of Nodes can be a fraction of a node. This value will be calculated based on either the number of cores, amount of memory, or the number of GPUs (on GPU nodes), whichever is a greater percentage of the total of that resource available on the node.

+
+Example Job Cost Calculation - CPU shared +

For example, if you request 123032M of RAM (half of the available RAM on the node), and 26 cores, you will be billed 5 AUs per node hour.

+
# To determine the Number of Nodes value: 
+123032/246064 = 0.5
+
+26/104 = 0.25 
+
+Number of Nodes = 0.5
+
+# Final calculation
+
+1 hour walltime * 0.5 nodes * 1 QoS Factor * 10 Charge Factor = 5 AUs
+
+
+
+Example Job Cost Calculation - GPU +

For example, if you request 270000M of RAM, 32 cores, and 2 GPUs you will be billed 75 AUs per node hour.

+
# To determine the Number of Nodes value: 
+
+# CPU RAM
+270000/360000 = 0.75
+
+# CPU Cores 
+32/128 = 0.25 
+
+# GPUs
+2/4 = 0.5
+
+
+Number of Nodes = 0.75
+
+# Final calculation
+
+1 hour walltime * 0.75 nodes * 1 QoS Factor * 100 Charge Factor = 75 AUs
+
+
+

If a job requests the maximum amount of any resource type available on the node (CPUs, GPUs, RAM), it will be charged with the full charge factor (10 or 100).

+
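As a minimal sketch (not an official tool), the shared CPU-node charge above can be estimated with a short shell snippet; the node totals (104 cores, 246064M of RAM) and the charge factor of 10 correspond to a standard CPU node:

walltime_hours=1
cores=26
mem_mb=123032
qos_factor=1
charge_factor=10

# The node fraction is the larger of the core fraction and the memory fraction
node_fraction=$(awk -v c="$cores" -v m="$mem_mb" 'BEGIN { f1=c/104; f2=m/246064; print (f1>f2)?f1:f2 }')
awk -v h="$walltime_hours" -v n="$node_fraction" -v q="$qos_factor" -v f="$charge_factor" 'BEGIN { printf "%.2f AUs\n", h*n*q*f }'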

Performance Recommendations#

+

Please see this page for our most up-to-date performance recommendations on Kestrel.

diff --git a/Documentation/Systems/Kestrel/Running/kestrel_job_priorities/index.html b/Documentation/Systems/Kestrel/Running/kestrel_job_priorities/index.html
new file mode 100644

Job Priorities on Kestrel#

+

Job priority on Kestrel is determined by a number of factors including queue wait time (AGE), job size, the need for limited resources (PARTITION), request for priority boost (QOS), and Fair-Share.

+

Learn about job partitions and scheduling policies.

+

How to View Your Job's Priority#

+

The sprio command may be used to look at your job's priority. Priority for a job in the queue is calculated as the sum of these components:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ComponentContribution
AGEJobs accumulate priority points per minute the job spends eligible in the queue.
JOBSIZELarger jobs have some priority advantage to allow them to accumulate needed nodes faster.
PARTITIONJobs routed to partitions with special features (memory, disk, GPUs) have priority to use nodes equipped with those features.
QOSJobs associated with projects that have exceeded their annual allocation are assigned low priority.
Jobs associated with projects that have an allocation remaining are assigned normal priority. These jobs start before jobs with a low priority.
A job may request high priority using --qos=high. Jobs with high priority start before jobs with low or normal priority. Jobs with qos=high use allocated hours at 2x the normal rate.
FAIR-SHAREEach project's Fair-Share value will be (Project Allocation) / (Total Kestrel Allocation). Those using less than their fair share in the last 2 weeks will have increased priority. Those using more than their fair share in the last 2 weeks will have decreased priority.
+

The squeue --start <JOBID> command can be helpful in estimating when a job will run.

+

The scontrol show job <JOBID> command can be useful for troubleshooting why a job is not starting.

+
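For example (replace <JOBID> with your job's ID):

sprio -l -j <JOBID>          # show the individual priority components (age, job size, partition, QOS, fair-share)
squeue --start -j <JOBID>    # show Slurm's current estimate of the job's start time
scontrol show job <JOBID>    # show full job details when troubleshooting a job that is not starting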

How to Get High Priority for a Job#

+

You can submit your job to run at high priority or you can request a node reservation.

+

Running a Job at High Priority#

+

Jobs that are run at high priority will be charged against the project's allocation at twice the normal rate. If your job would have taken 60 hours to complete at normal priority, it will be charged 120 hours against your allocation when run with high priority.

+

If you've got a deadline coming up and you want to reduce the queue wait time for your jobs, you can run your jobs at high priority by submitting them with the --qos=high option. This will provide a small priority boost.

+

Requesting a Node Reservation#

+

If you are doing work that requires real-time Kestrel access in conjunction with other ESIF user facility laboratory resources, you may request that nodes be reserved for specific time periods.

+

Your project allocation will be charged for the entire time you have the nodes reserved, whether you use them or not.

+

To request a reservation, contact HPC Help.

diff --git a/Documentation/Systems/Kestrel/Running/performancerecs/index.html b/Documentation/Systems/Kestrel/Running/performancerecs/index.html
new file mode 100644

Performance Recommendations#

+

Please note that all of these recommendations are subject to change as we continue to improve the system.

+

OpenMP#

+

If you are running a code with OpenMP enabled, we recommend manually setting one of the following environment variables:

+
export OMP_PROC_BIND=spread # for non-intel built codes
+
+export KMP_AFFINITY=balanced # for codes built with intel compilers
+
+

You may need to export these variables even if you are not running your job with threading, i.e., with OMP_NUM_THREADS=1

+

MPI#

+

Currently, some applications on Kestrel are not scaling with the expected performance. We are actively working with the vendor's engineers to resolve these issues. For now, for these applications, we have compiled a set of recommendations that may help with performance. Note that any given recommendation may or may not apply to your specific application. We strongly recommend conducting your own performance and scalability tests on your performance-critical codes.

+
    +
  1. +

    Use Cray MPICH over OpenMPI or Intel MPI. If you need help rebuilding your code so that it uses Cray MPICH, please reach out to hpc-help@nrel.gov

    +
  2. +
  3. +

    For MPI collectives-heavy applications, set the following environment variables (for Cray MPICH):

    export MPICH_SHARED_MEM_COLL_OPT=mpi_bcast,mpi_barrier 
    +export MPICH_COLL_OPT_OFF=mpi_allreduce 
    +
    +These environment variables turn off some collective optimizations that we have seen can cause slowdowns. For more information on these environment variables, visit HPE's documentation site here.

    +
  4. +
  5. +

    For hybrid MPI/OpenMP codes, request more threads per task than you tend to request on Eagle. This may yield performance improvements.

    +
  6. +
+

MPI Stall Library#

+

For calculations requesting more than ~10 nodes, you can use the cray mpich stall library. This library can help reduce slowdowns in your calculation runtime caused by congestion in MPI communication, a possible performance bottleneck on Kestrel for calculations using ~10 nodes or more. To use the library, you must first make sure your code has been compiled within one of the PrgEnv-gnu, PrgEnv-cray, or PrgEnv-intel programming environments. Then, add the following lines to your sbatch submit script: +

stall_path=/nopt/nrel/apps/cray-mpich-stall
+export LD_LIBRARY_PATH=$stall_path/libs_mpich_nrel_{PRGENV-NAME}:$LD_LIBRARY_PATH
+export MPICH_OFI_CQ_STALL=1
+
+ Where {PRGENV-NAME} is replaced with one of cray, intel, or gnu. For example, if you compiled your code within the default PrgEnv-gnu environment, then you would export the following lines: +
stall_path=/nopt/nrel/apps/cray-mpich-stall
+export LD_LIBRARY_PATH=$stall_path/libs_mpich_nrel_gnu:$LD_LIBRARY_PATH
+export MPICH_OFI_CQ_STALL=1
+
+The default "stall" of the MPI tasks is 12 microseconds, which we recommend trying before manually adjusting the stall time. You can adjust the stall to be longer or shorter with export MPICH_OFI_CQ_STALL_USECS=[time in microseconds] e.g. for 6 microseconds, export MPICH_OFI_CQ_STALL_USECS=6. A stall time of 0 would be the same as "regular" MPI. As stall time increases, the amount of congestion decreases, up to a calculation-dependent "optimal" stall time. If you need assistance in using this stall library, please email hpc-help@nrel.gov.

diff --git a/Documentation/Systems/Kestrel/index.html b/Documentation/Systems/Kestrel/index.html
new file mode 100644

About the Kestrel Cluster#

+

Kestrel is configured to run compute-intensive and parallel computing jobs. It is a heterogeneous system comprised of 2,314 CPU-only nodes, and 132 GPU-accelerated nodes that run the Linux operating system (Red Hat Enterprise Linux), with a peak performance of 44 PetaFLOPS.

+

Please see the System Configurations page for more information about hardware, storage, and networking.

+

Accessing Kestrel#

+

Access to Kestrel requires an NREL HPC account and permission to join an existing allocation. Please see the System Access page for more information on accounts and allocations.

+

Kestrel has two types of login nodes, CPU and GPU, which share the same architecture as the corresponding compute nodes. You should use the CPU login nodes to compile software for use on and to submit jobs to the CPU compute nodes, and the GPU login nodes for GPU jobs.

+

For NREL Employees:#

+

Users on an NREL device may connect via ssh to Kestrel from the NREL network using:

+
    +
  • kestrel.hpc.nrel.gov (CPU)
  • +
  • kestrel-gpu.hpc.nrel.gov (GPU)
  • +
+

This will connect to one of the three login nodes using a round-robin load balancing approach. Users also have the option of connecting directly to an individual login node using one of the following names:

+
    +
  • kl1.hpc.nrel.gov (CPU)
  • +
  • kl2.hpc.nrel.gov (CPU)
  • +
  • kl3.hpc.nrel.gov (CPU)
  • +
  • kl5.hpc.nrel.gov (GPU)
  • +
  • kl6.hpc.nrel.gov (GPU)
  • +
+
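For example, from an NREL device on the NREL network:

ssh <username>@kestrel.hpc.nrel.gov    # round-robin to one of the CPU login nodes
ssh <username>@kl5.hpc.nrel.gov        # connect directly to a specific GPU login node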

For External Collaborators:#

+

If you are an external HPC user, you will need a One-Time Password Multifactor token (OTP) for two-factor authentication.

+

For command line access, you may login directly to kestrel.nrel.gov. Alternatively, you can connect to the SSH gateway host or the HPC VPN.

+

To access the GPU login nodes, first connect with one of the methods described above, and then ssh to kestrel-gpu.hpc.nrel.gov.

+
+

Login Node Policies

+

Kestrel login nodes are shared resources, and because of that are subject to process limiting based on usage to ensure that these resources aren't being used inappropriately. Each user is permitted up to 8 cores and 100GB of RAM at a time, after which the Arbiter monitoring software will begin moderating resource consumption, restricting further processes by the user until usage is reduced to acceptable limits.

+
+

Data Analytics and Visualization (DAV) Nodes#

+

There are eight DAV nodes available on Kestrel, which are nodes intended for HPC applications that require a graphical user interface. They are not general-purpose remote desktops, and are intended for HPC or visualization software that requires Kestrel.

+

FastX is available for HPC users to use graphical applications on the DAV nodes.

+

To connect to a DAV node using the load balancing algorithm, NREL employees can connect to kestrel-dav.hpc.nrel.gov. To connect from outside the NREL network, use kestrel-dav.nrel.gov.

+

Get Help With Kestrel#

+

Please see the Help and Support Page for further information on how to seek assistance with Kestrel or your NREL HPC account.

diff --git a/Documentation/Systems/Kestrel/kestrel_release_notes/index.html b/Documentation/Systems/Kestrel/kestrel_release_notes/index.html
new file mode 100644

Kestrel Release Notes#

+

We will update this page with Kestrel release notes after major Kestrel upgrades.

+

October 8, 2024#

+
    +
  1. Slurm was upgraded from 23.11.7 to 23.11.10.
  2. +
  3. The load order of default bash profile data was changed on login nodes such that app-related environment variables load last.
  4. +
  5. PrgEnv-gnu/8.5.0 is now loaded by default when you login to Kestrel instead of PrgEnv-cray.
  6. +
  7. The module restore command shouldn't be used. It will load broken modules.
  8. +
+

August 14, 2024#

+

Jobs running on debug GPU nodes are now limited to a total of half of one GPU node's resources across one or two nodes. This is equivalent to 64 CPUs, 2 GPUs, and 180G of RAM on one node or 32 CPUs, 1 GPU, and 90GB of RAM on two nodes. --exclusive can no longer be used for GPU debug jobs.

+

August 9, 2024#

+

As of 08/09/2024 we have released new modules for VASP on Kestrel CPUs:

+
------------ /nopt/nrel/apps/cpu_stack/modules/default/application -------------
+   #new modules:
+   vasp/5.4.4+tpc     vasp/6.3.2_openMP+tpc    vasp/6.4.2_openMP+tpc
+   vasp/5.4.4_base    vasp/6.3.2_openMP        vasp/6.4.2_openMP
+
+   #legacy modules will be removed during next system time:
+   vasp/5.4.4         vasp/6.3.2               vasp/6.4.2            (D)
+
+

What’s new:

+
    +
  • New modules have been rebuilt with the latest Cray Programming Environment (cpe23), updated compilers, and math libraries.
  • +
  • OpenMP capability has been added to VASP 6 builds.
  • +
  • Modules that include third-party codes (e.g., libXC, libBEEF, VTST tools, and VASPsol) are now denoted with +tpc. Use module show vasp/<version> to see details of a specific version.
  • +
+

July 29 - July 30, 2024#

+
    +
  1. Two GPU login nodes were added. Use the GPU login nodes for compiling software to run on GPU nodes and for submitting GPU jobs.
  2. +
  3. GPU compute nodes were made available for general use and additional GPU partitions were added. See Running on Kestrel for additional information and recommendations.
  4. +
+

Module Updates/Changes

+
    +
  1. +

    Modules are automatically loaded depending on node type, e.g., the GPU module stack is automatically loaded on GPU nodes.

    +
  2. +
  3. +

    Naming convention for compilers:
    + example gcc compiler:

    +
      +
    • Gcc/version is the compiler used by CPE with Prgenv
    • +
    • Gcc-native/version: also meant to be used with Prgenv. The difference between gcc-native and gcc is that the former is optimized for the specific architecture
    • +
    • Gcc-stdalone/version: this gcc is meant to be used outside of CPE.
    • +
    • The same applies to nvhpc and aocc.
    • +
    +
  4. +
  5. +

    Intel vs oneapi:
    +Moving forward, the -intel naming in modules (e.g. adios/1.13.1-intel-oneapi-mpi-intel) will be deprecated in favor of -oneapi (e.g. adios/1.13.1-intel-oneapi-mpi-oneapi).
    +This is implemented for the gpu modules and will be implemented for the CPU in the future.
    +Oneapi is the new naming convention for intel compilers.

    +
  6. +
  7. +

    compilers-mixed:
    In the list of compilers, you'll see compilers with -mixed, e.g. nvhpc-mixed (the same applies to intel, gcc, aocc, etc.). Those are meant to be used with CPE Prgenv, where you can force a mix and match between compilers. Example: loading Prgenv-nvhpc and loading gcc-mixed. This is not recommended and should only be used if you know what you're doing.

    +
  8. +
  9. +

    Nvhpc:
    +There are 5 types of nvhpc modules:
    +The Nvidia module is equivalent to nvhpc and is meant to be used with CPE (Prgenv-nvidia). Per HPE's instruction, only Prgenv-nvhpc should be used, not Prgenv-nvidia.

    +
      +
    • Nvhpc which is meant to be used with CPE (Prgenv-nvhpc)
    • +
    • Nvhpc-mixed : meant to be used with CPE
    • +
    • Nvhpc-stdalone : can be used outside of CPE for your usual compilation will load the compilers and a precompiled openmpi that ships with nvhpc
    • +
    • nvhpc-nompi: Similar to Nvhpc-stdalone but doesn’t load the precompiled ompi
    • +
    • nvhpc-byo-compiler: only load libs and header files contained in the nvidia SDK, no compiler or mpi is loaded
    • +
    +
  10. +
  11. +

    Cuda:

    +
      +
    • Cuda/11.7 was removed. If you'd like to access cuda as a standalone you can load cuda/12.3; cuda/12.1 was also added (for the GPUs)
    • +
    +
  12. +
  13. +

    Intel:

    +
      +
    • Intel, intel-oneapi and intel-classic are modules to be used with CPE. If you want to use standalone intel compilers outside of CPE please use: +Intel-oneapi-compilers.
    • +
    • intel-oneapi-compilers/2024.1.0 was added.
    • +
    +
  14. +
  15. +

    Anaconda:

    +
      +
    • The 2024 version is now added.
    • +
    +
  16. +
+

April 12 - April 17, 2024#

+
    +
  1. +

    The size of the shared node partition was doubled from 32 nodes to 64 nodes.

    +
  2. +
  3. +

    Cray programming environment (CPE) 23.12 is now the default on the system.

    +
  4. +
  5. +

    To use node local storage, you will now need to use the $TMPDIR environment variable. $TMPDIR will now be set to /tmp/scratch/$JOBID. Hard-coding /tmp/scratch won't work. This change was made to prevent conflicts between multiple users/jobs writing to local disk on shared nodes. As a reminder, writing to $TMPDIR will use local disk on the nodes that have one, and RAM (up to 128Gb) on nodes without.

    +
  6. +
  7. +

    /kfs2/pdatasets was renamed to /kfs2/datasets and a symlink /datasets was added.

    +
  8. +
+

Jan. 29 - Feb. 14, 2024 Upgrades#

+
    +
  1. +

    We have experienced that most previously built software runs without modification (this includes NREL provided modules) and performs at the same level.

    +
  2. +
  3. +

    Cray programming environment (CPE) 22.10, the default on the system, produces an error with cray-libsci when using PrgEnv-intel and the cc, CC, or ftn compiler wrappers. This error can be overcome either by swapping in a newer revision of cray-libsci, or by loading CPE/22.12.

    +

    In the first case, you can load PrgEnv-intel then swap to the newer libsci library:

    +
    module swap PrgEnv-cray PrgEnv-intel 
    +module swap cray-libsci cray-libsci/22.12.1.1 
    +
    +

    In the second case, you can load the newer CPE with PrgEnv-intel by:

    +
    module restore system 
    +module purge 
    +module use /opt/cray/pe/modulefiles/ 
    +module load cpe/22.12 
    +module load craype-x86-spr 
    +module load PrgEnv-cray 
    +module swap PrgEnv-cray PrgEnv-intel  
    +
    +
  4. +
  5. +

    CPE 23.12 is now available on the system but is a work-in-progress. We are still building out the CPE 23 NREL modules.

    +

    To load CPE 23.12:

    +
    module restore system 
    +source /nopt/nrel/apps/cpu_stack/env_cpe23.sh
    +module purge
    +module use /opt/cray/pe/modulefiles/
    +module load cpe/23.12
    +module load craype-x86-spr
    +module load intel-oneapi/2023.0.0
    +module load PrgEnv-intel
    +
    +

    To load our modules built with CPE 23.12, you need to source the following environment. (Note that we are still building/updating these)

    +

    source /nopt/nrel/apps/cpu_stack/env_cpe23.sh

    +

    NOTE: In CPE 23.12, some modules, when invoked, silently fail to load. We are still working on fixing this. For now, check that your modules have loaded appropriately with module list.

    +
  6. +
diff --git a/Documentation/Systems/Kestrel/modules/index.html b/Documentation/Systems/Kestrel/modules/index.html
new file mode 100644

Modules

diff --git a/Documentation/Systems/Swift/applications/index.html b/Documentation/Systems/Swift/applications/index.html
new file mode 100644

Swift applications#

+

Some optimized versions of common applications are provided for the Swift cluster. Below is a list of how to utilize these applications and the optimizations for Swift.

+

Modules#

+

Many are available as part of the Modules setup.

+

TensorFlow#

+

TensorFlow has been built for the AMD architecture on Swift. This was done by using the following two build flags.

+
-march=znver2
+-mtune=znver2
+
+

This version of TensorFlow can be installed from a wheel file: +

pip install --upgrade --no-deps --force-reinstall /nopt/nrel/apps/wheels/tensorflow-2.4.2-cp38-cp38-linux_x86_64-cpu.whl
+

+

Currently, this wheel is not built with NVIDIA CUDA support for running on GPU.

+

TensorFlow installed on Swift with Conda may be significantly slower than the optimized version

diff --git a/Documentation/Systems/Swift/filesystems/index.html b/Documentation/Systems/Swift/filesystems/index.html
new file mode 100644

Swift Filesystem Architecture Overview#

+

Swift's central storage currently has a capacity of approximately 3PB, served over NFS (Network File System). It is a performant system with multiple read and write cache layers and redundancies for data protection, but it is not a parallel filesystem, unlike Kestrel's Lustre configuration.

+

The underlying filesystem and volume management is via ZFS. Data is protected in ZFS RAID arrangements (raidz3) of 8 storage disks and 3 parity disks.

+

Each Swift fileserver serves a single storage chassis (JBOD, "just a bunch of disks") consisting of multiple spinning disks plus SSD drives for read and write caches.

+

Each fileserver is also connected to a second storage chassis to serve as a redundant backup in case the primary fileserver for that storage chassis fails, allowing continued access to the data on the storage chassis until the primary fileserver for that chassis is restored to service.

+

Project Storage: /projects#

+

Each active project is granted a subdirectory under /projects/<projectname>. This is where the bulk of data is expected to be, and where jobs should generally be run from. Storage quotas are based on the allocation award.

+

Quota usage can be viewed at any time by issuing a cd command into the project directory, and using the df -h command to view total, used, and remaining available space for the mounted project directory.

+

NFS Automount System#

+

Project directories are automatically mounted or unmounted via NFS on an "as-needed" basis. /projects directories that have not been accessed for a period of time will be umounted and not immediately visible via a command such as ls /projects, but will become immediately available if a file or path is accessed with an ls, cd, or other file access is made in that path.

+

Home Directories: /home#

+

/home directories are mounted as /home/<username>. Home directories are hosted under the user's initial /project directory. Quotas in /home are included as a part of the quota of that project's storage allocation.

+

Scratch Space: /scratch/username and /scratch/username/jobid#

+

For users who also have Kestrel allocations, please be aware that scratch space on Swift behaves differently, so adjustments to job scripts may be necessary.

+

The scratch directory on each Swift compute node is a 1.8TB spinning disk, and is accessible only on that node. The default writable path for scratch use is /scratch/<username>. There is no global, network-accessible /scratch space. /projects and /home are both network-accessible, and may be used as /scratch-style working space instead.

+

Temporary space: $TMPDIR#

+

When a job starts, the environment variable $TMPDIR is set to /scratch/<username>/<jobid> for the duration of the job. This is temporary space only, and should be purged when your job is complete. Please be sure to use this path instead of /tmp for your tempfiles.

+

There is no expectation of data longevity in scratch space, and it is subject to purging once the node is idle. If desired data is stored here during the job, please be sure to copy it to a /projects directory as part of the job script before the job finishes.

+
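For example, a job script might end with a copy such as (paths are illustrative):

cp -r $TMPDIR/results /projects/<projectname>/results_$SLURM_JOB_ID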

Mass Storage System#

+

There is no Mass Storage System for deep archive storage on Swift.

+

Backups and Snapshots#

+

There are no backups or snapshots of data on Swift. Though the system is protected from hardware failure by multiple layers of redundancy, please keep regular backups of important data on Swift, and consider using a Version Control System (such as Git) for important code.

diff --git a/Documentation/Systems/Swift/index.html b/Documentation/Systems/Swift/index.html
new file mode 100644

About the Swift Cluster#

+

Swift is an AMD-based HPC cluster with AMD EPYC 7532 (Rome) CPU's that supports EERE Vehicles Technologies Office (VTO) projects. Any VTO funded EERE project is eligible for an allocation on Swift. Allocation decisions are made by EERE through the annual allocation cycle. Swift is well suited for parallel jobs up to 64 nodes and offers better queue wait times for projects that are eligible.

+

Please see the System Configurations page for more information about hardware, storage, and networking.

+

Accessing Swift#

+

Access to Swift requires an NREL HPC account and permission to join an existing allocation. Please see the System Access page for more information on accounts and allocations.

+

Login Nodes:#

+
swift.hpc.nrel.gov
+swift-login-1.hpc.nrel.gov
+
+

For NREL Employees:#

+

Swift can be reached from the NREL VPN via ssh to the login nodes as above.

+

For External Collaborators:#

+

There are currently no external-facing login nodes for Swift. There are two options to connect:

+
    +
  1. Connect to the SSH gateway host and log in with your username, password, and OTP code. Once connected, ssh to the login nodes as above.
  2. +
  3. Connect to the HPC VPN and ssh to the login nodes as above.
  4. +
+

Get Help With Swift#

+

Please see the Help and Support Page for further information on how to seek assistance with Swift or your NREL HPC account.

diff --git a/Documentation/Systems/Swift/known/index.html b/Documentation/Systems/Swift/known/index.html
new file mode 100644

Known

diff --git a/Documentation/Systems/Swift/modules/index.html b/Documentation/Systems/Swift/modules/index.html
new file mode 100644

Swift Modules#

+

This describes how to activate and use the modules available on Swift.

+

Source#

+

Environments are provided with a number of commonly used modules including compilers, common build tools, specific AMD optimized libraries, and some analysis tools. When you first login there is a default set of modules available. These can be seen by running the command:

+
module avail 
+
+

Since Swift is a new machine, we are experimenting with additional environments. The environments are in date-stamped subdirectories under /nopt/nrel/apps. Each environment directory has a file myenv.*. If the myenv.* file is missing from a directory, then that environment is a work in progress. Sourcing a myenv.* file will enable the environment and give you a new set of modules.

+

For example, to enable the environment /nopt/nrel/apps/210728a, source the provided environment file:

+
source /nopt/nrel/apps/210728a/myenv.2107290127
+
+

You will now have access to the modules provided. These can be listed using the following:

+
ml avail 
+
+

If you want to build applications you can then module load compilers and the like; for example

+
ml gcc openmpi
+
+

will load gnu 9.4 and openmpi.

+

Software is installed using a spack hierarchy. It is possible to add software to the hierarchy. This should be only done by people responsible for installing software for all users. It is also possible to do a spack install creating a new level of the hierarchy in your personal space. These procedures are documented in https://github.nrel.gov/tkaiser2/spackit.git in the file Notes03.md under the sections Building on the hierarchy and Building outside the hierarchy. If you want to try this please contact Tim Kaiser to walk through the procedure.

+

Most environments have an example directory. You can copy this directory to your own space and compile and run the examples. The files runintel and runopenmp are simple batch scripts. These also have "module load" lines that you need to run before building with either compiler set.

diff --git a/Documentation/Systems/Swift/running/index.html b/Documentation/Systems/Swift/running/index.html
new file mode 100644

Running on Swift#

+

Please see the Modules page for information about setting up your environment and loading modules.

+

Login nodes#

+
swift.hpc.nrel.gov
+swift-login-1.hpc.nrel.gov
+
+

swift.hpc.nrel.gov is a round-robin alias that will connect you to any available login node.

+

SSH Keys#

+

User accounts have a default set of keys, cluster and cluster.pub. The config file will use these even if you generate a new keypair using ssh-keygen. If you are adding your keys to GitHub or elsewhere, you should either use cluster.pub or modify the config file.

+

Slurm and Partitions#

+

The most up to date list of partitions can always be found by running the sinfo command on the cluster.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PartitionDescription
longjobs up to ten days of walltime
standardjobs up to two days of walltime
gpuNodes with four NVIDIA A100 40 GB Computational Accelerators, up to two days of walltime
paralleloptimized for large parallel jobs, up to two days of walltime
debugtwo nodes reserved for short tests, up to four hours of walltime
+

Each partition also has a matching -standby partition. Allocations which have consumed all awarded AUs for the year may only submit jobs to these partitions, and their default QoS will be set to standby. Jobs in standby partitions will be scheduled when there are otherwise idle cycles and no other non-standby jobs are available. Jobs that run in the standby queue will not be charged any AUs.

+

Any allocation may submit a job to a standby QoS, even if there are unspent AUs.

+

By default, nodes can be shared between users. To get exclusive access to a node use the --exclusive flag in your sbatch script or on the sbatch command line.

+
+

Important

+

Use --cpus-per-task with srun/sbatch otherwise some applications may only utilize a single core.

+
+

GPU Nodes#

+

Swift now has ten GPU nodes. Each GPU node has 4 NVIDIA A100 40GB GPUs, 96 CPU cores, and 1TB RAM.

+

GPU nodes are also shared, meaning that less than a full node may be requested for a job, leaving the remainder of the node for use by other jobs concurrently. (See the section below on AU Charges for how this affects the AU usage rate.)

+

To request use of a GPU, use the flag --gres=gpu:<quantity> with sbatch, srun, or salloc, or add it as an #SBATCH directive in your sbatch submit script, where <quantity> is a number from 1 to 4.

+

CPU Core and RAM Defaults on GPU Nodes#

+

If your job will require more than the default 1 CPU core and 1.5GB RAM you must request the quantity of cores and/or RAM that you will need, by using additional flags such as --ntasks= or --mem=. See the Slurm Job Scheduling section for details on requesting additional resources.

+
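A minimal sketch of the relevant directives for a partial GPU node (the core and memory values are illustrative):

#SBATCH --gres=gpu:2     # request 2 of the 4 A100 GPUs on the node
#SBATCH --ntasks=24      # request CPU cores beyond the 1-core default
#SBATCH --mem=100G       # request RAM beyond the 1.5GB default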

Allocation Unit (AU) Charges#

+

The equation for calculating the AU cost of a job on Swift is:

+

AU cost = (Walltime in hours * Number of Nodes * QoS Factor * Charge Factor)

+

The Walltime is the actual length of time that the job runs, in hours or fractions thereof.

+

The Number of nodes can be whole nodes or fractions of a node. See below for more information.

+

The Charge Factor for Swift CPU nodes is 5.

+

The Charge Factor for Swift GPU nodes is 50, or 12.5 per GPU.

+

The QoS Factor for normal priority jobs is 1.

+

The QoS Factor for high-priority jobs is 2.

+

The QoS Factor for standby priority jobs is 0. There is no AU cost for standby jobs.

+

One CPU node for one hour of walltime at normal priority costs 5 AU total.

+

One CPU node for one hour of walltime at high priority costs 10 AU total.

+

One GPU for one hour of walltime at normal priority costs 12.5 AU total.

+

Four GPUs for one hour of walltime at normal priority costs 50 AU total.

+

Shared/Fractional CPU Nodes#

+

Swift allows jobs to share nodes, meaning fractional allocations are possible.

+

Standard (CPU) compute nodes have 128 CPU cores and 256GB RAM.

+

When a job only requests part of a node, usage is tracked on the basis of:

+

1 core = 2GB RAM = 1/128th of a node

+

Using all resources on a single node, whether CPU, RAM, or both, will max out at 128/128 per node = 1.

+

The highest quantity of resource requested will determine the total AU charge.

+

For example, a job that requests 64 cores and 128GB RAM (one half of a node) would be:

+

1 hour walltime * 0.5 nodes * 1 QoS Factor * 5 Charge Factor = 2.5 AU per node-hour.

+

Shared/Fractional GPU Nodes#

+

Jobs on Swift may also share GPU nodes.

+

Standard GPU nodes have 96 CPU cores, four NVIDIA A100 40GB GPUs, and 1TB RAM.

+

You may request 1, 2, 3, or 4 GPUs per GPU node, as well as any additional CPU and RAM required.

+

Usage is tracked on the basis of:

+

1 GPU = 25% of total cores (24/96) = 25% of total RAM (256GB/1TB) = 25% of a node

+

The highest quantity of resource requested will determine the total AU charge.

+

AU Calculation Examples#

+

AU calculations are performed automatically by the Slurm scheduler and Lex (NREL's web-based allocation tracking and management software). The following calculations are approximations only, intended to illustrate how your AUs will be consumed based on your job resource requests:

+

A request of 1 GPU, up to 24 CPU cores, and up to 256GB RAM will be charged at 12.5 AU/hr:

+
    +
  • 1/4 GPUs = 25% total GPUs = 50 AU * 0.25 = 12.5 AU (this is what will be charged)
  • +
  • 1 core = 1% total cores = 50 AU * 0.01 = 0.50 AU (ignored)
  • +
  • 1GB/1TB = 0.1% total RAM = 50 AU * 0.001 = 0.05 AU (ignored)
  • +
+

A request of 1 GPU, 48 CPU cores, and 100GB RAM will be charged at 25 AU/hr:

+
    +
  • 1/4 GPUs = 25% total GPUs = 50 AU * 0.25 = 12.5 AU (ignored)
  • +
  • 48/96 cores = 50% total cores = 50 AU * 0.5 = 25 AU (this is what will be charged)
  • +
  • 100GB/1TB = 10% total RAM = 50 AU * 0.10 = 5 AU (ignored)
  • +
+

A request of 2 GPUs, 55 CPU cores, and 200GB RAM will be charged at approximately 28.7 AU/hr:

+
    +
  • 2/4 GPUs = 50% total GPUS = 50 AU * 0.5 = 25 AU (ignored)
  • +
  • 55/96 cores = 57.3% of total cores = 50 AU * 0.573 = 28.65 AU (this is what will be charged)
  • +
  • 200GB/1TB = 20% total RAM = 50 AU * 0.2 = 10 AU (ignored)
  • +
+

A request of 1 GPU, 1 CPU core, and 1TB RAM will be charged at 50 AU/hr:

+
    +
  • 1/4 GPUs = 25% total GPUS = 50 AU * 0.25 = 12.5 AU (ignored)
  • +
  • 1/96 cores = 1% total cores = 50 AU * 0.01 = 0.50 AU (ignored)
  • +
  • 1TB/1TB = 100% total RAM = 50 AU * 1 = 50 AU (this is what will be charged)
  • +
+

Software Environments and Example Files#

+

Multiple software environments are available on Swift, with a number of commonly used modules including compilers, common build tools, AMD-optimized libraries, and some analysis tools. The environments live in date-stamped subdirectories under /nopt/nrel/apps. Each environment directory has a file myenv.*; sourcing that file will enable the environment.
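
As a sketch, assuming a date-stamped directory such as 210928a (the directory name and the exact myenv file name change over time, so list them first):

ls /nopt/nrel/apps/                      # see which date-stamped environments exist
ls /nopt/nrel/apps/210928a/myenv.*       # find the exact myenv file name
source /nopt/nrel/apps/210928a/myenv.<timestamp>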

+

When you log in, you will have access to the default environment, and the myenv file will have been sourced for you. You can see the directory containing the environment by running the module avail command.

+

In the directory for an environment you will see a subdirectory example. This contains a makefile for a simple hello world program written in both Fortran and C. The README.md file contains additional information, most of which is replicated here. It is suggested that you copy the example directory to your home directory for experimentation:

+
cp -r example ~/example
+cd ~/example
+
+

Conda#

+

There is a very basic version of conda in the "anaconda" directory in each /nopt/nrel/apps/YYMMDDa directory. However, there is a more complete environment pointed to by the module under /nopt/nrel/apps/modules. Please see our Conda Documentation for more information.
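
A hypothetical usage sketch (the exact module name may differ on the system, so check what is available first):

module avail 2>&1 | grep -i conda        # or search for "anaconda"
module load conda                        # assumed module name; adjust to what module avail shows
conda env list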

+

Simple batch script#

+

Here is a sample batch script for running the 'hello world' example program, runopenmpi.

+
#!/bin/bash
+#SBATCH --job-name="install"
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=2
+#SBATCH --exclusive
+#SBATCH --account=<myaccount>
+#SBATCH --partition=debug
+#SBATCH --time=00:01:00
+
+
+cat $0
+
+#These should be loaded before doing a make
+module load gcc  openmpi 
+
+export OMP_NUM_THREADS=2
+srun  -n 4 ./fhostone -F
+srun  -n 4 ./phostone -F
+
+

To run this you need to replace <myaccount> with the appropriate account and ensure that slurm is in your path by running:

+
module load slurm
+
+

Then submit the sbatch script with:

+
sbatch --partition=debug runopenmpi
+
+

Building the 'hello world' example#

+

For the script given above to work, you must first build the application. You need to:

+
1. Load the modules
2. Run make
+

Loading the modules#

+

We are going to use the GNU compilers with OpenMPI.

+
ml gcc openmpi
+
+

Run make#

+
make
+
+

Full procedure#

+
[nrmc2l@swift-login-1 ~]$ cd ~
+[nrmc2l@swift-login-1 ~]$ mkdir example
+[nrmc2l@swift-login-1 ~]$ cd ~/example
+[nrmc2l@swift-login-1 ~]$ cp -r /nopt/nrel/apps/210928a/example/* .
+
+[nrmc2l@swift-login-1 ~ example]$ cat runopenmpi 
+#!/bin/bash
+#SBATCH --job-name="install"
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=2
+#SBATCH --exclusive
+#SBATCH --account=<myaccount>
+#SBATCH --partition=debug
+#SBATCH --time=00:01:00
+
+
+cat $0
+
+#These should be loaded before doing a make:
+module load gcc  openmpi 
+
+export OMP_NUM_THREADS=2
+srun  -n 4 ./fhostone -F
+srun  -n 4 ./phostone -F
+
+
+[nrmc2l@swift-login-1 ~ example]$ module load gcc  openmpi
+[nrmc2l@swift-login-1 ~ example]$ make
+mpif90 -fopenmp fhostone.f90 -o fhostone
+rm getit.mod  mympi.mod  numz.mod
+mpicc -fopenmp phostone.c -o phostone
+[nrmc2l@swift-login-1 ~ example]$ sbatch runopenmpi
+Submitted batch job 187
+[nrmc2l@swift-login-1 ~ example]$ 
+
+

Results#

+
[nrmc2l@swift-login-1 example]$ cat slurm-187.out
+#!/bin/bash
+#SBATCH --job-name="install"
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=2
+#SBATCH --exclusive
+#SBATCH --partition=debug
+#SBATCH --time=00:01:00
+
+
+cat $0
+
+#These should be loaded before doing a make
+module load gcc  openmpi 
+
+export OMP_NUM_THREADS=2
+srun  -n 4 ./fhostone -F
+srun  -n 4 ./phostone -F
+
+MPI Version:Open MPI v4.1.1, package: Open MPI nrmc2l@swift-login-1.swift.hpc.nrel.gov Distribution, ident: 4.1.1, repo rev: v4.1.1, Apr 24, 2021
+task    thread             node name  first task    # on node  core
+0002      0000                 c1-31        0002         0000   018
+0000      0000                 c1-30        0000         0000   072
+0000      0001                 c1-30        0000         0000   095
+0001      0000                 c1-30        0000         0001   096
+0001      0001                 c1-30        0000         0001   099
+0002      0001                 c1-31        0002         0000   085
+0003      0000                 c1-31        0002         0001   063
+0003      0001                 c1-31        0002         0001   099
+0001      0000                 c1-30        0000         0001  0097
+0001      0001                 c1-30        0000         0001  0103
+0003      0000                 c1-31        0002         0001  0062
+0003      0001                 c1-31        0002         0001  0103
+MPI VERSION Open MPI v4.1.1, package: Open MPI nrmc2l@swift-login-1.swift.hpc.nrel.gov Distribution, ident: 4.1.1, repo rev: v4.1.1, Apr 24, 2021
+task    thread             node name  first task    # on node  core
+0000      0000                 c1-30        0000         0000  0072
+0000      0001                 c1-30        0000         0000  0020
+0002      0000                 c1-31        0002         0000  0000
+0002      0001                 c1-31        0002         0000  0067
+[nrmc2l@swift-login-1 example]$ 
+
+

Building with Intel Fortran or Intel C and OpenMPI#

+

You can build parallel programs using OpenMPI and the Intel Fortran ifort and Intel C icc compilers.

+

We built the example programs with the GNU compilers and OpenMPI using the lines:

+
[nrmc2l@swift-login-1 ~ example]$ mpif90 -fopenmp fhostone.f90 -o fhostone
+[nrmc2l@swift-login-1 ~ example]$ mpicc -fopenmp phostone.c -o phostone
+
+

This gives us:

+

[nrmc2l@swift-login-1 ~ example]$ ls -l fhostone
+-rwxrwxr-x. 1 nrmc2l nrmc2l 42128 Jul 30 13:36 fhostone
+[nrmc2l@swift-login-1 ~ example]$ ls -l phostone
+-rwxrwxr-x. 1 nrmc2l nrmc2l 32784 Jul 30 13:36 phostone
+
+Note the size of the executable files.

+

If you want to use the Intel compilers, first load the appropriate modules:

+
module load openmpi intel-oneapi-compilers gcc
+
+

Then we can set the variables OMPI_FC=ifort and OMPI_CC=icc, and recompile:

+
[nrmc2l@swift-login-1 ~ example]$ export OMPI_FC=ifort
+[nrmc2l@swift-login-1 ~ example]$ export OMPI_CC=icc
+[nrmc2l@swift-login-1 ~ example]$ mpif90 -fopenmp fhostone.f90 -o fhostone
+[nrmc2l@swift-login-1 ~ example]$ mpicc -fopenmp phostone.c -o phostone
+
+
+[nrmc2l@swift-login-1 ~ example]$ ls -lt fhostone
+-rwxrwxr-x. 1 nrmc2l nrmc2l 41376 Jul 30 13:37 fhostone
+[nrmc2l@swift-login-1 ~ example]$ ls -lt phostone
+-rwxrwxr-x. 1 nrmc2l nrmc2l 32200 Jul 30 13:37 phostone
+[nrmc2l@swift-login-1 ~ example]$ 
+
+

Note that the sizes of the executable files have changed. You can also see the difference by running the commands:

+
nm fhostone | grep intel | wc
+nm phostone | grep intel | wc
+
+

on the two versions of the program. This will show how many references to Intel routines are in each executable: 51 and 36 respectively, compared to 0 for the GNU versions.

+

Building and Running with Intel MPI#

+

We can build with the Intel versions of MPI. We assume we will want to build with icc and ifort as the backend compilers. We load the modules:

+
ml gcc
+ml intel-oneapi-compilers
+ml intel-oneapi-mpi
+
+

Then, build and run the same example as above:

+
make clean
+make PFC=mpiifort PCC=mpiicc 
+
+

Giving us:

+
[nrmc2l@swift-login-1 example]$ ls -lt fhostone phostone
+-rwxrwxr-x. 1 nrmc2l hpcapps 160944 Aug  5 16:14 phostone
+-rwxrwxr-x. 1 nrmc2l hpcapps 952352 Aug  5 16:14 fhostone
+[nrmc2l@swift-login-1 example]$ 
+
+

We need to make some changes to our batch script. Replace the module load line with:

+
module load intel-oneapi-mpi intel-oneapi-compilers gcc
+
+

Our Intel MPI batch script, runintel, found in the example directory, is:

+
#!/bin/bash
+#SBATCH --job-name="install"
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=2
+#SBATCH --exclusive
+#SBATCH --account=<myaccount>
+#SBATCH --partition=debug
+#SBATCH --time=00:01:00
+
+
+cat $0
+
+#These should be loaded before doing a make
+module load intel-oneapi-mpi intel-oneapi-compilers gcc
+
+export OMP_NUM_THREADS=2
+srun  -n 4 ./fhostone -F
+srun  -n 4 ./phostone -F
+
+

Which produces the following output:

+
MPI Version:Intel(R) MPI Library 2021.3 for Linux* OS
+
+task    thread             node name  first task    # on node  core
+0000      0000                 c1-32        0000         0000   127
+0000      0001                 c1-32        0000         0000   097
+0001      0000                 c1-32        0000         0001   062
+0001      0001                 c1-32        0000         0001   099
+
+MPI VERSION Intel(R) MPI Library 2021.3 for Linux* OS
+
+task    thread             node name  first task    # on node  core
+0000      0000                 c1-32        0000         0000  0127
+0000      0001                 c1-32        0000         0000  0097
+0001      0000                 c1-32        0000         0001  0127
+0001      0001                 c1-32        0000         0001  0099
+
+

VASP, Jupyter, Julia, and Other Applications on Swift#

+

Please see the relevant page in the Applications section for more information on using applications on Swift and other NREL clusters.


Vermilion Technical Support Contacts#

+

For assistance with accounts or allocations, software installation requests, or technical questions, please email HPC-Help@nrel.gov with "Vermilion" in the subject line.

+

Microsoft Teams#

+

There is a Microsoft Teams Vermilion channel that is one of the primary ways we communicate with Vermilion users about system updates and known problems.

+

Under the General Channel in the chat, you can post questions and collaborate with other users. We update the members annually from the project team listed.

+

For internal (NREL) users, please follow these instructions if we missed you and you would like to join:
1. In Teams, click on the "Teams" icon in the far left navigation pane.
2. Click "Join or create a team" in the lower left corner.
3. In the "Search teams" field in the upper right, type "Vermilion" and hit return.
4. Click Join.

+

For external users, please follow the instructions listed in the CSC Tutorial Team - External Users announcement.

+

Additional Support#

+

Additional HPC help and contact information can be found on the NREL HPC Help main page.


About Vermilion#

+

Vermilion is an OpenHPC-based cluster running on dual AMD EPYC 7532 Rome CPUs and NVIDIA A100 GPUs. The nodes run as virtual machines in a local virtual private cloud (OpenStack). Vermilion is allocated for NREL workloads and intended for LDRD, SPP, or Office of Science workloads. Allocation decisions are made by the IACAC through the annual allocation request process. Check back regularly, as the configuration and capabilities of Vermilion are augmented over time.

+

Accessing Vermilion#

+

Access to Vermilion requires an NREL HPC account and permission to join an existing allocation. Please see the System Access page for more information on accounts and allocations.

+

For NREL Employees:#

+

To access Vermilion, log into the NREL network and connect via ssh to either of the following:

+
ssh vs.hpc.nrel.gov
+ssh vermilion.hpc.nrel.gov
+
+

For External Collaborators:#

+

There are currently no external-facing login nodes for Vermilion. There are two options to connect:

+
1. Connect to the SSH gateway host and log in with your username, password, and OTP code. Once connected, ssh to the login nodes as above.
2. Connect to the HPC VPN and ssh to the login nodes as above.
+

There are currently two login nodes. They share the same home directory so work done on one will appear on the other. They are:

+
vs-login-1
+vs-login-2
+
+

You may connect directly to a login node, but they may be cycled in and out of the pool. If a node is unavailable, try connecting to another login node or the vs.hpc.nrel.gov round-robin option.

+

Get Help with Vermilion#

+

Please see the Help and Support Page for further information on how to seek assistance with Vermilion or your NREL HPC account.

+

Building code#

+

Don't build or run code on a login node. Login nodes have limited CPU and memory available. Use a compute or GPU node instead. Simply start an interactive job on an appropriately provisioned node and partition for your work and do your builds there. Similarly, build your projects under /projects/your_project_name/ as home directories are limited to 5GB per user.
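
A minimal sketch of that workflow, assuming the sm partition and a project named <your_project_name> (adjust the account, partition, time, and paths to your own work):

salloc --account=<myaccount> --partition=sm --time=01:00:00   # request an interactive session on a compute node
cd /projects/<your_project_name>/build                        # build under /projects, not your 5GB home directory
module load gcc openmpi
make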

+

Known Issues


Running on Vermilion#

+

This page discusses the compute nodes, partitions, and gives some examples of building and running applications.

+

About Vermilion#

+

Compute hosts#

+

Vermilion is a collection of physical nodes, with each regular node containing dual AMD EPYC 7532 Rome CPUs. However, each physical node is virtualized: it is split into virtual nodes, with each virtual node receiving a portion of the cores and memory of the physical node. Similar virtual nodes are then grouped into Slurm partitions, as shown below.

+

Shared file systems#

+

Vermilion's home directories are shared across all nodes. Each user has a quota of 5 GB. There are also /scratch/$USER and /projects spaces visible across all nodes.

+

Partitions#

+

Partitions are flexible and fluid on Vermilion. A list of partitions can be found by running the sinfo command. Here are the partitions as of 10/20/2022.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Partition Name | Qty | RAM | Cores/node | /var/scratch (1K-blocks) | AU Charge Factor |
| -------------- | --- | --- | ---------- | ------------------------ | ---------------- |
| gpu (1 x NVIDIA Tesla A100) | 16 | 114 GB | 30 | 6,240,805,336 | 12 |
| lg | 39 | 229 GB | 60 | 1,031,070,000 | 7 |
| std | 60 | 114 GB | 30 | 515,010,816 | 3.5 |
| sm | 28 | 61 GB | 16 | 256,981,000 | 0.875 |
| t | 15 | 16 GB | 4 | 61,665,000 | 0.4375 |
+

Allocation Unit (AU) Charges#

+

The equation for calculating the AU cost of a job on Vermilion is:

+

AU cost = (Walltime in hours * Number of Nodes * Charge Factor)

+

The Walltime is the actual length of time that the job runs, in hours or fractions thereof.

+

The Charge Factor for each partition is listed in the table above.
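
For example, using the charge factors in the table above, a job that runs for 2 hours on one lg node would cost 2 * 1 * 7 = 14 AUs, while the same 2 hours on a t node would cost 2 * 1 * 0.4375 = 0.875 AUs.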

+

Operating Software#

+

The Vermilion HPC cluster runs fairly current versions of OpenHPC and SLURM on top of OpenStack.

+

Examples: Build and run simple applications#

+

This section discusses how to compile and run a simple MPI application, as well as how to link against the Intel MKL library.

+

In the directory /nopt/nrel/apps/210929a you will see a subdirectory example. This contains a makefile for a simple hello world program written in both Fortran and C and several run scripts. The README.md file contains additional information, some of which is replicated here.

+

We will begin by creating a new directory and copying the source for a simple MPI test program. More details about the test program are available in the README.md file that accompanies it. Run the following commands to create a new directory and make a copy of the source code:

+
mkdir example
+cd example
+cp /nopt/nrel/apps/210929a/example/phostone.c .
+
+

Compile and run with Intel MPI#

+

First we will look at how to compile and run the application using Intel MPI. To build the application, we load the necessary Intel modules. Execute the following commands to load the modules and build the application, naming the output phost.intelmpi. Note that this application uses OpenMP as well as MPI, so we provide the -fopenmp flag to link against the OpenMP libraries.

+
ml intel-oneapi-mpi intel-oneapi-compilers
+mpiicc -fopenmp phostone.c -o phost.intelmpi
+
+

The following batch script is an example that runs the job using two MPI ranks on a single node with two threads per rank. Save this script to a file such as submit_intel.sh, replace <myaccount> with the appropriate account, and submit using sbatch submit_intel.sh. Feel free to experiment with different numbers of tasks and threads. Note that multi-node jobs on Vermilion can be finicky, and applications may not scale as well as they do on other systems. At this time, it is not expected that multi-node jobs will always run successfully.

+
+Intel MPI submission script +
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --exclusive
+#SBATCH --time=00:01:00
+#SBATCH --account=<myaccount>
+
+ml intel-oneapi-mpi intel-oneapi-compilers
+
+export OMP_NUM_THREADS=2
+export I_MPI_OFI_PROVIDER=tcp
+srun --mpi=pmi2 --cpus-per-task 2 -n 2 ./phost.intelmpi -F
+
+
+

Your output should look similar to the following:

+
MPI VERSION Intel(R) MPI Library 2021.9 for Linux* OS
+
+task    thread             node name  first task    # on node  core
+0000      0000           vs-std-0044        0000         0000  0001
+0000      0001           vs-std-0044        0000         0000  0000
+0001      0000           vs-std-0044        0000         0001  0003
+0001      0001           vs-std-0044        0000         0001  0002
+
+ +

The intel-oneapi-mkl module is available for linking against Intel's MKL +library. Then to build against MKL using the Intel compilers icc or ifort, you +normally just need to add the flag -qmkl. There are examples in the directory +/nopt/nrel/apps/210929a/example/mkl, and there is a Readme.md file that +explains in a bit more detail.

+

To compile a simple test program that links against MKL, run:

+
cp /nopt/nrel/apps/210929a/example/mkl/mkl.c .
+
+ml intel-oneapi-mkl intel-oneapi-compilers
+icc -O3 -qmkl mkl.c -o mkl
+
+

An example submission script is:

+
+Intel MKL submission script +
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --exclusive
+#SBATCH --time=00:01:00
+#SBATCH --account=<myaccount>
+
+source /nopt/nrel/apps/210929a/myenv.2110041605
+ml intel-oneapi-mkl intel-oneapi-compilers gcc
+
+./mkl
+
+
+

Compile and run with Open MPI#

+
+

Warning

+

Please note that multi-node jobs are not currently supported with Open MPI.

+
+

Use the following commands to load the Open MPI modules and compile the test program into an executable named phost.openmpi:

+
ml gcc openmpi
+mpicc -fopenmp phostone.c -o phost.openmpi
+
+

The following is an example script that runs two tasks on a single node, with two threads per task:

+
+Open MPI submission script +
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --exclusive
+#SBATCH --time=00:01:00
+#SBATCH --account=<myaccount>
+
+ml gcc openmpi
+
+export OMP_NUM_THREADS=2
+mpirun -np 2 --map-by socket:PE=2 ./phost.openmpi -F
+
+
+

Running VASP on Vermilion#

+

Please see the VASP page for detailed information and recommendations for running VASP on Vermilion.


NREL Systems#

+

NREL operates three on-premises systems for computational work.

+

System Configurations#

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Kestrel | Swift | Vermilion |
| ---- | ------- | ----- | --------- |
| OS | RedHat Enterprise Linux | Rocky Linux | RedHat |
| Login | kestrel.hpc.nrel.gov | swift.hpc.nrel.gov | vs.hpc.nrel.gov |
| CPU | Dual socket Intel Xeon Sapphire Rapids | Dual AMD EPYC 7532 Rome | Dual AMD EPYC 7532 Rome |
| Cores per CPU Node | 104 cores | 128 cores | Varies by partition |
| Interconnect | HPE Slingshot 11 | InfiniBand HDR | 25GbE |
| HPC scheduler | Slurm | Slurm | Slurm |
| Network Storage | 95PB Lustre | 3PB NFS | 440 TB |
| GPU | 132 nodes with 4x NVIDIA H100 SXM GPUs | 10 nodes with 4x NVIDIA A100 40GB GPUs | 5 nodes with a single A100 |
| Memory | 256GB, 384GB, 2TB | 256GB (CPU), 1TB (GPU) | 256GB (base) |
| Number of Nodes | 2454 | 484 | 133 (virtual) |

Visualization and Analytics Software Tools#

+

Learn about the available visualization and analytics software tools

+
+Note: +

The instructions shown on this page are given in the context of the Eagle supercomputer.

+
+

VirtualGL/FastX#

+

Provides remote visualization for OpenGL-based applications. For more information, see using VirtualGL and FastX .

+

ParaView#

+

An open-source, multi-platform data analysis and visualization application. +For information, see using ParaView.

+

VAPOR#

+

VAPOR (Visualization and Analysis Platform for Ocean, Atmosphere, and Solar Researchers) enables interactive exploration of terascale gridded data sets that are large in both the spatial and temporal domains. Wavelet-based multiresolution data representation permits users to make speed/quality trade-offs for visual as well as non-visual data exploration tasks.

+

For more information see the VAPOR website.

+

R Statistical Computing Environment#

+

R is a language and environment for statistical computing and graphics. For more information, see running R.

+

MATLAB#

+

MATLAB is a high-performance language for technical computing. It integrates computation, visualization and programming in an easy-to-use environment where problems and solutions are expressed in familiar mathematical notation.

+

The name MATLAB stands for Matrix Laboratory. MATLAB was originally written to provide easy access to matrix software developed by the LINPACK and EISPACK projects. Today, MATLAB engines incorporate the LAPACK and BLAS libraries, embedding the state of the art in software for matrix computation.

+

For more information, see using MATLAB software.

+

Interactive Data Language#

IDL, the Interactive Data Language, is an interactive application used for data analysis, visualization, and cross-platform application development.

+

VisIt#

+

VisIt is a free interactive parallel visualization and graphical analysis tool for viewing scientific data on Unix and PC platforms. VisIt features a robust remote visualization capability. VisIt can be started on a local machine and used to visualize data on a remote compute cluster.

+

For more information, see using VisIt.


ParaView#

+

ParaView is an open-source, multi-platform data analysis and visualization application. ParaView users can quickly build visualizations to analyze their data using qualitative and quantitative techniques. The data exploration can be done interactively in 3D or programmatically using ParaView's batch processing capabilities. ParaView was developed to analyze extremely large data sets using distributed memory computing resources. It can be run on supercomputers to analyze data sets of terascale as well as on laptops for smaller data.

+

The following tutorials are meant for the Eagle and Kestrel supercomputers.

+

Using ParaView in Client-Server Mode#

+

Running ParaView interactively in client-server mode is a convenient workflow for researchers who have a large amount of remotely stored data that they'd like to visualize using a locally installed copy of ParaView. In this model, the HPC system does the heavy lifting of reading file data and applying filters, taking advantage of parallel processing when possible, then "serves" the rendered data to the ParaView client running locally on your desktop. This allows you to interact with ParaView as you normally would (i.e., locally), with all your preferences and shortcuts intact, without the time-consuming step of transferring data from the supercomputer to your desktop or relying on a remote desktop environment.

+

The first step is to install ParaView. It is recommended that you use the binaries provided by Kitware on your workstation, and the version you install must identically match the version installed at NREL; this ensures client-server compatibility. To determine which version of ParaView is installed on the cluster, connect to Eagle or Kestrel as you normally would, load the ParaView module with module load paraview, then check the version with pvserver --version. The version number, e.g., 5.11.0, will then be displayed in your terminal. To download the correct ParaView client binary version for your desktop environment, visit the ParaView website.

+
1. Reserve Compute Nodes

    +

    The first step is to reserve the computational resources on Eagle/Kestrel that will be running the ParaView server.

    +

    This requires using the Slurm salloc directive and specifying an allocation name and time limit for the reservation.

    +

    Note that this is one of the few times where salloc is used instead of srun to launch the job, since we'll be launching multiple instances of pvserver using srun inside the job allocation in a later step. +In previous versions of Slurm (prior to 20.11) you would use srun instead of salloc, but that behavior has been deprecated due to changes in the way Slurm handles job steps inside an allocation. +The old "srun-then-srun" behavior may be replicated using the srun --overlap flag (see man srun and Slurm documentation for details), but the 'salloc-then-srun' construct works quite well and is what we'd recommend in this case for ease of use.

    +

    (Otherwise, for interactive jobs that just require one process on one node, the "salloc-then-srun" construct isn't necessary at all; for that type of job you may just use srun -A <account> -t <time> --pty $SHELL to land on a compute node and run your software as per normal, without needing an srun in front.)

    +

    To reserve the computational resources on Eagle/Kestrel:

    +
    salloc -A <alloc_name> -t <time_limit>
    +
    +

    where <alloc_name> will be replaced with the allocation name you wish to charge your time to and <time_limit> is the amount of time you're reserving the nodes for. +At this point, you may want to copy the name of the node that the Slurm scheduler assigns you (it will look something like r1i0n10, r4i3n3, etc., and follow immediately after the "@" symbol at the command prompt ) as we'll need it in Step 3.

    +

    In the example above, we default to requesting only a single node which limits the maximum number of ParaView server processes we can launch to the maximum number of cores on a single Eagle node (on Eagle, this is 36) or Kestrel node (on Kestrel, this is 104).
    +If you intend to launch more ParaView server processes than this, you'll need to request multiple nodes with your salloc command.

    +
    salloc -A <alloc_name> -t <time_limit> -N 2
    +
    +

    where the -N 2 option specifies that two nodes be reserved, which means the maximum number of ParaView servers that can be launched in Step 2 is 36 x 2 = 72 (Eagle) or 104 x 2 = 208 (Kestrel).
    +Although this means you'll be granted multiple nodes with multiple names, the one to copy for Step 3 is still the one immediately following the "@" symbol.
    +See the table of recommended workload distributions in Step 2 for more insight regarding the number of nodes to request.

    +
2. Start ParaView Server

    +

    After reserving the compute nodes, load the ParaView module with

    +
    module load paraview
    +
    +

    Next, start the ParaView server with another call to the Slrum srun directive

    +
    srun -n 8 pvserver --force-offscreen-rendering
    +
    +

    In this example, the ParaView server will be started on 8 processes.
    +The --force-offscreen-rendering option is present to ensure that, where possible, CPU-intensive filters and rendering calculations will be performed server-side (i.e., on the Eagle/Kestrel compute nodes) and not on your local machine.
    +Remember that the maximum number of ParaView server processes that can be launched is limited by the amount of nodes reserved in Step 1.
    +Although every dataset may be different, ParaView offers the following recommendations for balancing grid cells to processors.

    + + + + + + + + + + + + + + + + + + + + +
    | Grid Type | Target Cells/Process | Max Cells/Process |
    | --------- | -------------------- | ----------------- |
    | Structured Data | 5-10 M | 20 M |
    | Unstructured Data | 250-500 K | 1 M |
    +

    So for example, if you have data stored in an unstructured mesh with 6 M cells, you'd want to aim for between 12 and 24 ParaView server processes, which easily fits on a single Eagle or Kestrel node.
    If the number of unstructured mesh cells was instead around 60 M, you'd want to aim for 120 to 240 processes, which means requesting a minimum of 4 Eagle nodes at the low end (36 x 4 = 144) or 2 Kestrel nodes (104 x 2 = 208). Note that this multi-node request may remain in the queue longer while the scheduler looks for resources, so depending on your needs, it may be necessary to factor queue times into your optimal cells-per-process calculation.

    +

    Note: The --server-port=<port> option may be used with pvserver if you wish to use a port other than 11111 for Paraview. +You'll need to adjust the port in the SSH tunnel and tell your Paraview client which port to use, as well. +See the following sections for details.

    +
3. Create SSH Tunnel

    +

    Next, we'll create what's called an SSH tunnel to connect your local desktop to the compute node(s) you reserved in Step 1.
    +This will allow your local installation of ParaView to interact with files stored remotely on Eagle/Kestrel.
    +In a new terminal window, execute the following line of code on your own computer:

    +

    For Kestrel: +

    ssh -L 11111:<node_name>:11111 <user_name>@kestrel.hpc.nrel.gov
    +
    +For Eagle: +
    ssh -L 11111:<node_name>:11111 <user_name>@eagle.hpc.nrel.gov
    +

    +

    where <node_name> is the node name you copied in Step 1 and <user_name> is your HPC username.

    +

    Note that if you changed the default port to something other than 11111 (see the previous section), you'll need to change the port settings in your SSH tunnel as well. The SSH command construct above follows the format <local_port>:<node_name>:<remote_port>. The <local_port> is the "beginning" of the tunnel on your computer and is often the same as the "end" port of the tunnel, though this is not required; you may set it to anything convenient, but you will need to tell your ParaView client the right port if you change it (see the next section for details). The <remote_port> is the port on the Eagle/Kestrel compute node where pvserver is running. The default for pvserver is 11111, but if you changed this with the pvserver --server-port= flag, you'll need to change it in your ssh command to match.
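
    As a sketch, if the server were started on an arbitrary non-default port (12345 here) and you wanted local port 11112, the pieces would fit together like this:

    srun -n 8 pvserver --force-offscreen-rendering --server-port=12345    # on the compute node
    ssh -L 11112:<node_name>:12345 <user_name>@kestrel.hpc.nrel.gov       # on your desktop

    The ParaView client would then be pointed at Host localhost and Port 11112.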

    +
4. Connect ParaView Client

    +

    Now that the ParaView server is running on a compute node and your desktop is connected via the SSH tunnel, you can open ParaView as usual.
    +From here, click the "Connect" icon or File > Connect.
    +Next, click the "Add Server" button and enter the following information.

    + + + + + + + + + + + + + + + + + + + + + + + + + +
    | Name | Value |
    | ---- | ----- |
    | Name | Eagle HPC or Kestrel HPC |
    | Server Type | Client/Server |
    | Host | localhost |
    | Port | 11111 |
    +

    Only the last three fields, Server Type, Host, and Port, are strictly necessary (and many of them will appear by default) while the Name field can be any recognizable string you wish to associate with this connection.
    +When these 4 fields have been entered, click "Configure" to move to the next screen, where we'll leave the Startup Type set to "Manual".
    +Note that these setup steps only need to be completed the first time you connect to the ParaView server, future post-processing sessions will require only that you double click on this saved connection to launch it.

    +

    When finished, select the server just created and click "Connect".
    +The simplest way to confirm that the ParaView server is running as expected is to view the Memory Inspector toolbar (View > Memory Inspector) where you should see a ParaView server for each process started in Step 2 (e.g., if -n 8 was specified, processes 0-7 should be visible).

    +

    That's it! You can now File > Open your data files as you normally would, but instead of your local hard drive you'll be presented with a list of the files stored on Eagle or Kestrel.

    +
+

General Tips#

+
    +
  • The amount of time you can spend in a post-processing session is limited by the time limit specified when reserving the compute nodes in Step 1. If saving a large time series to a video file, your reservation time may expire before the video is finished. Keep this in mind and make sure you reserve the nodes long enough to complete your job.
  • +
  • Adding more parallel processes in Step 2, e.g., -n 36, doesn't necessarily mean you'll be splitting the data into 36 blocks for each operation. ParaView has the capability to use 36 parallel processes, but may use many fewer as it calculates the right balance between computational power and the additional overhead of communication between processors.
  • +
+

High-quality Rendering With ParaView#

+

How to use ParaView in batch mode to generate single frames and animations on Eagle/Kestrel

+

+

Building PvBatch Scripts in Interactive Environments#

+
1. Begin by connecting to an Eagle or Kestrel login node:

    +
    ssh {username}@eagle.hpc.nrel.gov
    +
    +or
    +
    +ssh {username}@kestrel.hpc.nrel.gov
    +
    +
2. Request an interactive compute session (e.g., for 60 minutes):

    +
    salloc -A {allocation} -t 60
    +
    +

    Note: Slurm changes in January 2022 resulted in the need to use salloc to start your interactive session, since we'll be running pvbatch on the compute node using srun in a later step. This "srun-inside-an-salloc" construct supersedes the previous Slurm behavior of "srun-inside-an-srun", which will no longer work.

    +
3. Once the session starts, load the appropriate modules:

    +
    module purge
    +module load paraview/osmesa
    +
    +

    Note: In this case, we select the paraview/osmesa (server) module as opposed to the default ParaView build, as this version is built for rendering using offscreen methods suitable for compute nodes.

    +
4. Start your render job:

    +
    srun -n 1 pvbatch --force-offscreen-rendering render_sphere.py
    +
    +

    where render_sphere.py is a simple ParaView Python script to add a sphere source and +save an image.

    +
+

Transitioning to Batch Post-Processing#

+

Tweaking the visualization options contained in the pvrender.py file inevitably requires some amount of trial +and error and is most easily accomplished in an interactive compute session like the one outlined above. Once +you feel that your script is sufficiently automated, you can start submitting batch jobs that require no user interaction.

+
1. Prepare your script for sbatch. A minimal example of a batch script named batch_render.sh could look like:

    +
    #!/bin/bash
    +
    +#SBATCH --account={allocation}
    +#SBATCH --time=60:00
    +#SBATCH --job-name=pvrender
    +#SBATCH --nodes=2
    +
    +module purge
+module load paraview/osmesa
    +
    +srun -n 1 pvbatch --force-offscreen-rendering render_sphere.py 1 &
    +srun -n 1 pvbatch --force-offscreen-rendering render_sphere.py 2 &
    +srun -n 1 pvbatch --force-offscreen-rendering render_sphere.py 3 &
    +
    +wait
    +
    +

    where we run multiple instances of our dummy sphere example, highlighting that different options can be passed to each instance to post-process a large batch of simulated results on a single node. Note also that for more computationally intensive rendering or larger file sizes (e.g., tens of millions of cells), the -n 1 option can be increased as suggested in the client-server guide.

    +
2. Submit the job and wait:

    +
    sbatch batch_render.sh
    +
    +
+

Tips on Creating the PvBatch Python Script#

+

Your ParaView python script can be made in a number of ways. The easiest +is to run a fresh session of ParaView (use version 5.x on your local +machine) and select "Tools→Start Trace," then "OK". Perform all the +actions you need to set your scene and save a screenshot. Then select +"Tools → Stop Trace" and save the resulting python script (we will use +render_sphere.py in these examples). + 

+

Here are some useful components to add to your ParaView Python script.

+
    +
  • +

    Read the first command-line argument and use it to select a data + file to operate on.

    +
    import sys
    +doframe = 0
    +if len(sys.argv) > 1:
    +    doframe = int(sys.argv[1])
    +infile = "output%05d.dat" % doframe
    +
    +

    Note that pvbatch will pass any arguments after the script name to +the script itself. So you can do the following to render frame 45:

    +
    srun -n 1 pvbatch --force-offscreen-rendering render_sphere.py 45
    +
    +

    To change this value programmatically, your batch_render.sh script would need to iterate using something like:

    +
    for frame in 45 46 47 48
    +do
    +    srun -n 1 pvbatch --force-offscreen-rendering render_sphere.py $frame
    +done
    +
    +
  • +
+ + +
    +
  • +

    Set the output image size to match FHD or UHD standards:

    +
    renderView1.ViewSize = [3840, 2160]
    +renderView1.ViewSize = [1920, 1080]
    +
    +
  • +
  • +

    Don't forget to actually render the image!

    +
    pngname = "image%05d.png" % doframe
    +SaveScreenshot(pngname, renderView1)
    +
    +
  • +
+

Insight Center#

+

ParaView is supported in the Insight Center's immersive virtual environment. +Learn about the Insight Center.

+

For assistance, contact Kenny Gruchalla.


Using VirtualGL and FastX#

+

VirtualGL and FastX provide remote desktop and visualization capabilities for graphical applications.

+

Remote Visualization on Kestrel#

+

In addition to standard ssh-only login nodes, Kestrel is also equipped with several specialized Data Analysis and Visualization (DAV) login nodes, intended for HPC applications on Kestrel that require a graphical user interface.

+
+

Note

+

DAV FastX nodes are a limited resource and not intended as a general-purpose remote desktop. We ask that you please restrict your usage to only HPC allocation-related work and/or visualization software that requires an HPC system.

+
+

There are seven internal DAV nodes on Kestrel, available only to NREL users on campus, on the NREL VPN, or via the HPC VPN. They are accessible via round-robin at kestrel-dav.hpc.nrel.gov, and the individual nodes are named kd1.hpc.nrel.gov through kd7.hpc.nrel.gov.

+

There is also one node that is ONLY accessible by external (non-NREL) users available at kestrel-dav.nrel.gov. This address will connect to the node kd8, and requires both password and OTP for login.

+

All Kestrel DAV nodes have 104 CPU cores (2x 52-core Intel Xeon Sapphire Rapids CPUs), 256GB RAM, 2x 48GB NVIDIA A40 GPUs, and offer a Linux desktop (via FastX) with visualization capabilities, optional VirtualGL, and standard Linux terminal applications.

+

DAV nodes are shared resources that support multiple simultaneous users. CPU and RAM usage is monitored by automated software called Arbiter, and high usage may result in temporary throttling of processes.

+

VirtualGL#

+

VirtualGL is an open-source package that gives any Linux remote display software the ability to run OpenGL applications with full 3D hardware acceleration.

+

The traditional method of displaying graphics applications to a remote X server (indirect rendering) supports 3D hardware acceleration, but this approach causes all of the OpenGL commands and 3D data to be sent over the network to be rendered on the client machine. With VirtualGL, the OpenGL commands and 3D data are redirected to a 3D graphics accelerator on the application server, and only the rendered 3D images are sent to the client machine. VirtualGL "virtualizes" 3D graphics hardware, allowing users to access and share large-memory visualization nodes with high-end graphics processing units (GPUs) from their energy-efficient desktops.

+

FastX#

+

FastX provides a means to use a graphical desktop remotely. By connecting to a FastX session on a DAV node, users can run graphical applications with a similar experience to running on their workstation. Another benefit is that you can disconnect from a FastX connection, go to another location and reconnect to that same session, picking up where you left off.

+

Connecting to DAV Nodes Using FastX#

+

NREL users may use the web browser or the FastX desktop client. External users must use the FastX desktop client, or connect to the HPC VPN for the web client.

+
+NREL On-Site and VPN Users +

Using a Web Browser#

+

Launch a web browser on your local machine and connect to https://kestrel-dav.hpc.nrel.gov. After logging in with your HPC username/password, you will be able to launch a FastX session by choosing a desktop environment of your choice. Both GNOME and XFCE are available for use.

+

Using the Desktop Client#

+

Download the Desktop Client and install it on your local machine, then follow these instructions to connect to one of the DAV nodes.

+

Step 1:

+

Launch the FastX Desktop Client.

+

Step 2:

+

Add a profile using the + button in the right-hand corner of the tool, using the SSH protocol.

+

Step 3:

+

Give your profile a name and enter the settings...

+

Address/URL: kestrel-dav.hpc.nrel.gov

+

OR you may use the address of an individual kd or ed node if you would like to resume a previous session.

+

Username:

+

...and then save the profile.

+

Step 4:

+

Once your profile is saved, you will be prompted for your password to connect.

+

Step 5:

+

If a previous session exists, click (double click if in "List View") on current session to reconnect.

+

OR

+

Step 5a:

+

Click the PLUS (generally in the upper right corner of the session window) to add a session and continue to step 6.

+

Step 6:

+

Select a Desktop environment of your choice and click OK to launch. +

+
+
+Off-Site or Remote Users +

Remote users must use the Desktop Client via SSH for access. NREL Multifactor token (OTP) required.

+

Download the Desktop Client and install it on your local machine, then follow these instructions to connect to one of the DAV nodes.

+

Step 1:

+

Launch the FastX Desktop Client.

+

Step 2:

+

Add a profile using the + button in the right-hand corner of the tool, using the SSH protocol.

+

Step 3:

+

Give your profile a name and enter the settings...

+

Host: kestrel-dav.nrel.gov

+

Port: 22

+

Username:

+

...and then save the profile.

+

+

Step 4:

+

Once your profile is saved, you will be prompted for your password+OTP_token (your multifactor authentication code) to connect.

+

+

Step 5:

+

Select a Desktop environment of your choice and click OK.

+

+
+

Launching OpenGL Applications#

+

You can now run applications in the remote desktop. You can run X applications normally; however, to run hardware-accelerated OpenGL applications, you must run the application prefaced by the vglrun command. +

$ module load matlab
+$ vglrun matlab
+

+

Choosing a GPU on Kestrel#

+

Kestrel DAV nodes have two NVIDIA A40 GPUs. Using vglrun will default to the first GPU available, which may leave one GPU overutilized while the second is underutilized.

+

To run your OpenGL software with a GPU of your choosing, you may add the -d <gpu> flag to vglrun to pick a GPU. The first GPU is referred to as 0:0, the second as 0:1. For example, to run Matlab on the second GPU:

+

vglrun -d 0:1 matlab

+

to run Ansys on the first GPU:

+

vglrun -d 0:0 ansys

+

Download FastX Desktop Client#

+ + + + + + + + + + + + + + + + + + + + + +
| Operating System | Installer |
| ---------------- | --------- |
| Mac | Download |
| Linux | Download |
| Windows | Download |
+

Multiple FastX Sessions#

+

FastX sessions may be closed without terminating the session and resumed at a later time. However, since there is a +license-based limit to the number of concurrent users, please fully log out/terminate your remote desktop session when +you are done working and no longer need to leave processes running. Avoid having remote desktop sessions open on multiple +nodes that you are not using, or your sessions may be terminated by system administrators to make licenses available for +active users.

+

Reattaching FastX Sessions#

+

Connections to the DAV nodes via kestrel-dav.hpc.nrel.gov will connect you to a random node. To resume a session that you have suspended, take note of the node your session is running on (kd1, kd2, kd3, kd4, kd5, kd6, or kd7) before you close the FastX client or browser window, and you may directly access that node when you are ready to reconnect at e.g. kd#.hpc.nrel.gov in the FastX client or through your web browser at https://kd#.hpc.nrel.gov.

+

Troubleshooting#

+

Could not connect to session bus: Failed to connect to socket /tmp/dbus-XXX: Connection refused#

+

This error is usually the result of a change to the default login environment, often caused by an alteration to ~/.bashrc that changes your $PATH, or by configuring Conda to launch into a (base) or other environment immediately upon login.

+

For changes to your $PATH, make sure the existing $PATH comes first so that the default system paths are included before any custom additions. For example, use PATH=$PATH:/home/username/bin instead of PATH=/home/username/bin:$PATH.

+

For conda users, the command conda config --set auto_activate_base false will prevent conda from +launching into a base environment upon login.

+

No Free Licenses#

+

FastX has a limited number of licenses for concurrent usage, so please remember to log out of your X session AND out of FastX when you are done working. If you receive a "no free licenses" error when trying to start a new session, please contact hpc-help@nrel.gov for assistance.

+

How to Get Help#

+

Please contact the HPC Helpdesk at hpc-help@nrel.gov if you have any questions, technical issues, or receive a "no free licenses" error.


VisIt#

+

VisIt is a free interactive parallel visualization and graphical analysis tool for viewing scientific data on Unix and PC platforms.

+

With VisIt, users can quickly generate visualizations from their data, animate them through time, manipulate them, and save the resulting images for presentations. It contains a rich set of visualization features so that you can view your data in a variety of ways. +Also, it can be used to visualize scalar and vector fields defined on two- and three-dimensional (2D and 3D) structured and unstructured meshes.

+

VisIt was designed to handle very large data set sizes in the terascale range, and yet can also handle small data sets in the kilobyte range.

+

For more information on VisIt, see their Lawrence Livermore National Laboratory website.

+

Using VisIt#

+
+Note: +

VisIt client/server mode is only supported on Eagle.

+
+

VisIt features a robust remote visualization capability. To enable remote visualization (client/server), follow these steps.

+
1. On Eagle, add:

    module use /nopt/nrel/apps/modules/centos74/modulefiles
    module load visit/2.13.3-mesa

   to your .bashrc file in your home directory.
2. On a local machine, download VisIt 2.13.3 for the appropriate platform from the Lawrence Livermore National Laboratory VisIt site.
3. The installed profile can be viewed and edited by clicking on 'Options → Host profiles...'. A remote host profile should appear.
4. Go to Launch Profiles.
5. Go to the Parallel tab, set up the job parameters, select sbatch/srun for 'Parallel launch method', and then click Apply.
6. To connect to VisIt, go to File → Open file.
7. In the Host option, click on the drop-down menu and choose the host Eagle_short.
8. A window will appear with an option to change the username (this is your HPC username); if the username shown is not correct, click on change username.
9. Type your HPC username and click Confirm username.
10. Enter your HPC password and click OK.
11. Wait for the VisIt client to connect to the server on Eagle.
12. Enter the directory where your data is located into Path.
13. Once you choose your data file, VisIt will display the job information; you can change it and then click OK. For the job information:
    - Bank / Account: enter the project name you are charging to.
    - Time limit: enter the time you need for the job in the format H:M:S.
14. Once the job is submitted, you can start applying visualization filters to your data.

Getting Started

+ +

To use the NREL HPC systems, you will need to request a user account. For a guide to accessing our systems, please see our User Basics guide.

+

Below we've collected answers to many of the most frequently asked questions.

+

Frequently Asked Questions#

+
+What is high-performance computing? +

Generally speaking, HPC infrastructure coordinates many discrete units, each capable of independent computation, so that they cooperate on portions of a task and complete far more computation in a given amount of time than any single unit could on its own. In other words, an HPC system is many individual computers working together.

+
+
+Is NREL HPC related to the Information Technology Services Desk? +

HPC Operations and Information Technology Services (ITS) are separate groups with different responsibilities. ITS handles issues with your workstation or any other digital device issued to you by NREL. HPC Operations will assist with issues regarding HPC systems. Note that your NREL HPC account is separate from the ITS credentials you use to log in to your workstation, e-mail, and the many other IT services provided by the Service Desk.

+
+
+What are project allocations? +

Over the fiscal year, there is a given amount of time that each computer in the HPC system(s) can be expected to be operational and capable of performing computation. An HPC project allocation grants a portion of this total assumed available computing time; the sum of all awarded project allocations' compute time approximates the projected availability of the entire system. Project allocations are identified by a unique "handle", which doubles as the account under which you submit HPC jobs related to the project to the job scheduler. Learn more about requesting an allocation.
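For example, the scheduler command-line tools referenced elsewhere in this documentation (sbatch/srun) accept the handle through an account flag; a minimal, illustrative submission (the script name is a placeholder) looks like:

# Charge the job's compute time to the allocation identified by <project_handle>
sbatch --account=<project_handle> my_job_script.sb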

+
+
+How can I access NREL HPC systems? +

Begin by requesting an NREL HPC account. +Then, consult our guide on how to connect to NREL HPC systems.
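Once your account is active, connections are made over SSH. A minimal sketch (the hostname shown is illustrative; use the login host for your target system under the hpc.nrel.gov domain):

# Replace <username> with your HPC username and the hostname with your system's login node
ssh <username>@eagle.hpc.nrel.gov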

+
+
+What is a one-time password (OTP) token? +

OTP tokens provide two-factor authentication by combining a temporary token (usually valid for 60 seconds) with your account password. Tokens are generated using the current time stamp and a secure hashing algorithm. Note that you only need an OTP to access systems outside the NREL firewall, namely if you are an external collaborator. NREL employees can be on-site or use a VPN to access HPC systems via the *.hpc.nrel.gov domain.

+
+
+What is a virtual private network (VPN)? +

VPNs simulate being within a firewall (which is an aggressive filter on inbound network +traffic) by encapsulating your traffic in a secure channel that funnels through the +NREL network. While connected to a VPN, internal network domains such as *.hpc.nrel.gov +can be accessed without secondary authentication (as the VPN itself counts as a secondary +authentication). NREL employees may use the NREL VPN while external collaborators +may use the NREL HPC VPN using their OTP token. This provides the convenience of not +having to continually type in your current OTP token when accessing multiple systems +in a single session.

+
+
+What is a "job?" +

This is the general term for any task submitted to the HPC systems, where it is queued until resources become available to execute it. Jobs vary in how computationally intensive they are.

+
+
+What is a "node?" +

A node is a complete, independent system with its own operating system and resources, +much like your laptop or desktop. HPC nodes are typically designed to fit snugly in +tight volumes, but in principle you could convert several laptops into a cluster, +and they would then be "nodes."

+
+
+What are "login" and "compute" nodes? +

Login nodes are the immediate systems your session is opened on once you successfully authenticate. They serve as preparation systems to stage your user environment and launch jobs. These login nodes are shared resources, and because of that the HPC team employs a program called Arbiter2 to ensure that these resources aren't being used inappropriately (see 'What is proper NREL HPC login node etiquette' for more detail). Compute nodes are where your jobs actually run once they are dispatched by the scheduler. You gain exclusive access to compute nodes that are executing your jobs, whereas there are often many users logged into the login nodes at any given time.

+
+
+What is proper NREL HPC login node etiquette? +

As mentioned above, login nodes are a shared resource, and are subject to process +limiting based on usage. If you do computationally intensive work on these systems, it will unfairly +occupy resources and make the system less responsive for other users. Please reserve +your computationally intensive tasks (especially those that will fully utilize CPU +cores) for jobs submitted to compute nodes. Offenders of login node abuse will be +admonished accordingly. For more information please see our policy on what +constitutes inappropriate use.
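As a rough sketch of the preferred pattern (assuming the Slurm tools referenced elsewhere in this documentation; the account, time, and node values are illustrative), request an interactive session on a compute node and run the heavy work there instead of on the login node:

# Request an interactive shell on one compute node for 30 minutes, charged to your allocation
srun --account=<project_handle> --time=30:00 --nodes=1 --pty $SHELL
# When the prompt returns you are on the compute node; run the CPU-intensive work here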

+
+
+What is "system time?" +

System time is a regularly occurring interval of time during which NREL HPC systems +are taken offline for necessary patches, updates, software installations, and anything +else to keep the systems useful, updated, and secure. You will not be able to access +the system or submit jobs during system times. A reminder announcement is sent out prior to every system time detailing +what changes will take place, and includes an estimate of how long the system time will be. +You can check the system status page if you are ever +unsure if an NREL HPC system is currently down for system time.

+
+
+How can I more closely emulate a Linux/macOS workflow on my Windows workstation? +

As you become familiar with navigating the HPC Linux systems, you may come to prefer using the same command-line interface locally on your workstation to keep your workflow consistent. There are many terminal emulators for Windows that provide the common Linux and macOS command-line interface. The official Linux command-line environment for Windows is the Windows Subsystem for Linux (WSL). Other recommended terminal applications include Git Bash (included with Git for Windows), Cmder, and MSYS2. Note that PuTTY is not a terminal emulator; it is only an SSH client. The applications listed above provide an ssh command, which mirrors the functionality of PuTTY.
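For example, on recent Windows 10 and Windows 11 releases, WSL can typically be enabled from an administrator PowerShell prompt (availability depends on your Windows version and local IT policy):

# Installs WSL with the default Linux distribution; requires administrator rights and a restart
wsl --install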

+
+
+What is the secure shell (SSH) protocol? +

Stated briefly, the SSH protocol establishes an encrypted channel over which various kinds of network traffic can be shared. It should not be confused with the ssh terminal command or with SSH clients, which are applications that implement the SSH protocol in software to create secure connections to remote systems.

+
+
+Why aren't my jobs running? +

Good question! There may be hundreds of reasons why. Please contact HPC support +with a message containing as many relevant details as you can provide so we are more +likely to be able to offer useful guidance (such as what software you're using, how +you are submitting your job, what sort of data you are using, how you are setting +up your software environment, etc.).
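One quick check you can run yourself first (assuming the Slurm tools referenced elsewhere in this documentation): list your own jobs and the scheduler's stated reason for any that are still pending, and include that output in your message:

# Shows your queued and running jobs; the last column gives the pending reason, e.g. (Priority) or (Resources)
squeue -u $USER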

+
+
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/help/index.html b/Documentation/help/index.html new file mode 100644 index 000000000..4fda15437 --- /dev/null +++ b/Documentation/help/index.html @@ -0,0 +1,4986 @@ + + + + + + + + + + + + + + + + + + + + + + + Help and Support - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Help and Support

+ +

Get quick access to help resources for NREL's high-performance computing (HPC) systems.

+

Support Contact Information#

+

HPC-Help@nrel.gov - Email for general HPC questions, technical troubleshooting, account requests, and software installation assistance. In the email, please include your username, the system name, project handle, and any information that will help us identify and troubleshoot the issue.

+

HPC-Requests@nrel.gov - Email for HPC questions related to allocation requests and to request increases/decreases to allocation units or storage.

+

Microsoft Teams#

+

Each system has a Microsoft Teams channel where users can collaborate and post questions. The Swift and Vermilion Teams channels are among the primary ways we communicate announcements and status updates for those systems.

+

We update the team channel members annually based on HPC project members. However, if we missed you and you would like to join, please use the following instructions:

+
+Internal Users (NREL) +
    +
  1. In Teams, click on the "Teams" icon in the far left navigation bar.
  2. Click "Join or create a team" in the lower left corner.
  3. In the "Search teams" bar in the upper far right corner, type the name of the channel you need to join (e.g., "Vermilion" or "Swift") and hit return.
  4. Click Join.
+
+
+External Users (Non-NREL) +
    +
  1. You will receive a welcome email from the team owner with information about the team. Click Accept.
  2. If you have never created a Microsoft Office 365 account, you will be prompted to create one. If you already have one, log in.
  3. The first time you log in, you will be prompted to set up Microsoft Authenticator or another authenticator app.
  4. From your mobile device, download and install the app from the Apple App Store (for iOS) or the Google Play Store (for Android) and open the app.
      • You will be prompted to allow notifications. Select Allow.
      • Click OK on the screen describing what information Microsoft gathers.
      • Click Skip on the "Add personal account" page.
      • Click Skip on the "Add non-Microsoft account" page.
      • Click Add Work Account on the "Add work account" page.
      • Click OK to allow access to the camera.
  5. Going forward, anytime you log in, you will get a prompt on your phone to authenticate.
+
+

Additional Resources#

+

HPC Website - Resources for getting access to systems, basics on getting started with HPC, accounts and allocation information, and our policies.

+

Computational Sciences Tutorials Team: Staff in the Computational Science Center host multiple tutorials and workshops on various computational science topics throughout the year, such as visualization, cloud, HPC, and others. The team maintains a calendar of the upcoming training schedule as well as past slide decks and recordings. Please use the instructions above if you would like to join the team.

+

Code Repository: The repository contains a collection of code examples, executables, and utilities. It is open for contributions from the user community.

+

HPC Office Hours: The HPC technical staff holds live office hours on alternating Tuesdays and Thursdays. Bring your HPC-related questions for real-time discussion.

+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Documentation/index.html b/Documentation/index.html new file mode 100644 index 000000000..4c2f301d4 --- /dev/null +++ b/Documentation/index.html @@ -0,0 +1,4907 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Documentation Home - NREL HPC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ + + +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Documentation Home

+ +

Welcome to the central source of user-contributed documentation for NREL's HPC systems. This repository is open to both NREL and non-NREL HPC users. You can browse the documentation here, or visit the repository on GitHub to learn how to start contributing.

+

Where to Begin#

+

Please use the navigation bar on the left to explore the available documentation by category.

+

Highlights#

+ +

Other NREL Documentation Resources#

+
    +
  • The NREL HPC Website is the home of Advanced Computing at NREL.
  • Our GitHub Repository hosts specific application examples, scripts, workshop content, the contributor guide, and more.
  • The gh-pages branch (this site) is also open for contribution.
+ + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+
+
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/_includes/analytics.html b/_includes/analytics.html new file mode 100644 index 000000000..701b76cca --- /dev/null +++ b/_includes/analytics.html @@ -0,0 +1,8 @@ + + diff --git a/_includes/calendar.html b/_includes/calendar.html new file mode 100644 index 000000000..4a5d72dae --- /dev/null +++ b/_includes/calendar.html @@ -0,0 +1 @@ + diff --git a/assets/images/FastX/eagle-dav-replacement-mate-interface-step5-offsite.png b/assets/images/FastX/eagle-dav-replacement-mate-interface-step5-offsite.png new file mode 100644 index 000000000..20786206a Binary files /dev/null and b/assets/images/FastX/eagle-dav-replacement-mate-interface-step5-offsite.png differ diff --git a/assets/images/FastX/eagle-dav-ssh-login-fastx-cleaned-step3.png b/assets/images/FastX/eagle-dav-ssh-login-fastx-cleaned-step3.png new file mode 100644 index 000000000..ec005b381 Binary files /dev/null and b/assets/images/FastX/eagle-dav-ssh-login-fastx-cleaned-step3.png differ diff --git a/assets/images/FastX/eagle-dav-step4-offsite.png b/assets/images/FastX/eagle-dav-step4-offsite.png new file mode 100644 index 000000000..5703bd6b9 Binary files /dev/null and b/assets/images/FastX/eagle-dav-step4-offsite.png differ diff --git a/assets/images/FastX/fastx-installer-image-1.png b/assets/images/FastX/fastx-installer-image-1.png new file mode 100644 index 000000000..cc39f2f1a Binary files /dev/null and b/assets/images/FastX/fastx-installer-image-1.png differ diff --git a/assets/images/FastX/kestrel-dav-mate-gnome-step5.png b/assets/images/FastX/kestrel-dav-mate-gnome-step5.png new file mode 100644 index 000000000..ae5307e95 Binary files /dev/null and b/assets/images/FastX/kestrel-dav-mate-gnome-step5.png differ diff --git a/assets/images/FastX/kestrel-dav-ssh-login-fastx-step3-external.png b/assets/images/FastX/kestrel-dav-ssh-login-fastx-step3-external.png new file mode 100644 index 000000000..32bf917ae Binary files /dev/null and b/assets/images/FastX/kestrel-dav-ssh-login-fastx-step3-external.png differ diff --git a/assets/images/FastX/kestrel-dav-ssh-login-fastx-step3.png b/assets/images/FastX/kestrel-dav-ssh-login-fastx-step3.png new file mode 100644 index 000000000..aba460cb6 Binary files /dev/null and b/assets/images/FastX/kestrel-dav-ssh-login-fastx-step3.png differ diff --git a/assets/images/FastX/xfce-interface-cleaned-step5.png b/assets/images/FastX/xfce-interface-cleaned-step5.png new file mode 100644 index 000000000..3d71390e0 Binary files /dev/null and b/assets/images/FastX/xfce-interface-cleaned-step5.png differ diff --git a/assets/images/Julia/Julia-Calling-Python-C-Tutorial_29_0.svg b/assets/images/Julia/Julia-Calling-Python-C-Tutorial_29_0.svg new file mode 100644 index 000000000..4e8ab4eaa --- /dev/null +++ b/assets/images/Julia/Julia-Calling-Python-C-Tutorial_29_0.svg @@ -0,0 +1,314 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/images/Julia/Julia-Calling-Python-C-Tutorial_30_0.svg b/assets/images/Julia/Julia-Calling-Python-C-Tutorial_30_0.svg new file mode 100644 index 000000000..fa11200ee --- /dev/null +++ b/assets/images/Julia/Julia-Calling-Python-C-Tutorial_30_0.svg @@ -0,0 +1,202 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/images/Profiling/DDT-1.png b/assets/images/Profiling/DDT-1.png new file mode 100644 index 000000000..5b51bfbe3 Binary files /dev/null and b/assets/images/Profiling/DDT-1.png 
differ diff --git a/assets/images/Profiling/DDT-2.png b/assets/images/Profiling/DDT-2.png new file mode 100644 index 000000000..0f28ea686 Binary files /dev/null and b/assets/images/Profiling/DDT-2.png differ diff --git a/assets/images/Profiling/DDT-3.png b/assets/images/Profiling/DDT-3.png new file mode 100644 index 000000000..a5ec987df Binary files /dev/null and b/assets/images/Profiling/DDT-3.png differ diff --git a/assets/images/Profiling/MAP-1.png b/assets/images/Profiling/MAP-1.png new file mode 100644 index 000000000..8c8229d86 Binary files /dev/null and b/assets/images/Profiling/MAP-1.png differ diff --git a/assets/images/Profiling/MAP-2.png b/assets/images/Profiling/MAP-2.png new file mode 100644 index 000000000..b04be390a Binary files /dev/null and b/assets/images/Profiling/MAP-2.png differ diff --git a/assets/images/Profiling/MAP-3.png b/assets/images/Profiling/MAP-3.png new file mode 100644 index 000000000..a14119b94 Binary files /dev/null and b/assets/images/Profiling/MAP-3.png differ diff --git a/assets/images/Profiling/MAP-4.png b/assets/images/Profiling/MAP-4.png new file mode 100644 index 000000000..6b9848d4a Binary files /dev/null and b/assets/images/Profiling/MAP-4.png differ diff --git a/assets/images/Profiling/MAP-5.png b/assets/images/Profiling/MAP-5.png new file mode 100644 index 000000000..6ca45808d Binary files /dev/null and b/assets/images/Profiling/MAP-5.png differ diff --git a/assets/images/Profiling/MAP-6.png b/assets/images/Profiling/MAP-6.png new file mode 100644 index 000000000..f4929b76c Binary files /dev/null and b/assets/images/Profiling/MAP-6.png differ diff --git a/assets/images/Profiling/MAP-7.png b/assets/images/Profiling/MAP-7.png new file mode 100644 index 000000000..8c8229d86 Binary files /dev/null and b/assets/images/Profiling/MAP-7.png differ diff --git a/assets/images/Profiling/PR-1.png b/assets/images/Profiling/PR-1.png new file mode 100644 index 000000000..a6c13a731 Binary files /dev/null and b/assets/images/Profiling/PR-1.png differ diff --git a/assets/images/Profiling/PR-2.png b/assets/images/Profiling/PR-2.png new file mode 100644 index 000000000..de37360e1 Binary files /dev/null and b/assets/images/Profiling/PR-2.png differ diff --git a/assets/images/VASP/openmpscaling.png b/assets/images/VASP/openmpscaling.png new file mode 100644 index 000000000..fc1f92fd6 Binary files /dev/null and b/assets/images/VASP/openmpscaling.png differ diff --git a/assets/images/VASP/sharedscaling-192.png b/assets/images/VASP/sharedscaling-192.png new file mode 100644 index 000000000..764d8a582 Binary files /dev/null and b/assets/images/VASP/sharedscaling-192.png differ diff --git a/assets/images/VisIT/eagle-14.png b/assets/images/VisIT/eagle-14.png new file mode 100644 index 000000000..9fe2583f2 Binary files /dev/null and b/assets/images/VisIT/eagle-14.png differ diff --git a/assets/images/VisIT/eagle-5a.png b/assets/images/VisIT/eagle-5a.png new file mode 100644 index 000000000..c51496c79 Binary files /dev/null and b/assets/images/VisIT/eagle-5a.png differ diff --git a/assets/images/VisIT/eagle-5b.png b/assets/images/VisIT/eagle-5b.png new file mode 100644 index 000000000..8b1b545b0 Binary files /dev/null and b/assets/images/VisIT/eagle-5b.png differ diff --git a/assets/images/VisIT/eagle-6.png b/assets/images/VisIT/eagle-6.png new file mode 100644 index 000000000..e87174860 Binary files /dev/null and b/assets/images/VisIT/eagle-6.png differ diff --git a/assets/images/VisIT/eagle-8.png b/assets/images/VisIT/eagle-8.png new file mode 100644 index 
000000000..150540b2d Binary files /dev/null and b/assets/images/VisIT/eagle-8.png differ diff --git a/assets/images/VisIT/eagle-9.png b/assets/images/VisIT/eagle-9.png new file mode 100644 index 000000000..ac626fc86 Binary files /dev/null and b/assets/images/VisIT/eagle-9.png differ diff --git a/assets/images/VisIT/eagle-software-visit-step7.png b/assets/images/VisIT/eagle-software-visit-step7.png new file mode 100644 index 000000000..51eed67f6 Binary files /dev/null and b/assets/images/VisIT/eagle-software-visit-step7.png differ diff --git a/assets/images/bw.png b/assets/images/bw.png new file mode 100644 index 000000000..45842308b Binary files /dev/null and b/assets/images/bw.png differ diff --git a/assets/images/favicon.png b/assets/images/favicon.png new file mode 100644 index 000000000..7d3490bbe Binary files /dev/null and b/assets/images/favicon.png differ diff --git a/assets/images/gpu_ai_benchmark.png b/assets/images/gpu_ai_benchmark.png new file mode 100644 index 000000000..5607d779c Binary files /dev/null and b/assets/images/gpu_ai_benchmark.png differ diff --git a/assets/images/output_4_0.png b/assets/images/output_4_0.png new file mode 100644 index 000000000..1585c6ffb Binary files /dev/null and b/assets/images/output_4_0.png differ diff --git a/assets/javascripts/bundle.83f73b43.min.js b/assets/javascripts/bundle.83f73b43.min.js new file mode 100644 index 000000000..43d8b70f6 --- /dev/null +++ b/assets/javascripts/bundle.83f73b43.min.js @@ -0,0 +1,16 @@ +"use strict";(()=>{var Wi=Object.create;var gr=Object.defineProperty;var Di=Object.getOwnPropertyDescriptor;var Vi=Object.getOwnPropertyNames,Vt=Object.getOwnPropertySymbols,Ni=Object.getPrototypeOf,yr=Object.prototype.hasOwnProperty,ao=Object.prototype.propertyIsEnumerable;var io=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,$=(e,t)=>{for(var r in t||(t={}))yr.call(t,r)&&io(e,r,t[r]);if(Vt)for(var r of Vt(t))ao.call(t,r)&&io(e,r,t[r]);return e};var so=(e,t)=>{var r={};for(var o in e)yr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Vt)for(var o of Vt(e))t.indexOf(o)<0&&ao.call(e,o)&&(r[o]=e[o]);return r};var xr=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var zi=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of Vi(t))!yr.call(e,n)&&n!==r&&gr(e,n,{get:()=>t[n],enumerable:!(o=Di(t,n))||o.enumerable});return e};var Mt=(e,t,r)=>(r=e!=null?Wi(Ni(e)):{},zi(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var co=(e,t,r)=>new Promise((o,n)=>{var i=p=>{try{s(r.next(p))}catch(c){n(c)}},a=p=>{try{s(r.throw(p))}catch(c){n(c)}},s=p=>p.done?o(p.value):Promise.resolve(p.value).then(i,a);s((r=r.apply(e,t)).next())});var lo=xr((Er,po)=>{(function(e,t){typeof Er=="object"&&typeof po!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(Er,function(){"use strict";function e(r){var o=!0,n=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function s(k){return!!(k&&k!==document&&k.nodeName!=="HTML"&&k.nodeName!=="BODY"&&"classList"in k&&"contains"in k.classList)}function p(k){var ft=k.type,qe=k.tagName;return!!(qe==="INPUT"&&a[ft]&&!k.readOnly||qe==="TEXTAREA"&&!k.readOnly||k.isContentEditable)}function c(k){k.classList.contains("focus-visible")||(k.classList.add("focus-visible"),k.setAttribute("data-focus-visible-added",""))}function 
l(k){k.hasAttribute("data-focus-visible-added")&&(k.classList.remove("focus-visible"),k.removeAttribute("data-focus-visible-added"))}function f(k){k.metaKey||k.altKey||k.ctrlKey||(s(r.activeElement)&&c(r.activeElement),o=!0)}function u(k){o=!1}function d(k){s(k.target)&&(o||p(k.target))&&c(k.target)}function y(k){s(k.target)&&(k.target.classList.contains("focus-visible")||k.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),l(k.target))}function L(k){document.visibilityState==="hidden"&&(n&&(o=!0),X())}function X(){document.addEventListener("mousemove",J),document.addEventListener("mousedown",J),document.addEventListener("mouseup",J),document.addEventListener("pointermove",J),document.addEventListener("pointerdown",J),document.addEventListener("pointerup",J),document.addEventListener("touchmove",J),document.addEventListener("touchstart",J),document.addEventListener("touchend",J)}function te(){document.removeEventListener("mousemove",J),document.removeEventListener("mousedown",J),document.removeEventListener("mouseup",J),document.removeEventListener("pointermove",J),document.removeEventListener("pointerdown",J),document.removeEventListener("pointerup",J),document.removeEventListener("touchmove",J),document.removeEventListener("touchstart",J),document.removeEventListener("touchend",J)}function J(k){k.target.nodeName&&k.target.nodeName.toLowerCase()==="html"||(o=!1,te())}document.addEventListener("keydown",f,!0),document.addEventListener("mousedown",u,!0),document.addEventListener("pointerdown",u,!0),document.addEventListener("touchstart",u,!0),document.addEventListener("visibilitychange",L,!0),X(),r.addEventListener("focus",d,!0),r.addEventListener("blur",y,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var qr=xr((hy,On)=>{"use strict";/*! + * escape-html + * Copyright(c) 2012-2013 TJ Holowaychuk + * Copyright(c) 2015 Andreas Lubbe + * Copyright(c) 2015 Tiancheng "Timothy" Gu + * MIT Licensed + */var $a=/["'&<>]/;On.exports=Pa;function Pa(e){var t=""+e,r=$a.exec(t);if(!r)return t;var o,n="",i=0,a=0;for(i=r.index;i{/*! 
+ * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */(function(t,r){typeof It=="object"&&typeof Yr=="object"?Yr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof It=="object"?It.ClipboardJS=r():t.ClipboardJS=r()})(It,function(){return function(){var e={686:function(o,n,i){"use strict";i.d(n,{default:function(){return Ui}});var a=i(279),s=i.n(a),p=i(370),c=i.n(p),l=i(817),f=i.n(l);function u(V){try{return document.execCommand(V)}catch(A){return!1}}var d=function(A){var M=f()(A);return u("cut"),M},y=d;function L(V){var A=document.documentElement.getAttribute("dir")==="rtl",M=document.createElement("textarea");M.style.fontSize="12pt",M.style.border="0",M.style.padding="0",M.style.margin="0",M.style.position="absolute",M.style[A?"right":"left"]="-9999px";var F=window.pageYOffset||document.documentElement.scrollTop;return M.style.top="".concat(F,"px"),M.setAttribute("readonly",""),M.value=V,M}var X=function(A,M){var F=L(A);M.container.appendChild(F);var D=f()(F);return u("copy"),F.remove(),D},te=function(A){var M=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},F="";return typeof A=="string"?F=X(A,M):A instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(A==null?void 0:A.type)?F=X(A.value,M):(F=f()(A),u("copy")),F},J=te;function k(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?k=function(M){return typeof M}:k=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},k(V)}var ft=function(){var A=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},M=A.action,F=M===void 0?"copy":M,D=A.container,Y=A.target,$e=A.text;if(F!=="copy"&&F!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(Y!==void 0)if(Y&&k(Y)==="object"&&Y.nodeType===1){if(F==="copy"&&Y.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if(F==="cut"&&(Y.hasAttribute("readonly")||Y.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. 
You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if($e)return J($e,{container:D});if(Y)return F==="cut"?y(Y):J(Y,{container:D})},qe=ft;function Fe(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Fe=function(M){return typeof M}:Fe=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},Fe(V)}function ki(V,A){if(!(V instanceof A))throw new TypeError("Cannot call a class as a function")}function no(V,A){for(var M=0;M0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof D.action=="function"?D.action:this.defaultAction,this.target=typeof D.target=="function"?D.target:this.defaultTarget,this.text=typeof D.text=="function"?D.text:this.defaultText,this.container=Fe(D.container)==="object"?D.container:document.body}},{key:"listenClick",value:function(D){var Y=this;this.listener=c()(D,"click",function($e){return Y.onClick($e)})}},{key:"onClick",value:function(D){var Y=D.delegateTarget||D.currentTarget,$e=this.action(Y)||"copy",Dt=qe({action:$e,container:this.container,target:this.target(Y),text:this.text(Y)});this.emit(Dt?"success":"error",{action:$e,text:Dt,trigger:Y,clearSelection:function(){Y&&Y.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(D){return vr("action",D)}},{key:"defaultTarget",value:function(D){var Y=vr("target",D);if(Y)return document.querySelector(Y)}},{key:"defaultText",value:function(D){return vr("text",D)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(D){var Y=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return J(D,Y)}},{key:"cut",value:function(D){return y(D)}},{key:"isSupported",value:function(){var D=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],Y=typeof D=="string"?[D]:D,$e=!!document.queryCommandSupported;return Y.forEach(function(Dt){$e=$e&&!!document.queryCommandSupported(Dt)}),$e}}]),M}(s()),Ui=Fi},828:function(o){var n=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,p){for(;s&&s.nodeType!==n;){if(typeof s.matches=="function"&&s.matches(p))return s;s=s.parentNode}}o.exports=a},438:function(o,n,i){var a=i(828);function s(l,f,u,d,y){var L=c.apply(this,arguments);return l.addEventListener(u,L,y),{destroy:function(){l.removeEventListener(u,L,y)}}}function p(l,f,u,d,y){return typeof l.addEventListener=="function"?s.apply(null,arguments):typeof u=="function"?s.bind(null,document).apply(null,arguments):(typeof l=="string"&&(l=document.querySelectorAll(l)),Array.prototype.map.call(l,function(L){return s(L,f,u,d,y)}))}function c(l,f,u,d){return function(y){y.delegateTarget=a(y.target,f),y.delegateTarget&&d.call(l,y)}}o.exports=p},879:function(o,n){n.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},n.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||n.node(i[0]))},n.string=function(i){return typeof i=="string"||i instanceof String},n.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(o,n,i){var a=i(879),s=i(438);function p(u,d,y){if(!u&&!d&&!y)throw new Error("Missing required 
arguments");if(!a.string(d))throw new TypeError("Second argument must be a String");if(!a.fn(y))throw new TypeError("Third argument must be a Function");if(a.node(u))return c(u,d,y);if(a.nodeList(u))return l(u,d,y);if(a.string(u))return f(u,d,y);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(u,d,y){return u.addEventListener(d,y),{destroy:function(){u.removeEventListener(d,y)}}}function l(u,d,y){return Array.prototype.forEach.call(u,function(L){L.addEventListener(d,y)}),{destroy:function(){Array.prototype.forEach.call(u,function(L){L.removeEventListener(d,y)})}}}function f(u,d,y){return s(document.body,u,d,y)}o.exports=p},817:function(o){function n(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var p=window.getSelection(),c=document.createRange();c.selectNodeContents(i),p.removeAllRanges(),p.addRange(c),a=p.toString()}return a}o.exports=n},279:function(o){function n(){}n.prototype={on:function(i,a,s){var p=this.e||(this.e={});return(p[i]||(p[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var p=this;function c(){p.off(i,c),a.apply(s,arguments)}return c._=a,this.on(i,c,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),p=0,c=s.length;for(p;p0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[o++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function N(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var o=r.call(e),n,i=[],a;try{for(;(t===void 0||t-- >0)&&!(n=o.next()).done;)i.push(n.value)}catch(s){a={error:s}}finally{try{n&&!n.done&&(r=o.return)&&r.call(o)}finally{if(a)throw a.error}}return i}function q(e,t,r){if(r||arguments.length===2)for(var o=0,n=t.length,i;o1||p(d,L)})},y&&(n[d]=y(n[d])))}function p(d,y){try{c(o[d](y))}catch(L){u(i[0][3],L)}}function c(d){d.value instanceof nt?Promise.resolve(d.value.v).then(l,f):u(i[0][2],d)}function l(d){p("next",d)}function f(d){p("throw",d)}function u(d,y){d(y),i.shift(),i.length&&p(i[0][0],i[0][1])}}function uo(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof he=="function"?he(e):e[Symbol.iterator](),r={},o("next"),o("throw"),o("return"),r[Symbol.asyncIterator]=function(){return this},r);function o(i){r[i]=e[i]&&function(a){return new Promise(function(s,p){a=e[i](a),n(s,p,a.done,a.value)})}}function n(i,a,s,p){Promise.resolve(p).then(function(c){i({value:c,done:s})},a)}}function H(e){return typeof e=="function"}function ut(e){var t=function(o){Error.call(o),o.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var zt=ut(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: +`+r.map(function(o,n){return n+1+") "+o.toString()}).join(` + `):"",this.name="UnsubscriptionError",this.errors=r}});function Qe(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ue=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var 
t,r,o,n,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=he(a),p=s.next();!p.done;p=s.next()){var c=p.value;c.remove(this)}}catch(L){t={error:L}}finally{try{p&&!p.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var l=this.initialTeardown;if(H(l))try{l()}catch(L){i=L instanceof zt?L.errors:[L]}var f=this._finalizers;if(f){this._finalizers=null;try{for(var u=he(f),d=u.next();!d.done;d=u.next()){var y=d.value;try{ho(y)}catch(L){i=i!=null?i:[],L instanceof zt?i=q(q([],N(i)),N(L.errors)):i.push(L)}}}catch(L){o={error:L}}finally{try{d&&!d.done&&(n=u.return)&&n.call(u)}finally{if(o)throw o.error}}}if(i)throw new zt(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)ho(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Qe(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Qe(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Tr=Ue.EMPTY;function qt(e){return e instanceof Ue||e&&"closed"in e&&H(e.remove)&&H(e.add)&&H(e.unsubscribe)}function ho(e){H(e)?e():e.unsubscribe()}var Pe={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var dt={setTimeout:function(e,t){for(var r=[],o=2;o0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var o=this,n=this,i=n.hasError,a=n.isStopped,s=n.observers;return i||a?Tr:(this.currentObservers=null,s.push(r),new Ue(function(){o.currentObservers=null,Qe(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var o=this,n=o.hasError,i=o.thrownError,a=o.isStopped;n?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new j;return r.source=this,r},t.create=function(r,o){return new To(r,o)},t}(j);var To=function(e){oe(t,e);function t(r,o){var n=e.call(this)||this;return n.destination=r,n.source=o,n}return t.prototype.next=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.next)===null||n===void 0||n.call(o,r)},t.prototype.error=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.error)===null||n===void 0||n.call(o,r)},t.prototype.complete=function(){var r,o;(o=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||o===void 0||o.call(r)},t.prototype._subscribe=function(r){var o,n;return(n=(o=this.source)===null||o===void 0?void 0:o.subscribe(r))!==null&&n!==void 0?n:Tr},t}(g);var _r=function(e){oe(t,e);function t(r){var o=e.call(this)||this;return o._value=r,o}return Object.defineProperty(t.prototype,"value",{get:function(){return this.getValue()},enumerable:!1,configurable:!0}),t.prototype._subscribe=function(r){var o=e.prototype._subscribe.call(this,r);return!o.closed&&r.next(this._value),o},t.prototype.getValue=function(){var 
r=this,o=r.hasError,n=r.thrownError,i=r._value;if(o)throw n;return this._throwIfClosed(),i},t.prototype.next=function(r){e.prototype.next.call(this,this._value=r)},t}(g);var At={now:function(){return(At.delegate||Date).now()},delegate:void 0};var Ct=function(e){oe(t,e);function t(r,o,n){r===void 0&&(r=1/0),o===void 0&&(o=1/0),n===void 0&&(n=At);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=o,i._timestampProvider=n,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=o===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,o),i}return t.prototype.next=function(r){var o=this,n=o.isStopped,i=o._buffer,a=o._infiniteTimeWindow,s=o._timestampProvider,p=o._windowTime;n||(i.push(r),!a&&i.push(s.now()+p)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var o=this._innerSubscribe(r),n=this,i=n._infiniteTimeWindow,a=n._buffer,s=a.slice(),p=0;p0?e.prototype.schedule.call(this,r,o):(this.delay=o,this.state=r,this.scheduler.flush(this),this)},t.prototype.execute=function(r,o){return o>0||this.closed?e.prototype.execute.call(this,r,o):this._execute(r,o)},t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!=null&&n>0||n==null&&this.delay>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.flush(this),0)},t}(gt);var Lo=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t}(yt);var kr=new Lo(Oo);var Mo=function(e){oe(t,e);function t(r,o){var n=e.call(this,r,o)||this;return n.scheduler=r,n.work=o,n}return t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!==null&&n>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.actions.push(this),r._scheduled||(r._scheduled=vt.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,o,n){var i;if(n===void 0&&(n=0),n!=null?n>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,o,n);var a=r.actions;o!=null&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==o&&(vt.cancelAnimationFrame(o),r._scheduled=void 0)},t}(gt);var _o=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var o=this._scheduled;this._scheduled=void 0;var n=this.actions,i;r=r||n.shift();do if(i=r.execute(r.state,r.delay))break;while((r=n[0])&&r.id===o&&n.shift());if(this._active=!1,i){for(;(r=n[0])&&r.id===o&&n.shift();)r.unsubscribe();throw i}},t}(yt);var me=new _o(Mo);var S=new j(function(e){return e.complete()});function Yt(e){return e&&H(e.schedule)}function Hr(e){return e[e.length-1]}function Xe(e){return H(Hr(e))?e.pop():void 0}function ke(e){return Yt(Hr(e))?e.pop():void 0}function Bt(e,t){return typeof Hr(e)=="number"?e.pop():t}var xt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Gt(e){return H(e==null?void 0:e.then)}function Jt(e){return H(e[bt])}function Xt(e){return Symbol.asyncIterator&&H(e==null?void 0:e[Symbol.asyncIterator])}function Zt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Zi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var er=Zi();function tr(e){return H(e==null?void 0:e[er])}function rr(e){return fo(this,arguments,function(){var r,o,n,i;return Nt(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,nt(r.read())];case 3:return o=a.sent(),n=o.value,i=o.done,i?[4,nt(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,nt(n)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function or(e){return H(e==null?void 0:e.getReader)}function U(e){if(e instanceof j)return e;if(e!=null){if(Jt(e))return ea(e);if(xt(e))return ta(e);if(Gt(e))return ra(e);if(Xt(e))return Ao(e);if(tr(e))return oa(e);if(or(e))return na(e)}throw Zt(e)}function ea(e){return new j(function(t){var r=e[bt]();if(H(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function ta(e){return new j(function(t){for(var r=0;r=2;return function(o){return o.pipe(e?b(function(n,i){return e(n,i,o)}):le,Te(1),r?De(t):Qo(function(){return new ir}))}}function jr(e){return e<=0?function(){return S}:E(function(t,r){var o=[];t.subscribe(T(r,function(n){o.push(n),e=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new g}:t,o=e.resetOnError,n=o===void 0?!0:o,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,p=s===void 0?!0:s;return function(c){var l,f,u,d=0,y=!1,L=!1,X=function(){f==null||f.unsubscribe(),f=void 0},te=function(){X(),l=u=void 0,y=L=!1},J=function(){var k=l;te(),k==null||k.unsubscribe()};return E(function(k,ft){d++,!L&&!y&&X();var qe=u=u!=null?u:r();ft.add(function(){d--,d===0&&!L&&!y&&(f=Ur(J,p))}),qe.subscribe(ft),!l&&d>0&&(l=new at({next:function(Fe){return qe.next(Fe)},error:function(Fe){L=!0,X(),f=Ur(te,n,Fe),qe.error(Fe)},complete:function(){y=!0,X(),f=Ur(te,a),qe.complete()}}),U(k).subscribe(l))})(c)}}function Ur(e,t){for(var r=[],o=2;oe.next(document)),e}function P(e,t=document){return Array.from(t.querySelectorAll(e))}function R(e,t=document){let r=fe(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function fe(e,t=document){return t.querySelector(e)||void 0}function Ie(){var e,t,r,o;return(o=(r=(t=(e=document.activeElement)==null?void 0:e.shadowRoot)==null?void 0:t.activeElement)!=null?r:document.activeElement)!=null?o:void 0}var wa=O(h(document.body,"focusin"),h(document.body,"focusout")).pipe(_e(1),Q(void 0),m(()=>Ie()||document.body),G(1));function et(e){return wa.pipe(m(t=>e.contains(t)),K())}function $t(e,t){return C(()=>O(h(e,"mouseenter").pipe(m(()=>!0)),h(e,"mouseleave").pipe(m(()=>!1))).pipe(t?Ht(r=>Le(+!r*t)):le,Q(e.matches(":hover"))))}function Jo(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)Jo(e,r)}function x(e,t,...r){let o=document.createElement(e);if(t)for(let n of Object.keys(t))typeof t[n]!="undefined"&&(typeof t[n]!="boolean"?o.setAttribute(n,t[n]):o.setAttribute(n,""));for(let n of r)Jo(o,n);return o}function sr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function Tt(e){let t=x("script",{src:e});return 
C(()=>(document.head.appendChild(t),O(h(t,"load"),h(t,"error").pipe(v(()=>$r(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(m(()=>{}),_(()=>document.head.removeChild(t)),Te(1))))}var Xo=new g,Ta=C(()=>typeof ResizeObserver=="undefined"?Tt("https://unpkg.com/resize-observer-polyfill"):I(void 0)).pipe(m(()=>new ResizeObserver(e=>e.forEach(t=>Xo.next(t)))),v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function ce(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ge(e){let t=e;for(;t.clientWidth===0&&t.parentElement;)t=t.parentElement;return Ta.pipe(w(r=>r.observe(t)),v(r=>Xo.pipe(b(o=>o.target===t),_(()=>r.unobserve(t)))),m(()=>ce(e)),Q(ce(e)))}function St(e){return{width:e.scrollWidth,height:e.scrollHeight}}function cr(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}function Zo(e){let t=[],r=e.parentElement;for(;r;)(e.clientWidth>r.clientWidth||e.clientHeight>r.clientHeight)&&t.push(r),r=(e=r).parentElement;return t.length===0&&t.push(document.documentElement),t}function Ve(e){return{x:e.offsetLeft,y:e.offsetTop}}function en(e){let t=e.getBoundingClientRect();return{x:t.x+window.scrollX,y:t.y+window.scrollY}}function tn(e){return O(h(window,"load"),h(window,"resize")).pipe(Me(0,me),m(()=>Ve(e)),Q(Ve(e)))}function pr(e){return{x:e.scrollLeft,y:e.scrollTop}}function Ne(e){return O(h(e,"scroll"),h(window,"scroll"),h(window,"resize")).pipe(Me(0,me),m(()=>pr(e)),Q(pr(e)))}var rn=new g,Sa=C(()=>I(new IntersectionObserver(e=>{for(let t of e)rn.next(t)},{threshold:0}))).pipe(v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function tt(e){return Sa.pipe(w(t=>t.observe(e)),v(t=>rn.pipe(b(({target:r})=>r===e),_(()=>t.unobserve(e)),m(({isIntersecting:r})=>r))))}function on(e,t=16){return Ne(e).pipe(m(({y:r})=>{let o=ce(e),n=St(e);return r>=n.height-o.height-t}),K())}var lr={drawer:R("[data-md-toggle=drawer]"),search:R("[data-md-toggle=search]")};function nn(e){return lr[e].checked}function Je(e,t){lr[e].checked!==t&&lr[e].click()}function ze(e){let t=lr[e];return h(t,"change").pipe(m(()=>t.checked),Q(t.checked))}function Oa(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function La(){return O(h(window,"compositionstart").pipe(m(()=>!0)),h(window,"compositionend").pipe(m(()=>!1))).pipe(Q(!1))}function an(){let e=h(window,"keydown").pipe(b(t=>!(t.metaKey||t.ctrlKey)),m(t=>({mode:nn("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),b(({mode:t,type:r})=>{if(t==="global"){let o=Ie();if(typeof o!="undefined")return!Oa(o,r)}return!0}),pe());return La().pipe(v(t=>t?S:e))}function ye(){return new URL(location.href)}function lt(e,t=!1){if(B("navigation.instant")&&!t){let r=x("a",{href:e.href});document.body.appendChild(r),r.click(),r.remove()}else location.href=e.href}function sn(){return new g}function cn(){return location.hash.slice(1)}function pn(e){let t=x("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Ma(e){return O(h(window,"hashchange"),e).pipe(m(cn),Q(cn()),b(t=>t.length>0),G(1))}function ln(e){return Ma(e).pipe(m(t=>fe(`[id="${t}"]`)),b(t=>typeof t!="undefined"))}function Pt(e){let t=matchMedia(e);return ar(r=>t.addListener(()=>r(t.matches))).pipe(Q(t.matches))}function mn(){let e=matchMedia("print");return 
O(h(window,"beforeprint").pipe(m(()=>!0)),h(window,"afterprint").pipe(m(()=>!1))).pipe(Q(e.matches))}function Nr(e,t){return e.pipe(v(r=>r?t():S))}function zr(e,t){return new j(r=>{let o=new XMLHttpRequest;return o.open("GET",`${e}`),o.responseType="blob",o.addEventListener("load",()=>{o.status>=200&&o.status<300?(r.next(o.response),r.complete()):r.error(new Error(o.statusText))}),o.addEventListener("error",()=>{r.error(new Error("Network error"))}),o.addEventListener("abort",()=>{r.complete()}),typeof(t==null?void 0:t.progress$)!="undefined"&&(o.addEventListener("progress",n=>{var i;if(n.lengthComputable)t.progress$.next(n.loaded/n.total*100);else{let a=(i=o.getResponseHeader("Content-Length"))!=null?i:0;t.progress$.next(n.loaded/+a*100)}}),t.progress$.next(5)),o.send(),()=>o.abort()})}function je(e,t){return zr(e,t).pipe(v(r=>r.text()),m(r=>JSON.parse(r)),G(1))}function fn(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/html")),G(1))}function un(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/xml")),G(1))}function dn(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function hn(){return O(h(window,"scroll",{passive:!0}),h(window,"resize",{passive:!0})).pipe(m(dn),Q(dn()))}function bn(){return{width:innerWidth,height:innerHeight}}function vn(){return h(window,"resize",{passive:!0}).pipe(m(bn),Q(bn()))}function gn(){return z([hn(),vn()]).pipe(m(([e,t])=>({offset:e,size:t})),G(1))}function mr(e,{viewport$:t,header$:r}){let o=t.pipe(ee("size")),n=z([o,r]).pipe(m(()=>Ve(e)));return z([r,t,n]).pipe(m(([{height:i},{offset:a,size:s},{x:p,y:c}])=>({offset:{x:a.x-p,y:a.y-c+i},size:s})))}function _a(e){return h(e,"message",t=>t.data)}function Aa(e){let t=new g;return t.subscribe(r=>e.postMessage(r)),t}function yn(e,t=new Worker(e)){let r=_a(t),o=Aa(t),n=new g;n.subscribe(o);let i=o.pipe(Z(),ie(!0));return n.pipe(Z(),Re(r.pipe(W(i))),pe())}var Ca=R("#__config"),Ot=JSON.parse(Ca.textContent);Ot.base=`${new URL(Ot.base,ye())}`;function xe(){return Ot}function B(e){return Ot.features.includes(e)}function Ee(e,t){return typeof t!="undefined"?Ot.translations[e].replace("#",t.toString()):Ot.translations[e]}function Se(e,t=document){return R(`[data-md-component=${e}]`,t)}function ae(e,t=document){return P(`[data-md-component=${e}]`,t)}function ka(e){let t=R(".md-typeset > :first-child",e);return h(t,"click",{once:!0}).pipe(m(()=>R(".md-typeset",e)),m(r=>({hash:__md_hash(r.innerHTML)})))}function xn(e){if(!B("announce.dismiss")||!e.childElementCount)return S;if(!e.hidden){let t=R(".md-typeset",e);__md_hash(t.innerHTML)===__md_get("__announce")&&(e.hidden=!0)}return C(()=>{let t=new g;return t.subscribe(({hash:r})=>{e.hidden=!0,__md_set("__announce",r)}),ka(e).pipe(w(r=>t.next(r)),_(()=>t.complete()),m(r=>$({ref:e},r)))})}function Ha(e,{target$:t}){return t.pipe(m(r=>({hidden:r!==e})))}function En(e,t){let r=new g;return r.subscribe(({hidden:o})=>{e.hidden=o}),Ha(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))}function Rt(e,t){return t==="inline"?x("div",{class:"md-tooltip md-tooltip--inline",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"})):x("div",{class:"md-tooltip",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"}))}function wn(...e){return x("div",{class:"md-tooltip2",role:"tooltip"},x("div",{class:"md-tooltip2__inner md-typeset"},e))}function Tn(e,t){if(t=t?`${t}_annotation_${e}`:void 0,t){let r=t?`#${t}`:void 0;return 
x("aside",{class:"md-annotation",tabIndex:0},Rt(t),x("a",{href:r,class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}else return x("aside",{class:"md-annotation",tabIndex:0},Rt(t),x("span",{class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}function Sn(e){return x("button",{class:"md-clipboard md-icon",title:Ee("clipboard.copy"),"data-clipboard-target":`#${e} > code`})}var Ln=Mt(qr());function Qr(e,t){let r=t&2,o=t&1,n=Object.keys(e.terms).filter(p=>!e.terms[p]).reduce((p,c)=>[...p,x("del",null,(0,Ln.default)(c))," "],[]).slice(0,-1),i=xe(),a=new URL(e.location,i.base);B("search.highlight")&&a.searchParams.set("h",Object.entries(e.terms).filter(([,p])=>p).reduce((p,[c])=>`${p} ${c}`.trim(),""));let{tags:s}=xe();return x("a",{href:`${a}`,class:"md-search-result__link",tabIndex:-1},x("article",{class:"md-search-result__article md-typeset","data-md-score":e.score.toFixed(2)},r>0&&x("div",{class:"md-search-result__icon md-icon"}),r>0&&x("h1",null,e.title),r<=0&&x("h2",null,e.title),o>0&&e.text.length>0&&e.text,e.tags&&x("nav",{class:"md-tags"},e.tags.map(p=>{let c=s?p in s?`md-tag-icon md-tag--${s[p]}`:"md-tag-icon":"";return x("span",{class:`md-tag ${c}`},p)})),o>0&&n.length>0&&x("p",{class:"md-search-result__terms"},Ee("search.result.term.missing"),": ",...n)))}function Mn(e){let t=e[0].score,r=[...e],o=xe(),n=r.findIndex(l=>!`${new URL(l.location,o.base)}`.includes("#")),[i]=r.splice(n,1),a=r.findIndex(l=>l.scoreQr(l,1)),...p.length?[x("details",{class:"md-search-result__more"},x("summary",{tabIndex:-1},x("div",null,p.length>0&&p.length===1?Ee("search.result.more.one"):Ee("search.result.more.other",p.length))),...p.map(l=>Qr(l,1)))]:[]];return x("li",{class:"md-search-result__item"},c)}function _n(e){return x("ul",{class:"md-source__facts"},Object.entries(e).map(([t,r])=>x("li",{class:`md-source__fact md-source__fact--${t}`},typeof r=="number"?sr(r):r)))}function Kr(e){let t=`tabbed-control tabbed-control--${e}`;return x("div",{class:t,hidden:!0},x("button",{class:"tabbed-button",tabIndex:-1,"aria-hidden":"true"}))}function An(e){return x("div",{class:"md-typeset__scrollwrap"},x("div",{class:"md-typeset__table"},e))}function Ra(e){var o;let t=xe(),r=new URL(`../${e.version}/`,t.base);return x("li",{class:"md-version__item"},x("a",{href:`${r}`,class:"md-version__link"},e.title,((o=t.version)==null?void 0:o.alias)&&e.aliases.length>0&&x("span",{class:"md-version__alias"},e.aliases[0])))}function Cn(e,t){var o;let r=xe();return e=e.filter(n=>{var i;return!((i=n.properties)!=null&&i.hidden)}),x("div",{class:"md-version"},x("button",{class:"md-version__current","aria-label":Ee("select.version")},t.title,((o=r.version)==null?void 0:o.alias)&&t.aliases.length>0&&x("span",{class:"md-version__alias"},t.aliases[0])),x("ul",{class:"md-version__list"},e.map(Ra)))}var Ia=0;function ja(e){let t=z([et(e),$t(e)]).pipe(m(([o,n])=>o||n),K()),r=C(()=>Zo(e)).pipe(ne(Ne),pt(1),He(t),m(()=>en(e)));return t.pipe(Ae(o=>o),v(()=>z([t,r])),m(([o,n])=>({active:o,offset:n})),pe())}function Fa(e,t){let{content$:r,viewport$:o}=t,n=`__tooltip2_${Ia++}`;return C(()=>{let i=new g,a=new _r(!1);i.pipe(Z(),ie(!1)).subscribe(a);let s=a.pipe(Ht(c=>Le(+!c*250,kr)),K(),v(c=>c?r:S),w(c=>c.id=n),pe());z([i.pipe(m(({active:c})=>c)),s.pipe(v(c=>$t(c,250)),Q(!1))]).pipe(m(c=>c.some(l=>l))).subscribe(a);let p=a.pipe(b(c=>c),re(s,o),m(([c,l,{size:f}])=>{let 
u=e.getBoundingClientRect(),d=u.width/2;if(l.role==="tooltip")return{x:d,y:8+u.height};if(u.y>=f.height/2){let{height:y}=ce(l);return{x:d,y:-16-y}}else return{x:d,y:16+u.height}}));return z([s,i,p]).subscribe(([c,{offset:l},f])=>{c.style.setProperty("--md-tooltip-host-x",`${l.x}px`),c.style.setProperty("--md-tooltip-host-y",`${l.y}px`),c.style.setProperty("--md-tooltip-x",`${f.x}px`),c.style.setProperty("--md-tooltip-y",`${f.y}px`),c.classList.toggle("md-tooltip2--top",f.y<0),c.classList.toggle("md-tooltip2--bottom",f.y>=0)}),a.pipe(b(c=>c),re(s,(c,l)=>l),b(c=>c.role==="tooltip")).subscribe(c=>{let l=ce(R(":scope > *",c));c.style.setProperty("--md-tooltip-width",`${l.width}px`),c.style.setProperty("--md-tooltip-tail","0px")}),a.pipe(K(),ve(me),re(s)).subscribe(([c,l])=>{l.classList.toggle("md-tooltip2--active",c)}),z([a.pipe(b(c=>c)),s]).subscribe(([c,l])=>{l.role==="dialog"?(e.setAttribute("aria-controls",n),e.setAttribute("aria-haspopup","dialog")):e.setAttribute("aria-describedby",n)}),a.pipe(b(c=>!c)).subscribe(()=>{e.removeAttribute("aria-controls"),e.removeAttribute("aria-describedby"),e.removeAttribute("aria-haspopup")}),ja(e).pipe(w(c=>i.next(c)),_(()=>i.complete()),m(c=>$({ref:e},c)))})}function mt(e,{viewport$:t},r=document.body){return Fa(e,{content$:new j(o=>{let n=e.title,i=wn(n);return o.next(i),e.removeAttribute("title"),r.append(i),()=>{i.remove(),e.setAttribute("title",n)}}),viewport$:t})}function Ua(e,t){let r=C(()=>z([tn(e),Ne(t)])).pipe(m(([{x:o,y:n},i])=>{let{width:a,height:s}=ce(e);return{x:o-i.x+a/2,y:n-i.y+s/2}}));return et(e).pipe(v(o=>r.pipe(m(n=>({active:o,offset:n})),Te(+!o||1/0))))}function kn(e,t,{target$:r}){let[o,n]=Array.from(e.children);return C(()=>{let i=new g,a=i.pipe(Z(),ie(!0));return i.subscribe({next({offset:s}){e.style.setProperty("--md-tooltip-x",`${s.x}px`),e.style.setProperty("--md-tooltip-y",`${s.y}px`)},complete(){e.style.removeProperty("--md-tooltip-x"),e.style.removeProperty("--md-tooltip-y")}}),tt(e).pipe(W(a)).subscribe(s=>{e.toggleAttribute("data-md-visible",s)}),O(i.pipe(b(({active:s})=>s)),i.pipe(_e(250),b(({active:s})=>!s))).subscribe({next({active:s}){s?e.prepend(o):o.remove()},complete(){e.prepend(o)}}),i.pipe(Me(16,me)).subscribe(({active:s})=>{o.classList.toggle("md-tooltip--active",s)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:s})=>s)).subscribe({next(s){s?e.style.setProperty("--md-tooltip-0",`${-s}px`):e.style.removeProperty("--md-tooltip-0")},complete(){e.style.removeProperty("--md-tooltip-0")}}),h(n,"click").pipe(W(a),b(s=>!(s.metaKey||s.ctrlKey))).subscribe(s=>{s.stopPropagation(),s.preventDefault()}),h(n,"mousedown").pipe(W(a),re(i)).subscribe(([s,{active:p}])=>{var c;if(s.button!==0||s.metaKey||s.ctrlKey)s.preventDefault();else if(p){s.preventDefault();let l=e.parentElement.closest(".md-annotation");l instanceof HTMLElement?l.focus():(c=Ie())==null||c.blur()}}),r.pipe(W(a),b(s=>s===o),Ge(125)).subscribe(()=>e.focus()),Ua(e,t).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function Wa(e){return e.tagName==="CODE"?P(".c, .c1, .cm",e):[e]}function Da(e){let t=[];for(let r of Wa(e)){let o=[],n=document.createNodeIterator(r,NodeFilter.SHOW_TEXT);for(let i=n.nextNode();i;i=n.nextNode())o.push(i);for(let i of o){let a;for(;a=/(\(\d+\))(!)?/.exec(i.textContent);){let[,s,p]=a;if(typeof p=="undefined"){let c=i.splitText(a.index);i=c.splitText(s.length),t.push(c)}else{i.textContent=s,t.push(i);break}}}}return t}function 
Hn(e,t){t.append(...Array.from(e.childNodes))}function fr(e,t,{target$:r,print$:o}){let n=t.closest("[id]"),i=n==null?void 0:n.id,a=new Map;for(let s of Da(t)){let[,p]=s.textContent.match(/\((\d+)\)/);fe(`:scope > li:nth-child(${p})`,e)&&(a.set(p,Tn(p,i)),s.replaceWith(a.get(p)))}return a.size===0?S:C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=[];for(let[l,f]of a)c.push([R(".md-typeset",f),R(`:scope > li:nth-child(${l})`,e)]);return o.pipe(W(p)).subscribe(l=>{e.hidden=!l,e.classList.toggle("md-annotation-list",l);for(let[f,u]of c)l?Hn(f,u):Hn(u,f)}),O(...[...a].map(([,l])=>kn(l,t,{target$:r}))).pipe(_(()=>s.complete()),pe())})}function $n(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return $n(t)}}function Pn(e,t){return C(()=>{let r=$n(e);return typeof r!="undefined"?fr(r,e,t):S})}var Rn=Mt(Br());var Va=0;function In(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return In(t)}}function Na(e){return ge(e).pipe(m(({width:t})=>({scrollable:St(e).width>t})),ee("scrollable"))}function jn(e,t){let{matches:r}=matchMedia("(hover)"),o=C(()=>{let n=new g,i=n.pipe(jr(1));n.subscribe(({scrollable:c})=>{c&&r?e.setAttribute("tabindex","0"):e.removeAttribute("tabindex")});let a=[];if(Rn.default.isSupported()&&(e.closest(".copy")||B("content.code.copy")&&!e.closest(".no-copy"))){let c=e.closest("pre");c.id=`__code_${Va++}`;let l=Sn(c.id);c.insertBefore(l,e),B("content.tooltips")&&a.push(mt(l,{viewport$}))}let s=e.closest(".highlight");if(s instanceof HTMLElement){let c=In(s);if(typeof c!="undefined"&&(s.classList.contains("annotate")||B("content.code.annotate"))){let l=fr(c,e,t);a.push(ge(s).pipe(W(i),m(({width:f,height:u})=>f&&u),K(),v(f=>f?l:S)))}}return P(":scope > span[id]",e).length&&e.classList.add("md-code__content"),Na(e).pipe(w(c=>n.next(c)),_(()=>n.complete()),m(c=>$({ref:e},c)),Re(...a))});return B("content.lazy")?tt(e).pipe(b(n=>n),Te(1),v(()=>o)):o}function za(e,{target$:t,print$:r}){let o=!0;return O(t.pipe(m(n=>n.closest("details:not([open])")),b(n=>e===n),m(()=>({action:"open",reveal:!0}))),r.pipe(b(n=>n||!o),w(()=>o=e.open),m(n=>({action:n?"open":"close"}))))}function Fn(e,t){return C(()=>{let r=new g;return r.subscribe(({action:o,reveal:n})=>{e.toggleAttribute("open",o==="open"),n&&e.scrollIntoView()}),za(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}var Un=".node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}marker{fill:var(--md-mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.label{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--md-mermaid-label-fg-color)}.edgeLabel,.edgeLabel p,.label div .edgeLabel{background-color:var(--md-mermaid-label-bg-color)}.edgeLabel,.edgeLabel p{fill:var(--md-mermaid-label-bg-color);color:var(--md-mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--md-mermaid-edge-color);stroke-width:.05rem}.edgePath .arrowheadPath{fill:var(--md-mermaid-edge-color);stroke:none}.cluster rect{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}.cluster span{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}g #flowchart-circleEnd,g #flowchart-circleStart,g #flowchart-crossEnd,g #flowchart-crossStart,g 
#flowchart-pointEnd,g #flowchart-pointStart{stroke:none}g.classGroup line,g.classGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.classGroup text{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.classLabel .box{fill:var(--md-mermaid-label-bg-color);background-color:var(--md-mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node .divider{stroke:var(--md-mermaid-node-fg-color)}.relation{stroke:var(--md-mermaid-edge-color)}.cardinality{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.cardinality text{fill:inherit!important}defs #classDiagram-compositionEnd,defs #classDiagram-compositionStart,defs #classDiagram-dependencyEnd,defs #classDiagram-dependencyStart,defs #classDiagram-extensionEnd,defs #classDiagram-extensionStart{fill:var(--md-mermaid-edge-color)!important;stroke:var(--md-mermaid-edge-color)!important}defs #classDiagram-aggregationEnd,defs #classDiagram-aggregationStart{fill:var(--md-mermaid-label-bg-color)!important;stroke:var(--md-mermaid-edge-color)!important}g.stateGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--md-mermaid-label-fg-color)!important;font-family:var(--md-mermaid-font-family)}g.stateGroup .composit{fill:var(--md-mermaid-label-bg-color)}.nodeLabel,.nodeLabel p{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}a .nodeLabel{text-decoration:underline}.node circle.state-end,.node circle.state-start,.start-state{fill:var(--md-mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--md-mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--md-mermaid-label-bg-color)}.transition{stroke:var(--md-mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--md-mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--md-default-bg-color)}.statediagram-cluster rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}defs #statediagram-barbEnd{stroke:var(--md-mermaid-edge-color)}.attributeBoxEven,.attributeBoxOdd{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityBox{fill:var(--md-mermaid-label-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityLabel{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.relationshipLabelBox{fill:var(--md-mermaid-label-bg-color);fill-opacity:1;background-color:var(--md-mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--md-mermaid-label-fg-color)}.relationshipLine{stroke:var(--md-mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--md-mermaid-edge-color)!important}defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--md-mermaid-label-bg-color)}.actor{fill:var(--md-mermaid-sequence-actor-bg-color);stroke:var(--md-mermaid-sequence-actor-border-color)}text.actor>tspan{fill:var(--md-mermaid-sequence-actor-fg-color);font-family:var(--md-mermaid-font-family)}line{stroke:var(--md-mermaid-sequence-actor-line-color)}.actor-man circle,.actor-man 
line{fill:var(--md-mermaid-sequence-actorman-bg-color);stroke:var(--md-mermaid-sequence-actorman-line-color)}.messageLine0,.messageLine1{stroke:var(--md-mermaid-sequence-message-line-color)}.note{fill:var(--md-mermaid-sequence-note-bg-color);stroke:var(--md-mermaid-sequence-note-border-color)}.loopText,.loopText>tspan,.messageText,.noteText>tspan{stroke:none;font-family:var(--md-mermaid-font-family)!important}.messageText{fill:var(--md-mermaid-sequence-message-fg-color)}.loopText,.loopText>tspan{fill:var(--md-mermaid-sequence-loop-fg-color)}.noteText>tspan{fill:var(--md-mermaid-sequence-note-fg-color)}#arrowhead path{fill:var(--md-mermaid-sequence-message-line-color);stroke:none}.loopLine{fill:var(--md-mermaid-sequence-loop-bg-color);stroke:var(--md-mermaid-sequence-loop-border-color)}.labelBox{fill:var(--md-mermaid-sequence-label-bg-color);stroke:none}.labelText,.labelText>span{fill:var(--md-mermaid-sequence-label-fg-color);font-family:var(--md-mermaid-font-family)}.sequenceNumber{fill:var(--md-mermaid-sequence-number-fg-color)}rect.rect{fill:var(--md-mermaid-sequence-box-bg-color);stroke:none}rect.rect+text.text{fill:var(--md-mermaid-sequence-box-fg-color)}defs #sequencenumber{fill:var(--md-mermaid-sequence-number-bg-color)!important}";var Gr,Qa=0;function Ka(){return typeof mermaid=="undefined"||mermaid instanceof Element?Tt("https://unpkg.com/mermaid@11/dist/mermaid.min.js"):I(void 0)}function Wn(e){return e.classList.remove("mermaid"),Gr||(Gr=Ka().pipe(w(()=>mermaid.initialize({startOnLoad:!1,themeCSS:Un,sequence:{actorFontSize:"16px",messageFontSize:"16px",noteFontSize:"16px"}})),m(()=>{}),G(1))),Gr.subscribe(()=>co(this,null,function*(){e.classList.add("mermaid");let t=`__mermaid_${Qa++}`,r=x("div",{class:"mermaid"}),o=e.textContent,{svg:n,fn:i}=yield mermaid.render(t,o),a=r.attachShadow({mode:"closed"});a.innerHTML=n,e.replaceWith(r),i==null||i(a)})),Gr.pipe(m(()=>({ref:e})))}var Dn=x("table");function Vn(e){return e.replaceWith(Dn),Dn.replaceWith(An(e)),I({ref:e})}function Ya(e){let t=e.find(r=>r.checked)||e[0];return O(...e.map(r=>h(r,"change").pipe(m(()=>R(`label[for="${r.id}"]`))))).pipe(Q(R(`label[for="${t.id}"]`)),m(r=>({active:r})))}function Nn(e,{viewport$:t,target$:r}){let o=R(".tabbed-labels",e),n=P(":scope > input",e),i=Kr("prev");e.append(i);let a=Kr("next");return e.append(a),C(()=>{let s=new g,p=s.pipe(Z(),ie(!0));z([s,ge(e),tt(e)]).pipe(W(p),Me(1,me)).subscribe({next([{active:c},l]){let f=Ve(c),{width:u}=ce(c);e.style.setProperty("--md-indicator-x",`${f.x}px`),e.style.setProperty("--md-indicator-width",`${u}px`);let d=pr(o);(f.xd.x+l.width)&&o.scrollTo({left:Math.max(0,f.x-16),behavior:"smooth"})},complete(){e.style.removeProperty("--md-indicator-x"),e.style.removeProperty("--md-indicator-width")}}),z([Ne(o),ge(o)]).pipe(W(p)).subscribe(([c,l])=>{let f=St(o);i.hidden=c.x<16,a.hidden=c.x>f.width-l.width-16}),O(h(i,"click").pipe(m(()=>-1)),h(a,"click").pipe(m(()=>1))).pipe(W(p)).subscribe(c=>{let{width:l}=ce(o);o.scrollBy({left:l*c,behavior:"smooth"})}),r.pipe(W(p),b(c=>n.includes(c))).subscribe(c=>c.click()),o.classList.add("tabbed-labels--linked");for(let c of n){let l=R(`label[for="${c.id}"]`);l.replaceChildren(x("a",{href:`#${l.htmlFor}`,tabIndex:-1},...Array.from(l.childNodes))),h(l.firstElementChild,"click").pipe(W(p),b(f=>!(f.metaKey||f.ctrlKey)),w(f=>{f.preventDefault(),f.stopPropagation()})).subscribe(()=>{history.replaceState({},"",`#${l.htmlFor}`),l.click()})}return B("content.tabs.link")&&s.pipe(Ce(1),re(t)).subscribe(([{active:c},{offset:l}])=>{let 
f=c.innerText.trim();if(c.hasAttribute("data-md-switching"))c.removeAttribute("data-md-switching");else{let u=e.offsetTop-l.y;for(let y of P("[data-tabs]"))for(let L of P(":scope > input",y)){let X=R(`label[for="${L.id}"]`);if(X!==c&&X.innerText.trim()===f){X.setAttribute("data-md-switching",""),L.click();break}}window.scrollTo({top:e.offsetTop-u});let d=__md_get("__tabs")||[];__md_set("__tabs",[...new Set([f,...d])])}}),s.pipe(W(p)).subscribe(()=>{for(let c of P("audio, video",e))c.pause()}),Ya(n).pipe(w(c=>s.next(c)),_(()=>s.complete()),m(c=>$({ref:e},c)))}).pipe(Ke(se))}function zn(e,{viewport$:t,target$:r,print$:o}){return O(...P(".annotate:not(.highlight)",e).map(n=>Pn(n,{target$:r,print$:o})),...P("pre:not(.mermaid) > code",e).map(n=>jn(n,{target$:r,print$:o})),...P("pre.mermaid",e).map(n=>Wn(n)),...P("table:not([class])",e).map(n=>Vn(n)),...P("details",e).map(n=>Fn(n,{target$:r,print$:o})),...P("[data-tabs]",e).map(n=>Nn(n,{viewport$:t,target$:r})),...P("[title]",e).filter(()=>B("content.tooltips")).map(n=>mt(n,{viewport$:t})))}function Ba(e,{alert$:t}){return t.pipe(v(r=>O(I(!0),I(!1).pipe(Ge(2e3))).pipe(m(o=>({message:r,active:o})))))}function qn(e,t){let r=R(".md-typeset",e);return C(()=>{let o=new g;return o.subscribe(({message:n,active:i})=>{e.classList.toggle("md-dialog--active",i),r.textContent=n}),Ba(e,t).pipe(w(n=>o.next(n)),_(()=>o.complete()),m(n=>$({ref:e},n)))})}var Ga=0;function Ja(e,t){document.body.append(e);let{width:r}=ce(e);e.style.setProperty("--md-tooltip-width",`${r}px`),e.remove();let o=cr(t),n=typeof o!="undefined"?Ne(o):I({x:0,y:0}),i=O(et(t),$t(t)).pipe(K());return z([i,n]).pipe(m(([a,s])=>{let{x:p,y:c}=Ve(t),l=ce(t),f=t.closest("table");return f&&t.parentElement&&(p+=f.offsetLeft+t.parentElement.offsetLeft,c+=f.offsetTop+t.parentElement.offsetTop),{active:a,offset:{x:p-s.x+l.width/2-r/2,y:c-s.y+l.height+8}}}))}function Qn(e){let t=e.title;if(!t.length)return S;let r=`__tooltip_${Ga++}`,o=Rt(r,"inline"),n=R(".md-typeset",o);return n.innerHTML=t,C(()=>{let i=new g;return i.subscribe({next({offset:a}){o.style.setProperty("--md-tooltip-x",`${a.x}px`),o.style.setProperty("--md-tooltip-y",`${a.y}px`)},complete(){o.style.removeProperty("--md-tooltip-x"),o.style.removeProperty("--md-tooltip-y")}}),O(i.pipe(b(({active:a})=>a)),i.pipe(_e(250),b(({active:a})=>!a))).subscribe({next({active:a}){a?(e.insertAdjacentElement("afterend",o),e.setAttribute("aria-describedby",r),e.removeAttribute("title")):(o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t))},complete(){o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t)}}),i.pipe(Me(16,me)).subscribe(({active:a})=>{o.classList.toggle("md-tooltip--active",a)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:a})=>a)).subscribe({next(a){a?o.style.setProperty("--md-tooltip-0",`${-a}px`):o.style.removeProperty("--md-tooltip-0")},complete(){o.style.removeProperty("--md-tooltip-0")}}),Ja(o,e).pipe(w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))}).pipe(Ke(se))}function Xa({viewport$:e}){if(!B("header.autohide"))return I(!1);let t=e.pipe(m(({offset:{y:n}})=>n),Be(2,1),m(([n,i])=>[nMath.abs(i-n.y)>100),m(([,[n]])=>n),K()),o=ze("search");return z([e,o]).pipe(m(([{offset:n},i])=>n.y>400&&!i),K(),v(n=>n?r:I(!1)),Q(!1))}function Kn(e,t){return C(()=>z([ge(e),Xa(t)])).pipe(m(([{height:r},o])=>({height:r,hidden:o})),K((r,o)=>r.height===o.height&&r.hidden===o.hidden),G(1))}function Yn(e,{header$:t,main$:r}){return C(()=>{let o=new 
g,n=o.pipe(Z(),ie(!0));o.pipe(ee("active"),He(t)).subscribe(([{active:a},{hidden:s}])=>{e.classList.toggle("md-header--shadow",a&&!s),e.hidden=s});let i=ue(P("[title]",e)).pipe(b(()=>B("content.tooltips")),ne(a=>Qn(a)));return r.subscribe(o),t.pipe(W(n),m(a=>$({ref:e},a)),Re(i.pipe(W(n))))})}function Za(e,{viewport$:t,header$:r}){return mr(e,{viewport$:t,header$:r}).pipe(m(({offset:{y:o}})=>{let{height:n}=ce(e);return{active:o>=n}}),ee("active"))}function Bn(e,t){return C(()=>{let r=new g;r.subscribe({next({active:n}){e.classList.toggle("md-header__title--active",n)},complete(){e.classList.remove("md-header__title--active")}});let o=fe(".md-content h1");return typeof o=="undefined"?S:Za(o,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))})}function Gn(e,{viewport$:t,header$:r}){let o=r.pipe(m(({height:i})=>i),K()),n=o.pipe(v(()=>ge(e).pipe(m(({height:i})=>({top:e.offsetTop,bottom:e.offsetTop+i})),ee("bottom"))));return z([o,n,t]).pipe(m(([i,{top:a,bottom:s},{offset:{y:p},size:{height:c}}])=>(c=Math.max(0,c-Math.max(0,a-p,i)-Math.max(0,c+p-s)),{offset:a-i,height:c,active:a-i<=p})),K((i,a)=>i.offset===a.offset&&i.height===a.height&&i.active===a.active))}function es(e){let t=__md_get("__palette")||{index:e.findIndex(o=>matchMedia(o.getAttribute("data-md-color-media")).matches)},r=Math.max(0,Math.min(t.index,e.length-1));return I(...e).pipe(ne(o=>h(o,"change").pipe(m(()=>o))),Q(e[r]),m(o=>({index:e.indexOf(o),color:{media:o.getAttribute("data-md-color-media"),scheme:o.getAttribute("data-md-color-scheme"),primary:o.getAttribute("data-md-color-primary"),accent:o.getAttribute("data-md-color-accent")}})),G(1))}function Jn(e){let t=P("input",e),r=x("meta",{name:"theme-color"});document.head.appendChild(r);let o=x("meta",{name:"color-scheme"});document.head.appendChild(o);let n=Pt("(prefers-color-scheme: light)");return C(()=>{let i=new g;return i.subscribe(a=>{if(document.body.setAttribute("data-md-color-switching",""),a.color.media==="(prefers-color-scheme)"){let s=matchMedia("(prefers-color-scheme: light)"),p=document.querySelector(s.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");a.color.scheme=p.getAttribute("data-md-color-scheme"),a.color.primary=p.getAttribute("data-md-color-primary"),a.color.accent=p.getAttribute("data-md-color-accent")}for(let[s,p]of Object.entries(a.color))document.body.setAttribute(`data-md-color-${s}`,p);for(let s=0;sa.key==="Enter"),re(i,(a,s)=>s)).subscribe(({index:a})=>{a=(a+1)%t.length,t[a].click(),t[a].focus()}),i.pipe(m(()=>{let a=Se("header"),s=window.getComputedStyle(a);return o.content=s.colorScheme,s.backgroundColor.match(/\d+/g).map(p=>(+p).toString(16).padStart(2,"0")).join("")})).subscribe(a=>r.content=`#${a}`),i.pipe(ve(se)).subscribe(()=>{document.body.removeAttribute("data-md-color-switching")}),es(t).pipe(W(n.pipe(Ce(1))),ct(),w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))})}function Xn(e,{progress$:t}){return C(()=>{let r=new g;return r.subscribe(({value:o})=>{e.style.setProperty("--md-progress-value",`${o}`)}),t.pipe(w(o=>r.next({value:o})),_(()=>r.complete()),m(o=>({ref:e,value:o})))})}var Jr=Mt(Br());function ts(e){e.setAttribute("data-md-copying","");let t=e.closest("[data-copy]"),r=t?t.getAttribute("data-copy"):e.innerText;return e.removeAttribute("data-md-copying"),r.trimEnd()}function Zn({alert$:e}){Jr.default.isSupported()&&new j(t=>{new Jr.default("[data-clipboard-target], 
[data-clipboard-text]",{text:r=>r.getAttribute("data-clipboard-text")||ts(R(r.getAttribute("data-clipboard-target")))}).on("success",r=>t.next(r))}).pipe(w(t=>{t.trigger.focus()}),m(()=>Ee("clipboard.copied"))).subscribe(e)}function ei(e,t){return e.protocol=t.protocol,e.hostname=t.hostname,e}function rs(e,t){let r=new Map;for(let o of P("url",e)){let n=R("loc",o),i=[ei(new URL(n.textContent),t)];r.set(`${i[0]}`,i);for(let a of P("[rel=alternate]",o)){let s=a.getAttribute("href");s!=null&&i.push(ei(new URL(s),t))}}return r}function ur(e){return un(new URL("sitemap.xml",e)).pipe(m(t=>rs(t,new URL(e))),de(()=>I(new Map)))}function os(e,t){if(!(e.target instanceof Element))return S;let r=e.target.closest("a");if(r===null)return S;if(r.target||e.metaKey||e.ctrlKey)return S;let o=new URL(r.href);return o.search=o.hash="",t.has(`${o}`)?(e.preventDefault(),I(new URL(r.href))):S}function ti(e){let t=new Map;for(let r of P(":scope > *",e.head))t.set(r.outerHTML,r);return t}function ri(e){for(let t of P("[href], [src]",e))for(let r of["href","src"]){let o=t.getAttribute(r);if(o&&!/^(?:[a-z]+:)?\/\//i.test(o)){t[r]=t[r];break}}return I(e)}function ns(e){for(let o of["[data-md-component=announce]","[data-md-component=container]","[data-md-component=header-topic]","[data-md-component=outdated]","[data-md-component=logo]","[data-md-component=skip]",...B("navigation.tabs.sticky")?["[data-md-component=tabs]"]:[]]){let n=fe(o),i=fe(o,e);typeof n!="undefined"&&typeof i!="undefined"&&n.replaceWith(i)}let t=ti(document);for(let[o,n]of ti(e))t.has(o)?t.delete(o):document.head.appendChild(n);for(let o of t.values()){let n=o.getAttribute("name");n!=="theme-color"&&n!=="color-scheme"&&o.remove()}let r=Se("container");return We(P("script",r)).pipe(v(o=>{let n=e.createElement("script");if(o.src){for(let i of o.getAttributeNames())n.setAttribute(i,o.getAttribute(i));return o.replaceWith(n),new j(i=>{n.onload=()=>i.complete()})}else return n.textContent=o.textContent,o.replaceWith(n),S}),Z(),ie(document))}function oi({location$:e,viewport$:t,progress$:r}){let o=xe();if(location.protocol==="file:")return S;let n=ur(o.base);I(document).subscribe(ri);let i=h(document.body,"click").pipe(He(n),v(([p,c])=>os(p,c)),pe()),a=h(window,"popstate").pipe(m(ye),pe());i.pipe(re(t)).subscribe(([p,{offset:c}])=>{history.replaceState(c,""),history.pushState(null,"",p)}),O(i,a).subscribe(e);let s=e.pipe(ee("pathname"),v(p=>fn(p,{progress$:r}).pipe(de(()=>(lt(p,!0),S)))),v(ri),v(ns),pe());return O(s.pipe(re(e,(p,c)=>c)),s.pipe(v(()=>e),ee("pathname"),v(()=>e),ee("hash")),e.pipe(K((p,c)=>p.pathname===c.pathname&&p.hash===c.hash),v(()=>i),w(()=>history.back()))).subscribe(p=>{var c,l;history.state!==null||!p.hash?window.scrollTo(0,(l=(c=history.state)==null?void 0:c.y)!=null?l:0):(history.scrollRestoration="auto",pn(p.hash),history.scrollRestoration="manual")}),e.subscribe(()=>{history.scrollRestoration="manual"}),h(window,"beforeunload").subscribe(()=>{history.scrollRestoration="auto"}),t.pipe(ee("offset"),_e(100)).subscribe(({offset:p})=>{history.replaceState(p,"")}),s}var ni=Mt(qr());function ii(e){let t=e.separator.split("|").map(n=>n.replace(/(\(\?[!=<][^)]+\))/g,"").length===0?"\uFFFD":n).join("|"),r=new RegExp(t,"img"),o=(n,i,a)=>`${i}${a}`;return n=>{n=n.replace(/[\s*+\-:~^]+/g," ").trim();let i=new RegExp(`(^|${e.separator}|)(${n.replace(/[|\\{}()[\]^$+*?.-]/g,"\\$&").replace(r,"|")})`,"img");return a=>(0,ni.default)(a).replace(i,o).replace(/<\/mark>(\s+)]*>/img,"$1")}}function jt(e){return e.type===1}function dr(e){return 
e.type===3}function ai(e,t){let r=yn(e);return O(I(location.protocol!=="file:"),ze("search")).pipe(Ae(o=>o),v(()=>t)).subscribe(({config:o,docs:n})=>r.next({type:0,data:{config:o,docs:n,options:{suggest:B("search.suggest")}}})),r}function si(e){var l;let{selectedVersionSitemap:t,selectedVersionBaseURL:r,currentLocation:o,currentBaseURL:n}=e,i=(l=Xr(n))==null?void 0:l.pathname;if(i===void 0)return;let a=ss(o.pathname,i);if(a===void 0)return;let s=ps(t.keys());if(!t.has(s))return;let p=Xr(a,s);if(!p||!t.has(p.href))return;let c=Xr(a,r);if(c)return c.hash=o.hash,c.search=o.search,c}function Xr(e,t){try{return new URL(e,t)}catch(r){return}}function ss(e,t){if(e.startsWith(t))return e.slice(t.length)}function cs(e,t){let r=Math.min(e.length,t.length),o;for(o=0;oS)),o=r.pipe(m(n=>{let[,i]=t.base.match(/([^/]+)\/?$/);return n.find(({version:a,aliases:s})=>a===i||s.includes(i))||n[0]}));r.pipe(m(n=>new Map(n.map(i=>[`${new URL(`../${i.version}/`,t.base)}`,i]))),v(n=>h(document.body,"click").pipe(b(i=>!i.metaKey&&!i.ctrlKey),re(o),v(([i,a])=>{if(i.target instanceof Element){let s=i.target.closest("a");if(s&&!s.target&&n.has(s.href)){let p=s.href;return!i.target.closest(".md-version")&&n.get(p)===a?S:(i.preventDefault(),I(new URL(p)))}}return S}),v(i=>ur(i).pipe(m(a=>{var s;return(s=si({selectedVersionSitemap:a,selectedVersionBaseURL:i,currentLocation:ye(),currentBaseURL:t.base}))!=null?s:i})))))).subscribe(n=>lt(n,!0)),z([r,o]).subscribe(([n,i])=>{R(".md-header__topic").appendChild(Cn(n,i))}),e.pipe(v(()=>o)).subscribe(n=>{var a;let i=__md_get("__outdated",sessionStorage);if(i===null){i=!0;let s=((a=t.version)==null?void 0:a.default)||"latest";Array.isArray(s)||(s=[s]);e:for(let p of s)for(let c of n.aliases.concat(n.version))if(new RegExp(p,"i").test(c)){i=!1;break e}__md_set("__outdated",i,sessionStorage)}if(i)for(let s of ae("outdated"))s.hidden=!1})}function ls(e,{worker$:t}){let{searchParams:r}=ye();r.has("q")&&(Je("search",!0),e.value=r.get("q"),e.focus(),ze("search").pipe(Ae(i=>!i)).subscribe(()=>{let i=ye();i.searchParams.delete("q"),history.replaceState({},"",`${i}`)}));let o=et(e),n=O(t.pipe(Ae(jt)),h(e,"keyup"),o).pipe(m(()=>e.value),K());return z([n,o]).pipe(m(([i,a])=>({value:i,focus:a})),G(1))}function pi(e,{worker$:t}){let r=new g,o=r.pipe(Z(),ie(!0));z([t.pipe(Ae(jt)),r],(i,a)=>a).pipe(ee("value")).subscribe(({value:i})=>t.next({type:2,data:i})),r.pipe(ee("focus")).subscribe(({focus:i})=>{i&&Je("search",i)}),h(e.form,"reset").pipe(W(o)).subscribe(()=>e.focus());let n=R("header [for=__search]");return h(n,"click").subscribe(()=>e.focus()),ls(e,{worker$:t}).pipe(w(i=>r.next(i)),_(()=>r.complete()),m(i=>$({ref:e},i)),G(1))}function li(e,{worker$:t,query$:r}){let o=new g,n=on(e.parentElement).pipe(b(Boolean)),i=e.parentElement,a=R(":scope > :first-child",e),s=R(":scope > :last-child",e);ze("search").subscribe(l=>s.setAttribute("role",l?"list":"presentation")),o.pipe(re(r),Wr(t.pipe(Ae(jt)))).subscribe(([{items:l},{value:f}])=>{switch(l.length){case 0:a.textContent=f.length?Ee("search.result.none"):Ee("search.result.placeholder");break;case 1:a.textContent=Ee("search.result.one");break;default:let u=sr(l.length);a.textContent=Ee("search.result.other",u)}});let p=o.pipe(w(()=>s.innerHTML=""),v(({items:l})=>O(I(...l.slice(0,10)),I(...l.slice(10)).pipe(Be(4),Vr(n),v(([f])=>f)))),m(Mn),pe());return p.subscribe(l=>s.appendChild(l)),p.pipe(ne(l=>{let f=fe("details",l);return typeof 
f=="undefined"?S:h(f,"toggle").pipe(W(o),m(()=>f))})).subscribe(l=>{l.open===!1&&l.offsetTop<=i.scrollTop&&i.scrollTo({top:l.offsetTop})}),t.pipe(b(dr),m(({data:l})=>l)).pipe(w(l=>o.next(l)),_(()=>o.complete()),m(l=>$({ref:e},l)))}function ms(e,{query$:t}){return t.pipe(m(({value:r})=>{let o=ye();return o.hash="",r=r.replace(/\s+/g,"+").replace(/&/g,"%26").replace(/=/g,"%3D"),o.search=`q=${r}`,{url:o}}))}function mi(e,t){let r=new g,o=r.pipe(Z(),ie(!0));return r.subscribe(({url:n})=>{e.setAttribute("data-clipboard-text",e.href),e.href=`${n}`}),h(e,"click").pipe(W(o)).subscribe(n=>n.preventDefault()),ms(e,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))}function fi(e,{worker$:t,keyboard$:r}){let o=new g,n=Se("search-query"),i=O(h(n,"keydown"),h(n,"focus")).pipe(ve(se),m(()=>n.value),K());return o.pipe(He(i),m(([{suggest:s},p])=>{let c=p.split(/([\s-]+)/);if(s!=null&&s.length&&c[c.length-1]){let l=s[s.length-1];l.startsWith(c[c.length-1])&&(c[c.length-1]=l)}else c.length=0;return c})).subscribe(s=>e.innerHTML=s.join("").replace(/\s/g," ")),r.pipe(b(({mode:s})=>s==="search")).subscribe(s=>{switch(s.type){case"ArrowRight":e.innerText.length&&n.selectionStart===n.value.length&&(n.value=e.innerText);break}}),t.pipe(b(dr),m(({data:s})=>s)).pipe(w(s=>o.next(s)),_(()=>o.complete()),m(()=>({ref:e})))}function ui(e,{index$:t,keyboard$:r}){let o=xe();try{let n=ai(o.search,t),i=Se("search-query",e),a=Se("search-result",e);h(e,"click").pipe(b(({target:p})=>p instanceof Element&&!!p.closest("a"))).subscribe(()=>Je("search",!1)),r.pipe(b(({mode:p})=>p==="search")).subscribe(p=>{let c=Ie();switch(p.type){case"Enter":if(c===i){let l=new Map;for(let f of P(":first-child [href]",a)){let u=f.firstElementChild;l.set(f,parseFloat(u.getAttribute("data-md-score")))}if(l.size){let[[f]]=[...l].sort(([,u],[,d])=>d-u);f.click()}p.claim()}break;case"Escape":case"Tab":Je("search",!1),i.blur();break;case"ArrowUp":case"ArrowDown":if(typeof c=="undefined")i.focus();else{let l=[i,...P(":not(details) > [href], summary, details[open] [href]",a)],f=Math.max(0,(Math.max(0,l.indexOf(c))+l.length+(p.type==="ArrowUp"?-1:1))%l.length);l[f].focus()}p.claim();break;default:i!==Ie()&&i.focus()}}),r.pipe(b(({mode:p})=>p==="global")).subscribe(p=>{switch(p.type){case"f":case"s":case"/":i.focus(),i.select(),p.claim();break}});let s=pi(i,{worker$:n});return O(s,li(a,{worker$:n,query$:s})).pipe(Re(...ae("search-share",e).map(p=>mi(p,{query$:s})),...ae("search-suggest",e).map(p=>fi(p,{worker$:n,keyboard$:r}))))}catch(n){return e.hidden=!0,Ye}}function di(e,{index$:t,location$:r}){return z([t,r.pipe(Q(ye()),b(o=>!!o.searchParams.get("h")))]).pipe(m(([o,n])=>ii(o.config)(n.searchParams.get("h"))),m(o=>{var a;let n=new Map,i=document.createNodeIterator(e,NodeFilter.SHOW_TEXT);for(let s=i.nextNode();s;s=i.nextNode())if((a=s.parentElement)!=null&&a.offsetHeight){let p=s.textContent,c=o(p);c.length>p.length&&n.set(s,c)}for(let[s,p]of n){let{childNodes:c}=x("span",null,p);s.replaceWith(...Array.from(c))}return{ref:e,nodes:n}}))}function fs(e,{viewport$:t,main$:r}){let o=e.closest(".md-grid"),n=o.offsetTop-o.parentElement.offsetTop;return z([r,t]).pipe(m(([{offset:i,height:a},{offset:{y:s}}])=>(a=a+Math.min(n,Math.max(0,s-i))-n,{height:a,locked:s>=i+n})),K((i,a)=>i.height===a.height&&i.locked===a.locked))}function Zr(e,o){var n=o,{header$:t}=n,r=so(n,["header$"]);let i=R(".md-sidebar__scrollwrap",e),{y:a}=Ve(i);return C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=s.pipe(Me(0,me));return 
c.pipe(re(t)).subscribe({next([{height:l},{height:f}]){i.style.height=`${l-2*a}px`,e.style.top=`${f}px`},complete(){i.style.height="",e.style.top=""}}),c.pipe(Ae()).subscribe(()=>{for(let l of P(".md-nav__link--active[href]",e)){if(!l.clientHeight)continue;let f=l.closest(".md-sidebar__scrollwrap");if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2})}}}),ue(P("label[tabindex]",e)).pipe(ne(l=>h(l,"click").pipe(ve(se),m(()=>l),W(p)))).subscribe(l=>{let f=R(`[id="${l.htmlFor}"]`);R(`[aria-labelledby="${l.id}"]`).setAttribute("aria-expanded",`${f.checked}`)}),fs(e,r).pipe(w(l=>s.next(l)),_(()=>s.complete()),m(l=>$({ref:e},l)))})}function hi(e,t){if(typeof t!="undefined"){let r=`https://api.github.com/repos/${e}/${t}`;return st(je(`${r}/releases/latest`).pipe(de(()=>S),m(o=>({version:o.tag_name})),De({})),je(r).pipe(de(()=>S),m(o=>({stars:o.stargazers_count,forks:o.forks_count})),De({}))).pipe(m(([o,n])=>$($({},o),n)))}else{let r=`https://api.github.com/users/${e}`;return je(r).pipe(m(o=>({repositories:o.public_repos})),De({}))}}function bi(e,t){let r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return st(je(`${r}/releases/permalink/latest`).pipe(de(()=>S),m(({tag_name:o})=>({version:o})),De({})),je(r).pipe(de(()=>S),m(({star_count:o,forks_count:n})=>({stars:o,forks:n})),De({}))).pipe(m(([o,n])=>$($({},o),n)))}function vi(e){let t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return hi(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return bi(r,o)}return S}var us;function ds(e){return us||(us=C(()=>{let t=__md_get("__source",sessionStorage);if(t)return I(t);if(ae("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return S}return vi(e.href).pipe(w(o=>__md_set("__source",o,sessionStorage)))}).pipe(de(()=>S),b(t=>Object.keys(t).length>0),m(t=>({facts:t})),G(1)))}function gi(e){let t=R(":scope > :last-child",e);return C(()=>{let r=new g;return r.subscribe(({facts:o})=>{t.appendChild(_n(o)),t.classList.add("md-source__repository--active")}),ds(e).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function hs(e,{viewport$:t,header$:r}){return ge(document.body).pipe(v(()=>mr(e,{header$:r,viewport$:t})),m(({offset:{y:o}})=>({hidden:o>=10})),ee("hidden"))}function yi(e,t){return C(()=>{let r=new g;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(B("navigation.tabs.sticky")?I({hidden:!1}):hs(e,t)).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function bs(e,{viewport$:t,header$:r}){let o=new Map,n=P(".md-nav__link",e);for(let s of n){let p=decodeURIComponent(s.hash.substring(1)),c=fe(`[id="${p}"]`);typeof c!="undefined"&&o.set(s,c)}let i=r.pipe(ee("height"),m(({height:s})=>{let p=Se("main"),c=R(":scope > :first-child",p);return s+.8*(c.offsetTop-p.offsetTop)}),pe());return ge(document.body).pipe(ee("height"),v(s=>C(()=>{let p=[];return I([...o].reduce((c,[l,f])=>{for(;p.length&&o.get(p[p.length-1]).tagName>=f.tagName;)p.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let d=f.offsetParent;for(;d;d=d.offsetParent)u+=d.offsetTop;return c.set([...p=[...p,l]].reverse(),u)},new Map))}).pipe(m(p=>new Map([...p].sort(([,c],[,l])=>c-l))),He(i),v(([p,c])=>t.pipe(Fr(([l,f],{offset:{y:u},size:d})=>{let y=u+d.height>=Math.floor(s.height);for(;f.length;){let[,L]=f[0];if(L-c=u&&!y)f=[l.pop(),...f];else 
break}return[l,f]},[[],[...p]]),K((l,f)=>l[0]===f[0]&&l[1]===f[1])))))).pipe(m(([s,p])=>({prev:s.map(([c])=>c),next:p.map(([c])=>c)})),Q({prev:[],next:[]}),Be(2,1),m(([s,p])=>s.prev.length{let i=new g,a=i.pipe(Z(),ie(!0));if(i.subscribe(({prev:s,next:p})=>{for(let[c]of p)c.classList.remove("md-nav__link--passed"),c.classList.remove("md-nav__link--active");for(let[c,[l]]of s.entries())l.classList.add("md-nav__link--passed"),l.classList.toggle("md-nav__link--active",c===s.length-1)}),B("toc.follow")){let s=O(t.pipe(_e(1),m(()=>{})),t.pipe(_e(250),m(()=>"smooth")));i.pipe(b(({prev:p})=>p.length>0),He(o.pipe(ve(se))),re(s)).subscribe(([[{prev:p}],c])=>{let[l]=p[p.length-1];if(l.offsetHeight){let f=cr(l);if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2,behavior:c})}}})}return B("navigation.tracking")&&t.pipe(W(a),ee("offset"),_e(250),Ce(1),W(n.pipe(Ce(1))),ct({delay:250}),re(i)).subscribe(([,{prev:s}])=>{let p=ye(),c=s[s.length-1];if(c&&c.length){let[l]=c,{hash:f}=new URL(l.href);p.hash!==f&&(p.hash=f,history.replaceState({},"",`${p}`))}else p.hash="",history.replaceState({},"",`${p}`)}),bs(e,{viewport$:t,header$:r}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function vs(e,{viewport$:t,main$:r,target$:o}){let n=t.pipe(m(({offset:{y:a}})=>a),Be(2,1),m(([a,s])=>a>s&&s>0),K()),i=r.pipe(m(({active:a})=>a));return z([i,n]).pipe(m(([a,s])=>!(a&&s)),K(),W(o.pipe(Ce(1))),ie(!0),ct({delay:250}),m(a=>({hidden:a})))}function Ei(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new g,a=i.pipe(Z(),ie(!0));return i.subscribe({next({hidden:s}){e.hidden=s,s?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(W(a),ee("height")).subscribe(({height:s})=>{e.style.top=`${s+16}px`}),h(e,"click").subscribe(s=>{s.preventDefault(),window.scrollTo({top:0})}),vs(e,{viewport$:t,main$:o,target$:n}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))}function wi({document$:e,viewport$:t}){e.pipe(v(()=>P(".md-ellipsis")),ne(r=>tt(r).pipe(W(e.pipe(Ce(1))),b(o=>o),m(()=>r),Te(1))),b(r=>r.offsetWidth{let o=r.innerText,n=r.closest("a")||r;return n.title=o,B("content.tooltips")?mt(n,{viewport$:t}).pipe(W(e.pipe(Ce(1))),_(()=>n.removeAttribute("title"))):S})).subscribe(),B("content.tooltips")&&e.pipe(v(()=>P(".md-status")),ne(r=>mt(r,{viewport$:t}))).subscribe()}function Ti({document$:e,tablet$:t}){e.pipe(v(()=>P(".md-toggle--indeterminate")),w(r=>{r.indeterminate=!0,r.checked=!1}),ne(r=>h(r,"change").pipe(Dr(()=>r.classList.contains("md-toggle--indeterminate")),m(()=>r))),re(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function gs(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function Si({document$:e}){e.pipe(v(()=>P("[data-md-scrollfix]")),w(t=>t.removeAttribute("data-md-scrollfix")),b(gs),ne(t=>h(t,"touchstart").pipe(m(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function Oi({viewport$:e,tablet$:t}){z([ze("search"),t]).pipe(m(([r,o])=>r&&!o),v(r=>I(r).pipe(Ge(r?400:100))),re(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of 
Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function ys(){return location.protocol==="file:"?Tt(`${new URL("search/search_index.js",eo.base)}`).pipe(m(()=>__index),G(1)):je(new URL("search/search_index.json",eo.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var ot=Go(),Ut=sn(),Lt=ln(Ut),to=an(),Oe=gn(),hr=Pt("(min-width: 960px)"),Mi=Pt("(min-width: 1220px)"),_i=mn(),eo=xe(),Ai=document.forms.namedItem("search")?ys():Ye,ro=new g;Zn({alert$:ro});var oo=new g;B("navigation.instant")&&oi({location$:Ut,viewport$:Oe,progress$:oo}).subscribe(ot);var Li;((Li=eo.version)==null?void 0:Li.provider)==="mike"&&ci({document$:ot});O(Ut,Lt).pipe(Ge(125)).subscribe(()=>{Je("drawer",!1),Je("search",!1)});to.pipe(b(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let t=fe("link[rel=prev]");typeof t!="undefined"&<(t);break;case"n":case".":let r=fe("link[rel=next]");typeof r!="undefined"&<(r);break;case"Enter":let o=Ie();o instanceof HTMLLabelElement&&o.click()}});wi({viewport$:Oe,document$:ot});Ti({document$:ot,tablet$:hr});Si({document$:ot});Oi({viewport$:Oe,tablet$:hr});var rt=Kn(Se("header"),{viewport$:Oe}),Ft=ot.pipe(m(()=>Se("main")),v(e=>Gn(e,{viewport$:Oe,header$:rt})),G(1)),xs=O(...ae("consent").map(e=>En(e,{target$:Lt})),...ae("dialog").map(e=>qn(e,{alert$:ro})),...ae("palette").map(e=>Jn(e)),...ae("progress").map(e=>Xn(e,{progress$:oo})),...ae("search").map(e=>ui(e,{index$:Ai,keyboard$:to})),...ae("source").map(e=>gi(e))),Es=C(()=>O(...ae("announce").map(e=>xn(e)),...ae("content").map(e=>zn(e,{viewport$:Oe,target$:Lt,print$:_i})),...ae("content").map(e=>B("search.highlight")?di(e,{index$:Ai,location$:Ut}):S),...ae("header").map(e=>Yn(e,{viewport$:Oe,header$:rt,main$:Ft})),...ae("header-title").map(e=>Bn(e,{viewport$:Oe,header$:rt})),...ae("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?Nr(Mi,()=>Zr(e,{viewport$:Oe,header$:rt,main$:Ft})):Nr(hr,()=>Zr(e,{viewport$:Oe,header$:rt,main$:Ft}))),...ae("tabs").map(e=>yi(e,{viewport$:Oe,header$:rt})),...ae("toc").map(e=>xi(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Lt})),...ae("top").map(e=>Ei(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Lt})))),Ci=ot.pipe(v(()=>Es),Re(xs),G(1));Ci.subscribe();window.document$=ot;window.location$=Ut;window.target$=Lt;window.keyboard$=to;window.viewport$=Oe;window.tablet$=hr;window.screen$=Mi;window.print$=_i;window.alert$=ro;window.progress$=oo;window.component$=Ci;})(); +//# sourceMappingURL=bundle.83f73b43.min.js.map + diff --git a/assets/javascripts/bundle.83f73b43.min.js.map b/assets/javascripts/bundle.83f73b43.min.js.map new file mode 100644 index 000000000..fe920b7d6 --- /dev/null +++ b/assets/javascripts/bundle.83f73b43.min.js.map @@ -0,0 +1,7 @@ +{ + "version": 3, + "sources": ["node_modules/focus-visible/dist/focus-visible.js", "node_modules/escape-html/index.js", 
"node_modules/clipboard/dist/clipboard.js", "src/templates/assets/javascripts/bundle.ts", "node_modules/tslib/tslib.es6.mjs", "node_modules/rxjs/src/internal/util/isFunction.ts", "node_modules/rxjs/src/internal/util/createErrorClass.ts", "node_modules/rxjs/src/internal/util/UnsubscriptionError.ts", "node_modules/rxjs/src/internal/util/arrRemove.ts", "node_modules/rxjs/src/internal/Subscription.ts", "node_modules/rxjs/src/internal/config.ts", "node_modules/rxjs/src/internal/scheduler/timeoutProvider.ts", "node_modules/rxjs/src/internal/util/reportUnhandledError.ts", "node_modules/rxjs/src/internal/util/noop.ts", "node_modules/rxjs/src/internal/NotificationFactories.ts", "node_modules/rxjs/src/internal/util/errorContext.ts", "node_modules/rxjs/src/internal/Subscriber.ts", "node_modules/rxjs/src/internal/symbol/observable.ts", "node_modules/rxjs/src/internal/util/identity.ts", "node_modules/rxjs/src/internal/util/pipe.ts", "node_modules/rxjs/src/internal/Observable.ts", "node_modules/rxjs/src/internal/util/lift.ts", "node_modules/rxjs/src/internal/operators/OperatorSubscriber.ts", "node_modules/rxjs/src/internal/scheduler/animationFrameProvider.ts", "node_modules/rxjs/src/internal/util/ObjectUnsubscribedError.ts", "node_modules/rxjs/src/internal/Subject.ts", "node_modules/rxjs/src/internal/BehaviorSubject.ts", "node_modules/rxjs/src/internal/scheduler/dateTimestampProvider.ts", "node_modules/rxjs/src/internal/ReplaySubject.ts", "node_modules/rxjs/src/internal/scheduler/Action.ts", "node_modules/rxjs/src/internal/scheduler/intervalProvider.ts", "node_modules/rxjs/src/internal/scheduler/AsyncAction.ts", "node_modules/rxjs/src/internal/Scheduler.ts", "node_modules/rxjs/src/internal/scheduler/AsyncScheduler.ts", "node_modules/rxjs/src/internal/scheduler/async.ts", "node_modules/rxjs/src/internal/scheduler/QueueAction.ts", "node_modules/rxjs/src/internal/scheduler/QueueScheduler.ts", "node_modules/rxjs/src/internal/scheduler/queue.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameAction.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameScheduler.ts", "node_modules/rxjs/src/internal/scheduler/animationFrame.ts", "node_modules/rxjs/src/internal/observable/empty.ts", "node_modules/rxjs/src/internal/util/isScheduler.ts", "node_modules/rxjs/src/internal/util/args.ts", "node_modules/rxjs/src/internal/util/isArrayLike.ts", "node_modules/rxjs/src/internal/util/isPromise.ts", "node_modules/rxjs/src/internal/util/isInteropObservable.ts", "node_modules/rxjs/src/internal/util/isAsyncIterable.ts", "node_modules/rxjs/src/internal/util/throwUnobservableError.ts", "node_modules/rxjs/src/internal/symbol/iterator.ts", "node_modules/rxjs/src/internal/util/isIterable.ts", "node_modules/rxjs/src/internal/util/isReadableStreamLike.ts", "node_modules/rxjs/src/internal/observable/innerFrom.ts", "node_modules/rxjs/src/internal/util/executeSchedule.ts", "node_modules/rxjs/src/internal/operators/observeOn.ts", "node_modules/rxjs/src/internal/operators/subscribeOn.ts", "node_modules/rxjs/src/internal/scheduled/scheduleObservable.ts", "node_modules/rxjs/src/internal/scheduled/schedulePromise.ts", "node_modules/rxjs/src/internal/scheduled/scheduleArray.ts", "node_modules/rxjs/src/internal/scheduled/scheduleIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleAsyncIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleReadableStreamLike.ts", "node_modules/rxjs/src/internal/scheduled/scheduled.ts", "node_modules/rxjs/src/internal/observable/from.ts", 
"node_modules/rxjs/src/internal/observable/of.ts", "node_modules/rxjs/src/internal/observable/throwError.ts", "node_modules/rxjs/src/internal/util/EmptyError.ts", "node_modules/rxjs/src/internal/util/isDate.ts", "node_modules/rxjs/src/internal/operators/map.ts", "node_modules/rxjs/src/internal/util/mapOneOrManyArgs.ts", "node_modules/rxjs/src/internal/util/argsArgArrayOrObject.ts", "node_modules/rxjs/src/internal/util/createObject.ts", "node_modules/rxjs/src/internal/observable/combineLatest.ts", "node_modules/rxjs/src/internal/operators/mergeInternals.ts", "node_modules/rxjs/src/internal/operators/mergeMap.ts", "node_modules/rxjs/src/internal/operators/mergeAll.ts", "node_modules/rxjs/src/internal/operators/concatAll.ts", "node_modules/rxjs/src/internal/observable/concat.ts", "node_modules/rxjs/src/internal/observable/defer.ts", "node_modules/rxjs/src/internal/observable/fromEvent.ts", "node_modules/rxjs/src/internal/observable/fromEventPattern.ts", "node_modules/rxjs/src/internal/observable/timer.ts", "node_modules/rxjs/src/internal/observable/merge.ts", "node_modules/rxjs/src/internal/observable/never.ts", "node_modules/rxjs/src/internal/util/argsOrArgArray.ts", "node_modules/rxjs/src/internal/operators/filter.ts", "node_modules/rxjs/src/internal/observable/zip.ts", "node_modules/rxjs/src/internal/operators/audit.ts", "node_modules/rxjs/src/internal/operators/auditTime.ts", "node_modules/rxjs/src/internal/operators/bufferCount.ts", "node_modules/rxjs/src/internal/operators/catchError.ts", "node_modules/rxjs/src/internal/operators/scanInternals.ts", "node_modules/rxjs/src/internal/operators/combineLatest.ts", "node_modules/rxjs/src/internal/operators/combineLatestWith.ts", "node_modules/rxjs/src/internal/operators/debounce.ts", "node_modules/rxjs/src/internal/operators/debounceTime.ts", "node_modules/rxjs/src/internal/operators/defaultIfEmpty.ts", "node_modules/rxjs/src/internal/operators/take.ts", "node_modules/rxjs/src/internal/operators/ignoreElements.ts", "node_modules/rxjs/src/internal/operators/mapTo.ts", "node_modules/rxjs/src/internal/operators/delayWhen.ts", "node_modules/rxjs/src/internal/operators/delay.ts", "node_modules/rxjs/src/internal/operators/distinctUntilChanged.ts", "node_modules/rxjs/src/internal/operators/distinctUntilKeyChanged.ts", "node_modules/rxjs/src/internal/operators/throwIfEmpty.ts", "node_modules/rxjs/src/internal/operators/endWith.ts", "node_modules/rxjs/src/internal/operators/finalize.ts", "node_modules/rxjs/src/internal/operators/first.ts", "node_modules/rxjs/src/internal/operators/takeLast.ts", "node_modules/rxjs/src/internal/operators/merge.ts", "node_modules/rxjs/src/internal/operators/mergeWith.ts", "node_modules/rxjs/src/internal/operators/repeat.ts", "node_modules/rxjs/src/internal/operators/scan.ts", "node_modules/rxjs/src/internal/operators/share.ts", "node_modules/rxjs/src/internal/operators/shareReplay.ts", "node_modules/rxjs/src/internal/operators/skip.ts", "node_modules/rxjs/src/internal/operators/skipUntil.ts", "node_modules/rxjs/src/internal/operators/startWith.ts", "node_modules/rxjs/src/internal/operators/switchMap.ts", "node_modules/rxjs/src/internal/operators/takeUntil.ts", "node_modules/rxjs/src/internal/operators/takeWhile.ts", "node_modules/rxjs/src/internal/operators/tap.ts", "node_modules/rxjs/src/internal/operators/throttle.ts", "node_modules/rxjs/src/internal/operators/throttleTime.ts", "node_modules/rxjs/src/internal/operators/withLatestFrom.ts", "node_modules/rxjs/src/internal/operators/zip.ts", 
"node_modules/rxjs/src/internal/operators/zipWith.ts", "src/templates/assets/javascripts/browser/document/index.ts", "src/templates/assets/javascripts/browser/element/_/index.ts", "src/templates/assets/javascripts/browser/element/focus/index.ts", "src/templates/assets/javascripts/browser/element/hover/index.ts", "src/templates/assets/javascripts/utilities/h/index.ts", "src/templates/assets/javascripts/utilities/round/index.ts", "src/templates/assets/javascripts/browser/script/index.ts", "src/templates/assets/javascripts/browser/element/size/_/index.ts", "src/templates/assets/javascripts/browser/element/size/content/index.ts", "src/templates/assets/javascripts/browser/element/offset/_/index.ts", "src/templates/assets/javascripts/browser/element/offset/content/index.ts", "src/templates/assets/javascripts/browser/element/visibility/index.ts", "src/templates/assets/javascripts/browser/toggle/index.ts", "src/templates/assets/javascripts/browser/keyboard/index.ts", "src/templates/assets/javascripts/browser/location/_/index.ts", "src/templates/assets/javascripts/browser/location/hash/index.ts", "src/templates/assets/javascripts/browser/media/index.ts", "src/templates/assets/javascripts/browser/request/index.ts", "src/templates/assets/javascripts/browser/viewport/offset/index.ts", "src/templates/assets/javascripts/browser/viewport/size/index.ts", "src/templates/assets/javascripts/browser/viewport/_/index.ts", "src/templates/assets/javascripts/browser/viewport/at/index.ts", "src/templates/assets/javascripts/browser/worker/index.ts", "src/templates/assets/javascripts/_/index.ts", "src/templates/assets/javascripts/components/_/index.ts", "src/templates/assets/javascripts/components/announce/index.ts", "src/templates/assets/javascripts/components/consent/index.ts", "src/templates/assets/javascripts/templates/tooltip/index.tsx", "src/templates/assets/javascripts/templates/annotation/index.tsx", "src/templates/assets/javascripts/templates/clipboard/index.tsx", "src/templates/assets/javascripts/templates/search/index.tsx", "src/templates/assets/javascripts/templates/source/index.tsx", "src/templates/assets/javascripts/templates/tabbed/index.tsx", "src/templates/assets/javascripts/templates/table/index.tsx", "src/templates/assets/javascripts/templates/version/index.tsx", "src/templates/assets/javascripts/components/tooltip2/index.ts", "src/templates/assets/javascripts/components/content/annotation/_/index.ts", "src/templates/assets/javascripts/components/content/annotation/list/index.ts", "src/templates/assets/javascripts/components/content/annotation/block/index.ts", "src/templates/assets/javascripts/components/content/code/_/index.ts", "src/templates/assets/javascripts/components/content/details/index.ts", "src/templates/assets/javascripts/components/content/mermaid/index.css", "src/templates/assets/javascripts/components/content/mermaid/index.ts", "src/templates/assets/javascripts/components/content/table/index.ts", "src/templates/assets/javascripts/components/content/tabs/index.ts", "src/templates/assets/javascripts/components/content/_/index.ts", "src/templates/assets/javascripts/components/dialog/index.ts", "src/templates/assets/javascripts/components/tooltip/index.ts", "src/templates/assets/javascripts/components/header/_/index.ts", "src/templates/assets/javascripts/components/header/title/index.ts", "src/templates/assets/javascripts/components/main/index.ts", "src/templates/assets/javascripts/components/palette/index.ts", "src/templates/assets/javascripts/components/progress/index.ts", 
"src/templates/assets/javascripts/integrations/clipboard/index.ts", "src/templates/assets/javascripts/integrations/sitemap/index.ts", "src/templates/assets/javascripts/integrations/instant/index.ts", "src/templates/assets/javascripts/integrations/search/highlighter/index.ts", "src/templates/assets/javascripts/integrations/search/worker/message/index.ts", "src/templates/assets/javascripts/integrations/search/worker/_/index.ts", "src/templates/assets/javascripts/integrations/version/findurl/index.ts", "src/templates/assets/javascripts/integrations/version/index.ts", "src/templates/assets/javascripts/components/search/query/index.ts", "src/templates/assets/javascripts/components/search/result/index.ts", "src/templates/assets/javascripts/components/search/share/index.ts", "src/templates/assets/javascripts/components/search/suggest/index.ts", "src/templates/assets/javascripts/components/search/_/index.ts", "src/templates/assets/javascripts/components/search/highlight/index.ts", "src/templates/assets/javascripts/components/sidebar/index.ts", "src/templates/assets/javascripts/components/source/facts/github/index.ts", "src/templates/assets/javascripts/components/source/facts/gitlab/index.ts", "src/templates/assets/javascripts/components/source/facts/_/index.ts", "src/templates/assets/javascripts/components/source/_/index.ts", "src/templates/assets/javascripts/components/tabs/index.ts", "src/templates/assets/javascripts/components/toc/index.ts", "src/templates/assets/javascripts/components/top/index.ts", "src/templates/assets/javascripts/patches/ellipsis/index.ts", "src/templates/assets/javascripts/patches/indeterminate/index.ts", "src/templates/assets/javascripts/patches/scrollfix/index.ts", "src/templates/assets/javascripts/patches/scrolllock/index.ts", "src/templates/assets/javascripts/polyfills/index.ts"], + "sourcesContent": ["(function (global, factory) {\n typeof exports === 'object' && typeof module !== 'undefined' ? factory() :\n typeof define === 'function' && define.amd ? define(factory) :\n (factory());\n}(this, (function () { 'use strict';\n\n /**\n * Applies the :focus-visible polyfill at the given scope.\n * A scope in this case is either the top-level Document or a Shadow Root.\n *\n * @param {(Document|ShadowRoot)} scope\n * @see https://github.com/WICG/focus-visible\n */\n function applyFocusVisiblePolyfill(scope) {\n var hadKeyboardEvent = true;\n var hadFocusVisibleRecently = false;\n var hadFocusVisibleRecentlyTimeout = null;\n\n var inputTypesAllowlist = {\n text: true,\n search: true,\n url: true,\n tel: true,\n email: true,\n password: true,\n number: true,\n date: true,\n month: true,\n week: true,\n time: true,\n datetime: true,\n 'datetime-local': true\n };\n\n /**\n * Helper function for legacy browsers and iframes which sometimes focus\n * elements like document, body, and non-interactive SVG.\n * @param {Element} el\n */\n function isValidFocusTarget(el) {\n if (\n el &&\n el !== document &&\n el.nodeName !== 'HTML' &&\n el.nodeName !== 'BODY' &&\n 'classList' in el &&\n 'contains' in el.classList\n ) {\n return true;\n }\n return false;\n }\n\n /**\n * Computes whether the given element should automatically trigger the\n * `focus-visible` class being added, i.e. 
whether it should always match\n * `:focus-visible` when focused.\n * @param {Element} el\n * @return {boolean}\n */\n function focusTriggersKeyboardModality(el) {\n var type = el.type;\n var tagName = el.tagName;\n\n if (tagName === 'INPUT' && inputTypesAllowlist[type] && !el.readOnly) {\n return true;\n }\n\n if (tagName === 'TEXTAREA' && !el.readOnly) {\n return true;\n }\n\n if (el.isContentEditable) {\n return true;\n }\n\n return false;\n }\n\n /**\n * Add the `focus-visible` class to the given element if it was not added by\n * the author.\n * @param {Element} el\n */\n function addFocusVisibleClass(el) {\n if (el.classList.contains('focus-visible')) {\n return;\n }\n el.classList.add('focus-visible');\n el.setAttribute('data-focus-visible-added', '');\n }\n\n /**\n * Remove the `focus-visible` class from the given element if it was not\n * originally added by the author.\n * @param {Element} el\n */\n function removeFocusVisibleClass(el) {\n if (!el.hasAttribute('data-focus-visible-added')) {\n return;\n }\n el.classList.remove('focus-visible');\n el.removeAttribute('data-focus-visible-added');\n }\n\n /**\n * If the most recent user interaction was via the keyboard;\n * and the key press did not include a meta, alt/option, or control key;\n * then the modality is keyboard. Otherwise, the modality is not keyboard.\n * Apply `focus-visible` to any current active element and keep track\n * of our keyboard modality state with `hadKeyboardEvent`.\n * @param {KeyboardEvent} e\n */\n function onKeyDown(e) {\n if (e.metaKey || e.altKey || e.ctrlKey) {\n return;\n }\n\n if (isValidFocusTarget(scope.activeElement)) {\n addFocusVisibleClass(scope.activeElement);\n }\n\n hadKeyboardEvent = true;\n }\n\n /**\n * If at any point a user clicks with a pointing device, ensure that we change\n * the modality away from keyboard.\n * This avoids the situation where a user presses a key on an already focused\n * element, and then clicks on a different element, focusing it with a\n * pointing device, while we still think we're in keyboard modality.\n * @param {Event} e\n */\n function onPointerDown(e) {\n hadKeyboardEvent = false;\n }\n\n /**\n * On `focus`, add the `focus-visible` class to the target if:\n * - the target received focus as a result of keyboard navigation, or\n * - the event target is an element that will likely require interaction\n * via the keyboard (e.g. 
a text box)\n * @param {Event} e\n */\n function onFocus(e) {\n // Prevent IE from focusing the document or HTML element.\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (hadKeyboardEvent || focusTriggersKeyboardModality(e.target)) {\n addFocusVisibleClass(e.target);\n }\n }\n\n /**\n * On `blur`, remove the `focus-visible` class from the target.\n * @param {Event} e\n */\n function onBlur(e) {\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (\n e.target.classList.contains('focus-visible') ||\n e.target.hasAttribute('data-focus-visible-added')\n ) {\n // To detect a tab/window switch, we look for a blur event followed\n // rapidly by a visibility change.\n // If we don't see a visibility change within 100ms, it's probably a\n // regular focus change.\n hadFocusVisibleRecently = true;\n window.clearTimeout(hadFocusVisibleRecentlyTimeout);\n hadFocusVisibleRecentlyTimeout = window.setTimeout(function() {\n hadFocusVisibleRecently = false;\n }, 100);\n removeFocusVisibleClass(e.target);\n }\n }\n\n /**\n * If the user changes tabs, keep track of whether or not the previously\n * focused element had .focus-visible.\n * @param {Event} e\n */\n function onVisibilityChange(e) {\n if (document.visibilityState === 'hidden') {\n // If the tab becomes active again, the browser will handle calling focus\n // on the element (Safari actually calls it twice).\n // If this tab change caused a blur on an element with focus-visible,\n // re-apply the class when the user switches back to the tab.\n if (hadFocusVisibleRecently) {\n hadKeyboardEvent = true;\n }\n addInitialPointerMoveListeners();\n }\n }\n\n /**\n * Add a group of listeners to detect usage of any pointing devices.\n * These listeners will be added when the polyfill first loads, and anytime\n * the window is blurred, so that they are active when the window regains\n * focus.\n */\n function addInitialPointerMoveListeners() {\n document.addEventListener('mousemove', onInitialPointerMove);\n document.addEventListener('mousedown', onInitialPointerMove);\n document.addEventListener('mouseup', onInitialPointerMove);\n document.addEventListener('pointermove', onInitialPointerMove);\n document.addEventListener('pointerdown', onInitialPointerMove);\n document.addEventListener('pointerup', onInitialPointerMove);\n document.addEventListener('touchmove', onInitialPointerMove);\n document.addEventListener('touchstart', onInitialPointerMove);\n document.addEventListener('touchend', onInitialPointerMove);\n }\n\n function removeInitialPointerMoveListeners() {\n document.removeEventListener('mousemove', onInitialPointerMove);\n document.removeEventListener('mousedown', onInitialPointerMove);\n document.removeEventListener('mouseup', onInitialPointerMove);\n document.removeEventListener('pointermove', onInitialPointerMove);\n document.removeEventListener('pointerdown', onInitialPointerMove);\n document.removeEventListener('pointerup', onInitialPointerMove);\n document.removeEventListener('touchmove', onInitialPointerMove);\n document.removeEventListener('touchstart', onInitialPointerMove);\n document.removeEventListener('touchend', onInitialPointerMove);\n }\n\n /**\n * When the polfyill first loads, assume the user is in keyboard modality.\n * If any event is received from a pointing device (e.g. 
mouse, pointer,\n * touch), turn off keyboard modality.\n * This accounts for situations where focus enters the page from the URL bar.\n * @param {Event} e\n */\n function onInitialPointerMove(e) {\n // Work around a Safari quirk that fires a mousemove on whenever the\n // window blurs, even if you're tabbing out of the page. \u00AF\\_(\u30C4)_/\u00AF\n if (e.target.nodeName && e.target.nodeName.toLowerCase() === 'html') {\n return;\n }\n\n hadKeyboardEvent = false;\n removeInitialPointerMoveListeners();\n }\n\n // For some kinds of state, we are interested in changes at the global scope\n // only. For example, global pointer input, global key presses and global\n // visibility change should affect the state at every scope:\n document.addEventListener('keydown', onKeyDown, true);\n document.addEventListener('mousedown', onPointerDown, true);\n document.addEventListener('pointerdown', onPointerDown, true);\n document.addEventListener('touchstart', onPointerDown, true);\n document.addEventListener('visibilitychange', onVisibilityChange, true);\n\n addInitialPointerMoveListeners();\n\n // For focus and blur, we specifically care about state changes in the local\n // scope. This is because focus / blur events that originate from within a\n // shadow root are not re-dispatched from the host element if it was already\n // the active element in its own scope:\n scope.addEventListener('focus', onFocus, true);\n scope.addEventListener('blur', onBlur, true);\n\n // We detect that a node is a ShadowRoot by ensuring that it is a\n // DocumentFragment and also has a host property. This check covers native\n // implementation and polyfill implementation transparently. If we only cared\n // about the native implementation, we could just check if the scope was\n // an instance of a ShadowRoot.\n if (scope.nodeType === Node.DOCUMENT_FRAGMENT_NODE && scope.host) {\n // Since a ShadowRoot is a special kind of DocumentFragment, it does not\n // have a root element to add a class to. So, we add this attribute to the\n // host element instead:\n scope.host.setAttribute('data-js-focus-visible', '');\n } else if (scope.nodeType === Node.DOCUMENT_NODE) {\n document.documentElement.classList.add('js-focus-visible');\n document.documentElement.setAttribute('data-js-focus-visible', '');\n }\n }\n\n // It is important to wrap all references to global window and document in\n // these checks to support server-side rendering use cases\n // @see https://github.com/WICG/focus-visible/issues/199\n if (typeof window !== 'undefined' && typeof document !== 'undefined') {\n // Make the polyfill helper globally available. 
This can be used as a signal\n // to interested libraries that wish to coordinate with the polyfill for e.g.,\n // applying the polyfill to a shadow root:\n window.applyFocusVisiblePolyfill = applyFocusVisiblePolyfill;\n\n // Notify interested libraries of the polyfill's presence, in case the\n // polyfill was loaded lazily:\n var event;\n\n try {\n event = new CustomEvent('focus-visible-polyfill-ready');\n } catch (error) {\n // IE11 does not support using CustomEvent as a constructor directly:\n event = document.createEvent('CustomEvent');\n event.initCustomEvent('focus-visible-polyfill-ready', false, false, {});\n }\n\n window.dispatchEvent(event);\n }\n\n if (typeof document !== 'undefined') {\n // Apply the polyfill to the global document, so that no JavaScript\n // coordination is required to use the polyfill in the top-level document:\n applyFocusVisiblePolyfill(document);\n }\n\n})));\n", "/*!\n * escape-html\n * Copyright(c) 2012-2013 TJ Holowaychuk\n * Copyright(c) 2015 Andreas Lubbe\n * Copyright(c) 2015 Tiancheng \"Timothy\" Gu\n * MIT Licensed\n */\n\n'use strict';\n\n/**\n * Module variables.\n * @private\n */\n\nvar matchHtmlRegExp = /[\"'&<>]/;\n\n/**\n * Module exports.\n * @public\n */\n\nmodule.exports = escapeHtml;\n\n/**\n * Escape special characters in the given string of html.\n *\n * @param {string} string The string to escape for inserting into HTML\n * @return {string}\n * @public\n */\n\nfunction escapeHtml(string) {\n var str = '' + string;\n var match = matchHtmlRegExp.exec(str);\n\n if (!match) {\n return str;\n }\n\n var escape;\n var html = '';\n var index = 0;\n var lastIndex = 0;\n\n for (index = match.index; index < str.length; index++) {\n switch (str.charCodeAt(index)) {\n case 34: // \"\n escape = '"';\n break;\n case 38: // &\n escape = '&';\n break;\n case 39: // '\n escape = ''';\n break;\n case 60: // <\n escape = '<';\n break;\n case 62: // >\n escape = '>';\n break;\n default:\n continue;\n }\n\n if (lastIndex !== index) {\n html += str.substring(lastIndex, index);\n }\n\n lastIndex = index + 1;\n html += escape;\n }\n\n return lastIndex !== index\n ? 
html + str.substring(lastIndex, index)\n : html;\n}\n", "/*!\n * clipboard.js v2.0.11\n * https://clipboardjs.com/\n *\n * Licensed MIT \u00A9 Zeno Rocha\n */\n(function webpackUniversalModuleDefinition(root, factory) {\n\tif(typeof exports === 'object' && typeof module === 'object')\n\t\tmodule.exports = factory();\n\telse if(typeof define === 'function' && define.amd)\n\t\tdefine([], factory);\n\telse if(typeof exports === 'object')\n\t\texports[\"ClipboardJS\"] = factory();\n\telse\n\t\troot[\"ClipboardJS\"] = factory();\n})(this, function() {\nreturn /******/ (function() { // webpackBootstrap\n/******/ \tvar __webpack_modules__ = ({\n\n/***/ 686:\n/***/ (function(__unused_webpack_module, __webpack_exports__, __webpack_require__) {\n\n\"use strict\";\n\n// EXPORTS\n__webpack_require__.d(__webpack_exports__, {\n \"default\": function() { return /* binding */ clipboard; }\n});\n\n// EXTERNAL MODULE: ./node_modules/tiny-emitter/index.js\nvar tiny_emitter = __webpack_require__(279);\nvar tiny_emitter_default = /*#__PURE__*/__webpack_require__.n(tiny_emitter);\n// EXTERNAL MODULE: ./node_modules/good-listener/src/listen.js\nvar listen = __webpack_require__(370);\nvar listen_default = /*#__PURE__*/__webpack_require__.n(listen);\n// EXTERNAL MODULE: ./node_modules/select/src/select.js\nvar src_select = __webpack_require__(817);\nvar select_default = /*#__PURE__*/__webpack_require__.n(src_select);\n;// CONCATENATED MODULE: ./src/common/command.js\n/**\n * Executes a given operation type.\n * @param {String} type\n * @return {Boolean}\n */\nfunction command(type) {\n try {\n return document.execCommand(type);\n } catch (err) {\n return false;\n }\n}\n;// CONCATENATED MODULE: ./src/actions/cut.js\n\n\n/**\n * Cut action wrapper.\n * @param {String|HTMLElement} target\n * @return {String}\n */\n\nvar ClipboardActionCut = function ClipboardActionCut(target) {\n var selectedText = select_default()(target);\n command('cut');\n return selectedText;\n};\n\n/* harmony default export */ var actions_cut = (ClipboardActionCut);\n;// CONCATENATED MODULE: ./src/common/create-fake-element.js\n/**\n * Creates a fake textarea element with a value.\n * @param {String} value\n * @return {HTMLElement}\n */\nfunction createFakeElement(value) {\n var isRTL = document.documentElement.getAttribute('dir') === 'rtl';\n var fakeElement = document.createElement('textarea'); // Prevent zooming on iOS\n\n fakeElement.style.fontSize = '12pt'; // Reset box model\n\n fakeElement.style.border = '0';\n fakeElement.style.padding = '0';\n fakeElement.style.margin = '0'; // Move element out of screen horizontally\n\n fakeElement.style.position = 'absolute';\n fakeElement.style[isRTL ? 
'right' : 'left'] = '-9999px'; // Move element to the same position vertically\n\n var yPosition = window.pageYOffset || document.documentElement.scrollTop;\n fakeElement.style.top = \"\".concat(yPosition, \"px\");\n fakeElement.setAttribute('readonly', '');\n fakeElement.value = value;\n return fakeElement;\n}\n;// CONCATENATED MODULE: ./src/actions/copy.js\n\n\n\n/**\n * Create fake copy action wrapper using a fake element.\n * @param {String} target\n * @param {Object} options\n * @return {String}\n */\n\nvar fakeCopyAction = function fakeCopyAction(value, options) {\n var fakeElement = createFakeElement(value);\n options.container.appendChild(fakeElement);\n var selectedText = select_default()(fakeElement);\n command('copy');\n fakeElement.remove();\n return selectedText;\n};\n/**\n * Copy action wrapper.\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @return {String}\n */\n\n\nvar ClipboardActionCopy = function ClipboardActionCopy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n var selectedText = '';\n\n if (typeof target === 'string') {\n selectedText = fakeCopyAction(target, options);\n } else if (target instanceof HTMLInputElement && !['text', 'search', 'url', 'tel', 'password'].includes(target === null || target === void 0 ? void 0 : target.type)) {\n // If input type doesn't support `setSelectionRange`. Simulate it. https://developer.mozilla.org/en-US/docs/Web/API/HTMLInputElement/setSelectionRange\n selectedText = fakeCopyAction(target.value, options);\n } else {\n selectedText = select_default()(target);\n command('copy');\n }\n\n return selectedText;\n};\n\n/* harmony default export */ var actions_copy = (ClipboardActionCopy);\n;// CONCATENATED MODULE: ./src/actions/default.js\nfunction _typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return _typeof(obj); }\n\n\n\n/**\n * Inner function which performs selection from either `text` or `target`\n * properties and then executes copy or cut operations.\n * @param {Object} options\n */\n\nvar ClipboardActionDefault = function ClipboardActionDefault() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n // Defines base properties passed from constructor.\n var _options$action = options.action,\n action = _options$action === void 0 ? 'copy' : _options$action,\n container = options.container,\n target = options.target,\n text = options.text; // Sets the `action` to be performed which can be either 'copy' or 'cut'.\n\n if (action !== 'copy' && action !== 'cut') {\n throw new Error('Invalid \"action\" value, use either \"copy\" or \"cut\"');\n } // Sets the `target` property using an element that will be have its content copied.\n\n\n if (target !== undefined) {\n if (target && _typeof(target) === 'object' && target.nodeType === 1) {\n if (action === 'copy' && target.hasAttribute('disabled')) {\n throw new Error('Invalid \"target\" attribute. Please use \"readonly\" instead of \"disabled\" attribute');\n }\n\n if (action === 'cut' && (target.hasAttribute('readonly') || target.hasAttribute('disabled'))) {\n throw new Error('Invalid \"target\" attribute. 
You can\\'t cut text from elements with \"readonly\" or \"disabled\" attributes');\n }\n } else {\n throw new Error('Invalid \"target\" value, use a valid Element');\n }\n } // Define selection strategy based on `text` property.\n\n\n if (text) {\n return actions_copy(text, {\n container: container\n });\n } // Defines which selection strategy based on `target` property.\n\n\n if (target) {\n return action === 'cut' ? actions_cut(target) : actions_copy(target, {\n container: container\n });\n }\n};\n\n/* harmony default export */ var actions_default = (ClipboardActionDefault);\n;// CONCATENATED MODULE: ./src/clipboard.js\nfunction clipboard_typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { clipboard_typeof = function _typeof(obj) { return typeof obj; }; } else { clipboard_typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return clipboard_typeof(obj); }\n\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }\n\nfunction _inherits(subClass, superClass) { if (typeof superClass !== \"function\" && superClass !== null) { throw new TypeError(\"Super expression must either be null or a function\"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }\n\nfunction _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }\n\nfunction _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }\n\nfunction _possibleConstructorReturn(self, call) { if (call && (clipboard_typeof(call) === \"object\" || typeof call === \"function\")) { return call; } return _assertThisInitialized(self); }\n\nfunction _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\"); } return self; }\n\nfunction _isNativeReflectConstruct() { if (typeof Reflect === \"undefined\" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === \"function\") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } }\n\nfunction _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? 
Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }\n\n\n\n\n\n\n/**\n * Helper function to retrieve attribute value.\n * @param {String} suffix\n * @param {Element} element\n */\n\nfunction getAttributeValue(suffix, element) {\n var attribute = \"data-clipboard-\".concat(suffix);\n\n if (!element.hasAttribute(attribute)) {\n return;\n }\n\n return element.getAttribute(attribute);\n}\n/**\n * Base class which takes one or more elements, adds event listeners to them,\n * and instantiates a new `ClipboardAction` on each click.\n */\n\n\nvar Clipboard = /*#__PURE__*/function (_Emitter) {\n _inherits(Clipboard, _Emitter);\n\n var _super = _createSuper(Clipboard);\n\n /**\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n * @param {Object} options\n */\n function Clipboard(trigger, options) {\n var _this;\n\n _classCallCheck(this, Clipboard);\n\n _this = _super.call(this);\n\n _this.resolveOptions(options);\n\n _this.listenClick(trigger);\n\n return _this;\n }\n /**\n * Defines if attributes would be resolved using internal setter functions\n * or custom functions that were passed in the constructor.\n * @param {Object} options\n */\n\n\n _createClass(Clipboard, [{\n key: \"resolveOptions\",\n value: function resolveOptions() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n this.action = typeof options.action === 'function' ? options.action : this.defaultAction;\n this.target = typeof options.target === 'function' ? options.target : this.defaultTarget;\n this.text = typeof options.text === 'function' ? options.text : this.defaultText;\n this.container = clipboard_typeof(options.container) === 'object' ? options.container : document.body;\n }\n /**\n * Adds a click event listener to the passed trigger.\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n */\n\n }, {\n key: \"listenClick\",\n value: function listenClick(trigger) {\n var _this2 = this;\n\n this.listener = listen_default()(trigger, 'click', function (e) {\n return _this2.onClick(e);\n });\n }\n /**\n * Defines a new `ClipboardAction` on each click event.\n * @param {Event} e\n */\n\n }, {\n key: \"onClick\",\n value: function onClick(e) {\n var trigger = e.delegateTarget || e.currentTarget;\n var action = this.action(trigger) || 'copy';\n var text = actions_default({\n action: action,\n container: this.container,\n target: this.target(trigger),\n text: this.text(trigger)\n }); // Fires an event based on the copy operation result.\n\n this.emit(text ? 
'success' : 'error', {\n action: action,\n text: text,\n trigger: trigger,\n clearSelection: function clearSelection() {\n if (trigger) {\n trigger.focus();\n }\n\n window.getSelection().removeAllRanges();\n }\n });\n }\n /**\n * Default `action` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultAction\",\n value: function defaultAction(trigger) {\n return getAttributeValue('action', trigger);\n }\n /**\n * Default `target` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultTarget\",\n value: function defaultTarget(trigger) {\n var selector = getAttributeValue('target', trigger);\n\n if (selector) {\n return document.querySelector(selector);\n }\n }\n /**\n * Allow fire programmatically a copy action\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @returns Text copied.\n */\n\n }, {\n key: \"defaultText\",\n\n /**\n * Default `text` lookup function.\n * @param {Element} trigger\n */\n value: function defaultText(trigger) {\n return getAttributeValue('text', trigger);\n }\n /**\n * Destroy lifecycle.\n */\n\n }, {\n key: \"destroy\",\n value: function destroy() {\n this.listener.destroy();\n }\n }], [{\n key: \"copy\",\n value: function copy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n return actions_copy(target, options);\n }\n /**\n * Allow fire programmatically a cut action\n * @param {String|HTMLElement} target\n * @returns Text cutted.\n */\n\n }, {\n key: \"cut\",\n value: function cut(target) {\n return actions_cut(target);\n }\n /**\n * Returns the support of the given action, or all actions if no action is\n * given.\n * @param {String} [action]\n */\n\n }, {\n key: \"isSupported\",\n value: function isSupported() {\n var action = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ['copy', 'cut'];\n var actions = typeof action === 'string' ? 
[action] : action;\n var support = !!document.queryCommandSupported;\n actions.forEach(function (action) {\n support = support && !!document.queryCommandSupported(action);\n });\n return support;\n }\n }]);\n\n return Clipboard;\n}((tiny_emitter_default()));\n\n/* harmony default export */ var clipboard = (Clipboard);\n\n/***/ }),\n\n/***/ 828:\n/***/ (function(module) {\n\nvar DOCUMENT_NODE_TYPE = 9;\n\n/**\n * A polyfill for Element.matches()\n */\nif (typeof Element !== 'undefined' && !Element.prototype.matches) {\n var proto = Element.prototype;\n\n proto.matches = proto.matchesSelector ||\n proto.mozMatchesSelector ||\n proto.msMatchesSelector ||\n proto.oMatchesSelector ||\n proto.webkitMatchesSelector;\n}\n\n/**\n * Finds the closest parent that matches a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @return {Function}\n */\nfunction closest (element, selector) {\n while (element && element.nodeType !== DOCUMENT_NODE_TYPE) {\n if (typeof element.matches === 'function' &&\n element.matches(selector)) {\n return element;\n }\n element = element.parentNode;\n }\n}\n\nmodule.exports = closest;\n\n\n/***/ }),\n\n/***/ 438:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar closest = __webpack_require__(828);\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction _delegate(element, selector, type, callback, useCapture) {\n var listenerFn = listener.apply(this, arguments);\n\n element.addEventListener(type, listenerFn, useCapture);\n\n return {\n destroy: function() {\n element.removeEventListener(type, listenerFn, useCapture);\n }\n }\n}\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element|String|Array} [elements]\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction delegate(elements, selector, type, callback, useCapture) {\n // Handle the regular Element usage\n if (typeof elements.addEventListener === 'function') {\n return _delegate.apply(null, arguments);\n }\n\n // Handle Element-less usage, it defaults to global delegation\n if (typeof type === 'function') {\n // Use `document` as the first parameter, then apply arguments\n // This is a short way to .unshift `arguments` without running into deoptimizations\n return _delegate.bind(null, document).apply(null, arguments);\n }\n\n // Handle Selector-based usage\n if (typeof elements === 'string') {\n elements = document.querySelectorAll(elements);\n }\n\n // Handle Array-like based usage\n return Array.prototype.map.call(elements, function (element) {\n return _delegate(element, selector, type, callback, useCapture);\n });\n}\n\n/**\n * Finds closest match and invokes callback.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Function}\n */\nfunction listener(element, selector, type, callback) {\n return function(e) {\n e.delegateTarget = closest(e.target, selector);\n\n if (e.delegateTarget) {\n callback.call(element, e);\n }\n }\n}\n\nmodule.exports = delegate;\n\n\n/***/ }),\n\n/***/ 879:\n/***/ (function(__unused_webpack_module, exports) {\n\n/**\n * Check if argument is a HTML element.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.node = function(value) {\n return value !== undefined\n && 
value instanceof HTMLElement\n && value.nodeType === 1;\n};\n\n/**\n * Check if argument is a list of HTML elements.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.nodeList = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return value !== undefined\n && (type === '[object NodeList]' || type === '[object HTMLCollection]')\n && ('length' in value)\n && (value.length === 0 || exports.node(value[0]));\n};\n\n/**\n * Check if argument is a string.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.string = function(value) {\n return typeof value === 'string'\n || value instanceof String;\n};\n\n/**\n * Check if argument is a function.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.fn = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return type === '[object Function]';\n};\n\n\n/***/ }),\n\n/***/ 370:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar is = __webpack_require__(879);\nvar delegate = __webpack_require__(438);\n\n/**\n * Validates all params and calls the right\n * listener function based on its target type.\n *\n * @param {String|HTMLElement|HTMLCollection|NodeList} target\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listen(target, type, callback) {\n if (!target && !type && !callback) {\n throw new Error('Missing required arguments');\n }\n\n if (!is.string(type)) {\n throw new TypeError('Second argument must be a String');\n }\n\n if (!is.fn(callback)) {\n throw new TypeError('Third argument must be a Function');\n }\n\n if (is.node(target)) {\n return listenNode(target, type, callback);\n }\n else if (is.nodeList(target)) {\n return listenNodeList(target, type, callback);\n }\n else if (is.string(target)) {\n return listenSelector(target, type, callback);\n }\n else {\n throw new TypeError('First argument must be a String, HTMLElement, HTMLCollection, or NodeList');\n }\n}\n\n/**\n * Adds an event listener to a HTML element\n * and returns a remove listener function.\n *\n * @param {HTMLElement} node\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNode(node, type, callback) {\n node.addEventListener(type, callback);\n\n return {\n destroy: function() {\n node.removeEventListener(type, callback);\n }\n }\n}\n\n/**\n * Add an event listener to a list of HTML elements\n * and returns a remove listener function.\n *\n * @param {NodeList|HTMLCollection} nodeList\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNodeList(nodeList, type, callback) {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.addEventListener(type, callback);\n });\n\n return {\n destroy: function() {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.removeEventListener(type, callback);\n });\n }\n }\n}\n\n/**\n * Add an event listener to a selector\n * and returns a remove listener function.\n *\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenSelector(selector, type, callback) {\n return delegate(document.body, selector, type, callback);\n}\n\nmodule.exports = listen;\n\n\n/***/ }),\n\n/***/ 817:\n/***/ (function(module) {\n\nfunction select(element) {\n var selectedText;\n\n if (element.nodeName === 'SELECT') {\n element.focus();\n\n selectedText = element.value;\n }\n else if (element.nodeName === 'INPUT' || element.nodeName 
=== 'TEXTAREA') {\n var isReadOnly = element.hasAttribute('readonly');\n\n if (!isReadOnly) {\n element.setAttribute('readonly', '');\n }\n\n element.select();\n element.setSelectionRange(0, element.value.length);\n\n if (!isReadOnly) {\n element.removeAttribute('readonly');\n }\n\n selectedText = element.value;\n }\n else {\n if (element.hasAttribute('contenteditable')) {\n element.focus();\n }\n\n var selection = window.getSelection();\n var range = document.createRange();\n\n range.selectNodeContents(element);\n selection.removeAllRanges();\n selection.addRange(range);\n\n selectedText = selection.toString();\n }\n\n return selectedText;\n}\n\nmodule.exports = select;\n\n\n/***/ }),\n\n/***/ 279:\n/***/ (function(module) {\n\nfunction E () {\n // Keep this empty so it's easier to inherit from\n // (via https://github.com/lipsmack from https://github.com/scottcorgan/tiny-emitter/issues/3)\n}\n\nE.prototype = {\n on: function (name, callback, ctx) {\n var e = this.e || (this.e = {});\n\n (e[name] || (e[name] = [])).push({\n fn: callback,\n ctx: ctx\n });\n\n return this;\n },\n\n once: function (name, callback, ctx) {\n var self = this;\n function listener () {\n self.off(name, listener);\n callback.apply(ctx, arguments);\n };\n\n listener._ = callback\n return this.on(name, listener, ctx);\n },\n\n emit: function (name) {\n var data = [].slice.call(arguments, 1);\n var evtArr = ((this.e || (this.e = {}))[name] || []).slice();\n var i = 0;\n var len = evtArr.length;\n\n for (i; i < len; i++) {\n evtArr[i].fn.apply(evtArr[i].ctx, data);\n }\n\n return this;\n },\n\n off: function (name, callback) {\n var e = this.e || (this.e = {});\n var evts = e[name];\n var liveEvents = [];\n\n if (evts && callback) {\n for (var i = 0, len = evts.length; i < len; i++) {\n if (evts[i].fn !== callback && evts[i].fn._ !== callback)\n liveEvents.push(evts[i]);\n }\n }\n\n // Remove event from queue to prevent memory leak\n // Suggested by https://github.com/lazd\n // Ref: https://github.com/scottcorgan/tiny-emitter/commit/c6ebfaa9bc973b33d110a84a307742b7cf94c953#commitcomment-5024910\n\n (liveEvents.length)\n ? 
e[name] = liveEvents\n : delete e[name];\n\n return this;\n }\n};\n\nmodule.exports = E;\nmodule.exports.TinyEmitter = E;\n\n\n/***/ })\n\n/******/ \t});\n/************************************************************************/\n/******/ \t// The module cache\n/******/ \tvar __webpack_module_cache__ = {};\n/******/ \t\n/******/ \t// The require function\n/******/ \tfunction __webpack_require__(moduleId) {\n/******/ \t\t// Check if module is in cache\n/******/ \t\tif(__webpack_module_cache__[moduleId]) {\n/******/ \t\t\treturn __webpack_module_cache__[moduleId].exports;\n/******/ \t\t}\n/******/ \t\t// Create a new module (and put it into the cache)\n/******/ \t\tvar module = __webpack_module_cache__[moduleId] = {\n/******/ \t\t\t// no module.id needed\n/******/ \t\t\t// no module.loaded needed\n/******/ \t\t\texports: {}\n/******/ \t\t};\n/******/ \t\n/******/ \t\t// Execute the module function\n/******/ \t\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n/******/ \t\n/******/ \t\t// Return the exports of the module\n/******/ \t\treturn module.exports;\n/******/ \t}\n/******/ \t\n/************************************************************************/\n/******/ \t/* webpack/runtime/compat get default export */\n/******/ \t!function() {\n/******/ \t\t// getDefaultExport function for compatibility with non-harmony modules\n/******/ \t\t__webpack_require__.n = function(module) {\n/******/ \t\t\tvar getter = module && module.__esModule ?\n/******/ \t\t\t\tfunction() { return module['default']; } :\n/******/ \t\t\t\tfunction() { return module; };\n/******/ \t\t\t__webpack_require__.d(getter, { a: getter });\n/******/ \t\t\treturn getter;\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/define property getters */\n/******/ \t!function() {\n/******/ \t\t// define getter functions for harmony exports\n/******/ \t\t__webpack_require__.d = function(exports, definition) {\n/******/ \t\t\tfor(var key in definition) {\n/******/ \t\t\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n/******/ \t\t\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n/******/ \t\t\t\t}\n/******/ \t\t\t}\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/hasOwnProperty shorthand */\n/******/ \t!function() {\n/******/ \t\t__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }\n/******/ \t}();\n/******/ \t\n/************************************************************************/\n/******/ \t// module exports must be returned from runtime so entry inlining is disabled\n/******/ \t// startup\n/******/ \t// Load entry module and return exports\n/******/ \treturn __webpack_require__(686);\n/******/ })()\n.default;\n});", "/*\n * Copyright (c) 2016-2024 Martin Donath \n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF 
ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\nimport \"focus-visible\"\n\nimport {\n EMPTY,\n NEVER,\n Observable,\n Subject,\n defer,\n delay,\n filter,\n map,\n merge,\n mergeWith,\n shareReplay,\n switchMap\n} from \"rxjs\"\n\nimport { configuration, feature } from \"./_\"\nimport {\n at,\n getActiveElement,\n getOptionalElement,\n requestJSON,\n setLocation,\n setToggle,\n watchDocument,\n watchKeyboard,\n watchLocation,\n watchLocationTarget,\n watchMedia,\n watchPrint,\n watchScript,\n watchViewport\n} from \"./browser\"\nimport {\n getComponentElement,\n getComponentElements,\n mountAnnounce,\n mountBackToTop,\n mountConsent,\n mountContent,\n mountDialog,\n mountHeader,\n mountHeaderTitle,\n mountPalette,\n mountProgress,\n mountSearch,\n mountSearchHiglight,\n mountSidebar,\n mountSource,\n mountTableOfContents,\n mountTabs,\n watchHeader,\n watchMain\n} from \"./components\"\nimport {\n SearchIndex,\n setupClipboardJS,\n setupInstantNavigation,\n setupVersionSelector\n} from \"./integrations\"\nimport {\n patchEllipsis,\n patchIndeterminate,\n patchScrollfix,\n patchScrolllock\n} from \"./patches\"\nimport \"./polyfills\"\n\n/* ----------------------------------------------------------------------------\n * Functions - @todo refactor\n * ------------------------------------------------------------------------- */\n\n/**\n * Fetch search index\n *\n * @returns Search index observable\n */\nfunction fetchSearchIndex(): Observable {\n if (location.protocol === \"file:\") {\n return watchScript(\n `${new URL(\"search/search_index.js\", config.base)}`\n )\n .pipe(\n // @ts-ignore - @todo fix typings\n map(() => __index),\n shareReplay(1)\n )\n } else {\n return requestJSON(\n new URL(\"search/search_index.json\", config.base)\n )\n }\n}\n\n/* ----------------------------------------------------------------------------\n * Application\n * ------------------------------------------------------------------------- */\n\n/* Yay, JavaScript is available */\ndocument.documentElement.classList.remove(\"no-js\")\ndocument.documentElement.classList.add(\"js\")\n\n/* Set up navigation observables and subjects */\nconst document$ = watchDocument()\nconst location$ = watchLocation()\nconst target$ = watchLocationTarget(location$)\nconst keyboard$ = watchKeyboard()\n\n/* Set up media observables */\nconst viewport$ = watchViewport()\nconst tablet$ = watchMedia(\"(min-width: 960px)\")\nconst screen$ = watchMedia(\"(min-width: 1220px)\")\nconst print$ = watchPrint()\n\n/* Retrieve search index, if search is enabled */\nconst config = configuration()\nconst index$ = document.forms.namedItem(\"search\")\n ? 
fetchSearchIndex()\n : NEVER\n\n/* Set up Clipboard.js integration */\nconst alert$ = new Subject()\nsetupClipboardJS({ alert$ })\n\n/* Set up progress indicator */\nconst progress$ = new Subject()\n\n/* Set up instant navigation, if enabled */\nif (feature(\"navigation.instant\"))\n setupInstantNavigation({ location$, viewport$, progress$ })\n .subscribe(document$)\n\n/* Set up version selector */\nif (config.version?.provider === \"mike\")\n setupVersionSelector({ document$ })\n\n/* Always close drawer and search on navigation */\nmerge(location$, target$)\n .pipe(\n delay(125)\n )\n .subscribe(() => {\n setToggle(\"drawer\", false)\n setToggle(\"search\", false)\n })\n\n/* Set up global keyboard handlers */\nkeyboard$\n .pipe(\n filter(({ mode }) => mode === \"global\")\n )\n .subscribe(key => {\n switch (key.type) {\n\n /* Go to previous page */\n case \"p\":\n case \",\":\n const prev = getOptionalElement(\"link[rel=prev]\")\n if (typeof prev !== \"undefined\")\n setLocation(prev)\n break\n\n /* Go to next page */\n case \"n\":\n case \".\":\n const next = getOptionalElement(\"link[rel=next]\")\n if (typeof next !== \"undefined\")\n setLocation(next)\n break\n\n /* Expand navigation, see https://bit.ly/3ZjG5io */\n case \"Enter\":\n const active = getActiveElement()\n if (active instanceof HTMLLabelElement)\n active.click()\n }\n })\n\n/* Set up patches */\npatchEllipsis({ viewport$, document$ })\npatchIndeterminate({ document$, tablet$ })\npatchScrollfix({ document$ })\npatchScrolllock({ viewport$, tablet$ })\n\n/* Set up header and main area observable */\nconst header$ = watchHeader(getComponentElement(\"header\"), { viewport$ })\nconst main$ = document$\n .pipe(\n map(() => getComponentElement(\"main\")),\n switchMap(el => watchMain(el, { viewport$, header$ })),\n shareReplay(1)\n )\n\n/* Set up control component observables */\nconst control$ = merge(\n\n /* Consent */\n ...getComponentElements(\"consent\")\n .map(el => mountConsent(el, { target$ })),\n\n /* Dialog */\n ...getComponentElements(\"dialog\")\n .map(el => mountDialog(el, { alert$ })),\n\n /* Color palette */\n ...getComponentElements(\"palette\")\n .map(el => mountPalette(el)),\n\n /* Progress bar */\n ...getComponentElements(\"progress\")\n .map(el => mountProgress(el, { progress$ })),\n\n /* Search */\n ...getComponentElements(\"search\")\n .map(el => mountSearch(el, { index$, keyboard$ })),\n\n /* Repository information */\n ...getComponentElements(\"source\")\n .map(el => mountSource(el))\n)\n\n/* Set up content component observables */\nconst content$ = defer(() => merge(\n\n /* Announcement bar */\n ...getComponentElements(\"announce\")\n .map(el => mountAnnounce(el)),\n\n /* Content */\n ...getComponentElements(\"content\")\n .map(el => mountContent(el, { viewport$, target$, print$ })),\n\n /* Search highlighting */\n ...getComponentElements(\"content\")\n .map(el => feature(\"search.highlight\")\n ? mountSearchHiglight(el, { index$, location$ })\n : EMPTY\n ),\n\n /* Header */\n ...getComponentElements(\"header\")\n .map(el => mountHeader(el, { viewport$, header$, main$ })),\n\n /* Header title */\n ...getComponentElements(\"header-title\")\n .map(el => mountHeaderTitle(el, { viewport$, header$ })),\n\n /* Sidebar */\n ...getComponentElements(\"sidebar\")\n .map(el => el.getAttribute(\"data-md-type\") === \"navigation\"\n ? 
at(screen$, () => mountSidebar(el, { viewport$, header$, main$ }))\n : at(tablet$, () => mountSidebar(el, { viewport$, header$, main$ }))\n ),\n\n /* Navigation tabs */\n ...getComponentElements(\"tabs\")\n .map(el => mountTabs(el, { viewport$, header$ })),\n\n /* Table of contents */\n ...getComponentElements(\"toc\")\n .map(el => mountTableOfContents(el, {\n viewport$, header$, main$, target$\n })),\n\n /* Back-to-top button */\n ...getComponentElements(\"top\")\n .map(el => mountBackToTop(el, { viewport$, header$, main$, target$ }))\n))\n\n/* Set up component observables */\nconst component$ = document$\n .pipe(\n switchMap(() => content$),\n mergeWith(control$),\n shareReplay(1)\n )\n\n/* Subscribe to all components */\ncomponent$.subscribe()\n\n/* ----------------------------------------------------------------------------\n * Exports\n * ------------------------------------------------------------------------- */\n\nwindow.document$ = document$ /* Document observable */\nwindow.location$ = location$ /* Location subject */\nwindow.target$ = target$ /* Location target observable */\nwindow.keyboard$ = keyboard$ /* Keyboard observable */\nwindow.viewport$ = viewport$ /* Viewport observable */\nwindow.tablet$ = tablet$ /* Media tablet observable */\nwindow.screen$ = screen$ /* Media screen observable */\nwindow.print$ = print$ /* Media print observable */\nwindow.alert$ = alert$ /* Alert subject */\nwindow.progress$ = progress$ /* Progress indicator subject */\nwindow.component$ = component$ /* Component observable */\n", "/******************************************************************************\nCopyright (c) Microsoft Corporation.\n\nPermission to use, copy, modify, and/or distribute this software for any\npurpose with or without fee is hereby granted.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH\nREGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY\nAND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,\nINDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM\nLOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR\nOTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\nPERFORMANCE OF THIS SOFTWARE.\n***************************************************************************** */\n/* global Reflect, Promise, SuppressedError, Symbol, Iterator */\n\nvar extendStatics = function(d, b) {\n extendStatics = Object.setPrototypeOf ||\n ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||\n function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };\n return extendStatics(d, b);\n};\n\nexport function __extends(d, b) {\n if (typeof b !== \"function\" && b !== null)\n throw new TypeError(\"Class extends value \" + String(b) + \" is not a constructor or null\");\n extendStatics(d, b);\n function __() { this.constructor = d; }\n d.prototype = b === null ? 
Object.create(b) : (__.prototype = b.prototype, new __());\n}\n\nexport var __assign = function() {\n __assign = Object.assign || function __assign(t) {\n for (var s, i = 1, n = arguments.length; i < n; i++) {\n s = arguments[i];\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];\n }\n return t;\n }\n return __assign.apply(this, arguments);\n}\n\nexport function __rest(s, e) {\n var t = {};\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)\n t[p] = s[p];\n if (s != null && typeof Object.getOwnPropertySymbols === \"function\")\n for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {\n if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))\n t[p[i]] = s[p[i]];\n }\n return t;\n}\n\nexport function __decorate(decorators, target, key, desc) {\n var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;\n if (typeof Reflect === \"object\" && typeof Reflect.decorate === \"function\") r = Reflect.decorate(decorators, target, key, desc);\n else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;\n return c > 3 && r && Object.defineProperty(target, key, r), r;\n}\n\nexport function __param(paramIndex, decorator) {\n return function (target, key) { decorator(target, key, paramIndex); }\n}\n\nexport function __esDecorate(ctor, descriptorIn, decorators, contextIn, initializers, extraInitializers) {\n function accept(f) { if (f !== void 0 && typeof f !== \"function\") throw new TypeError(\"Function expected\"); return f; }\n var kind = contextIn.kind, key = kind === \"getter\" ? \"get\" : kind === \"setter\" ? \"set\" : \"value\";\n var target = !descriptorIn && ctor ? contextIn[\"static\"] ? ctor : ctor.prototype : null;\n var descriptor = descriptorIn || (target ? Object.getOwnPropertyDescriptor(target, contextIn.name) : {});\n var _, done = false;\n for (var i = decorators.length - 1; i >= 0; i--) {\n var context = {};\n for (var p in contextIn) context[p] = p === \"access\" ? {} : contextIn[p];\n for (var p in contextIn.access) context.access[p] = contextIn.access[p];\n context.addInitializer = function (f) { if (done) throw new TypeError(\"Cannot add initializers after decoration has completed\"); extraInitializers.push(accept(f || null)); };\n var result = (0, decorators[i])(kind === \"accessor\" ? { get: descriptor.get, set: descriptor.set } : descriptor[key], context);\n if (kind === \"accessor\") {\n if (result === void 0) continue;\n if (result === null || typeof result !== \"object\") throw new TypeError(\"Object expected\");\n if (_ = accept(result.get)) descriptor.get = _;\n if (_ = accept(result.set)) descriptor.set = _;\n if (_ = accept(result.init)) initializers.unshift(_);\n }\n else if (_ = accept(result)) {\n if (kind === \"field\") initializers.unshift(_);\n else descriptor[key] = _;\n }\n }\n if (target) Object.defineProperty(target, contextIn.name, descriptor);\n done = true;\n};\n\nexport function __runInitializers(thisArg, initializers, value) {\n var useValue = arguments.length > 2;\n for (var i = 0; i < initializers.length; i++) {\n value = useValue ? initializers[i].call(thisArg, value) : initializers[i].call(thisArg);\n }\n return useValue ? value : void 0;\n};\n\nexport function __propKey(x) {\n return typeof x === \"symbol\" ? 
x : \"\".concat(x);\n};\n\nexport function __setFunctionName(f, name, prefix) {\n if (typeof name === \"symbol\") name = name.description ? \"[\".concat(name.description, \"]\") : \"\";\n return Object.defineProperty(f, \"name\", { configurable: true, value: prefix ? \"\".concat(prefix, \" \", name) : name });\n};\n\nexport function __metadata(metadataKey, metadataValue) {\n if (typeof Reflect === \"object\" && typeof Reflect.metadata === \"function\") return Reflect.metadata(metadataKey, metadataValue);\n}\n\nexport function __awaiter(thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n}\n\nexport function __generator(thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === \"function\" ? Iterator : Object).prototype);\n return g.next = verb(0), g[\"throw\"] = verb(1), g[\"return\"] = verb(2), typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\n }\n}\n\nexport var __createBinding = Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? 
!m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n});\n\nexport function __exportStar(m, o) {\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(o, p)) __createBinding(o, m, p);\n}\n\nexport function __values(o) {\n var s = typeof Symbol === \"function\" && Symbol.iterator, m = s && o[s], i = 0;\n if (m) return m.call(o);\n if (o && typeof o.length === \"number\") return {\n next: function () {\n if (o && i >= o.length) o = void 0;\n return { value: o && o[i++], done: !o };\n }\n };\n throw new TypeError(s ? \"Object is not iterable.\" : \"Symbol.iterator is not defined.\");\n}\n\nexport function __read(o, n) {\n var m = typeof Symbol === \"function\" && o[Symbol.iterator];\n if (!m) return o;\n var i = m.call(o), r, ar = [], e;\n try {\n while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);\n }\n catch (error) { e = { error: error }; }\n finally {\n try {\n if (r && !r.done && (m = i[\"return\"])) m.call(i);\n }\n finally { if (e) throw e.error; }\n }\n return ar;\n}\n\n/** @deprecated */\nexport function __spread() {\n for (var ar = [], i = 0; i < arguments.length; i++)\n ar = ar.concat(__read(arguments[i]));\n return ar;\n}\n\n/** @deprecated */\nexport function __spreadArrays() {\n for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length;\n for (var r = Array(s), k = 0, i = 0; i < il; i++)\n for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++)\n r[k] = a[j];\n return r;\n}\n\nexport function __spreadArray(to, from, pack) {\n if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {\n if (ar || !(i in from)) {\n if (!ar) ar = Array.prototype.slice.call(from, 0, i);\n ar[i] = from[i];\n }\n }\n return to.concat(ar || Array.prototype.slice.call(from));\n}\n\nexport function __await(v) {\n return this instanceof __await ? (this.v = v, this) : new __await(v);\n}\n\nexport function __asyncGenerator(thisArg, _arguments, generator) {\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\n var g = generator.apply(thisArg, _arguments || []), i, q = [];\n return i = Object.create((typeof AsyncIterator === \"function\" ? AsyncIterator : Object).prototype), verb(\"next\"), verb(\"throw\"), verb(\"return\", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;\n function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }\n function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }\n function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }\n function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }\n function fulfill(value) { resume(\"next\", value); }\n function reject(value) { resume(\"throw\", value); }\n function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }\n}\n\nexport function __asyncDelegator(o) {\n var i, p;\n return i = {}, verb(\"next\"), verb(\"throw\", function (e) { throw e; }), verb(\"return\"), i[Symbol.iterator] = function () { return this; }, i;\n function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? 
`ReplaySubject` will replay values, even after observing an error, where `BehaviorSubject` will not.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n * @see {@link shareReplay}\n */\nexport class ReplaySubject extends Subject {\n private _buffer: (T | number)[] = [];\n private _infiniteTimeWindow = true;\n\n /**\n * @param bufferSize The size of the buffer to replay on subscription\n * @param windowTime The amount of time the buffered items will stay buffered\n * @param timestampProvider An object with a `now()` method that provides the current timestamp. This is used to\n * calculate the amount of time something has been buffered.\n */\n constructor(\n private _bufferSize = Infinity,\n private _windowTime = Infinity,\n private _timestampProvider: TimestampProvider = dateTimestampProvider\n ) {\n super();\n this._infiniteTimeWindow = _windowTime === Infinity;\n this._bufferSize = Math.max(1, _bufferSize);\n this._windowTime = Math.max(1, _windowTime);\n }\n\n next(value: T): void {\n const { isStopped, _buffer, _infiniteTimeWindow, _timestampProvider, _windowTime } = this;\n if (!isStopped) {\n _buffer.push(value);\n !_infiniteTimeWindow && _buffer.push(_timestampProvider.now() + _windowTime);\n }\n this._trimBuffer();\n super.next(value);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._trimBuffer();\n\n const subscription = this._innerSubscribe(subscriber);\n\n const { _infiniteTimeWindow, _buffer } = this;\n // We use a copy here, so reentrant code does not mutate our array while we're\n // emitting it to a new subscriber.\n const copy = _buffer.slice();\n for (let i = 0; i < copy.length && !subscriber.closed; i += _infiniteTimeWindow ? 1 : 2) {\n subscriber.next(copy[i] as T);\n }\n\n this._checkFinalizedStatuses(subscriber);\n\n return subscription;\n }\n\n private _trimBuffer() {\n const { _bufferSize, _timestampProvider, _buffer, _infiniteTimeWindow } = this;\n // If we don't have an infinite buffer size, and we're over the length,\n // use splice to truncate the old buffer values off. Note that we have to\n // double the size for instances where we're not using an infinite time window\n // because we're storing the values and the timestamps in the same array.\n const adjustedBufferSize = (_infiniteTimeWindow ? 1 : 2) * _bufferSize;\n _bufferSize < Infinity && adjustedBufferSize < _buffer.length && _buffer.splice(0, _buffer.length - adjustedBufferSize);\n\n // Now, if we're not in an infinite time window, remove all values where the time is\n // older than what is allowed.\n if (!_infiniteTimeWindow) {\n const now = _timestampProvider.now();\n let last = 0;\n // Search the array for the first timestamp that isn't expired and\n // truncate the buffer up to that point.\n for (let i = 1; i < _buffer.length && (_buffer[i] as number) <= now; i += 2) {\n last = i;\n }\n last && _buffer.splice(0, last + 1);\n }\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Subscription } from '../Subscription';\nimport { SchedulerAction } from '../types';\n\n/**\n * A unit of work to be executed in a `scheduler`. 
An action is typically\n * created from within a {@link SchedulerLike} and an RxJS user does not need to concern\n * themselves about creating and manipulating an Action.\n *\n * ```ts\n * class Action extends Subscription {\n * new (scheduler: Scheduler, work: (state?: T) => void);\n * schedule(state?: T, delay: number = 0): Subscription;\n * }\n * ```\n *\n * @class Action\n */\nexport class Action extends Subscription {\n constructor(scheduler: Scheduler, work: (this: SchedulerAction, state?: T) => void) {\n super();\n }\n /**\n * Schedules this action on its parent {@link SchedulerLike} for execution. May be passed\n * some context object, `state`. May happen at some point in the future,\n * according to the `delay` parameter, if specified.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler.\n * @return {void}\n */\n public schedule(state?: T, delay: number = 0): Subscription {\n return this;\n }\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetIntervalFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearIntervalFunction = (handle: TimerHandle) => void;\n\ninterface IntervalProvider {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n delegate:\n | {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n }\n | undefined;\n}\n\nexport const intervalProvider: IntervalProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setInterval(handler: () => void, timeout?: number, ...args) {\n const { delegate } = intervalProvider;\n if (delegate?.setInterval) {\n return delegate.setInterval(handler, timeout, ...args);\n }\n return setInterval(handler, timeout, ...args);\n },\n clearInterval(handle) {\n const { delegate } = intervalProvider;\n return (delegate?.clearInterval || clearInterval)(handle as any);\n },\n delegate: undefined,\n};\n", "import { Action } from './Action';\nimport { SchedulerAction } from '../types';\nimport { Subscription } from '../Subscription';\nimport { AsyncScheduler } from './AsyncScheduler';\nimport { intervalProvider } from './intervalProvider';\nimport { arrRemove } from '../util/arrRemove';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncAction extends Action {\n public id: TimerHandle | undefined;\n public state?: T;\n // @ts-ignore: Property has no initializer and is not definitely assigned\n public delay: number;\n protected pending: boolean = false;\n\n constructor(protected scheduler: AsyncScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (this.closed) {\n return this;\n }\n\n // Always replace the current state with the new state.\n this.state = state;\n\n const id = this.id;\n const scheduler = this.scheduler;\n\n //\n // Important implementation note:\n //\n // Actions only execute once by default, unless rescheduled from within the\n // scheduled callback. 
This allows us to implement single and repeat\n // actions via the same code path, without adding API surface area, as well\n // as mimic traditional recursion but across asynchronous boundaries.\n //\n // However, JS runtimes and timers distinguish between intervals achieved by\n // serial `setTimeout` calls vs. a single `setInterval` call. An interval of\n // serial `setTimeout` calls can be individually delayed, which delays\n // scheduling the next `setTimeout`, and so on. `setInterval` attempts to\n // guarantee the interval callback will be invoked more precisely to the\n // interval period, regardless of load.\n //\n // Therefore, we use `setInterval` to schedule single and repeat actions.\n // If the action reschedules itself with the same delay, the interval is not\n // canceled. If the action doesn't reschedule, or reschedules with a\n // different delay, the interval will be canceled after scheduled callback\n // execution.\n //\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, delay);\n }\n\n // Set the pending flag indicating that this action has been scheduled, or\n // has recursively rescheduled itself.\n this.pending = true;\n\n this.delay = delay;\n // If this action has already an async Id, don't request a new one.\n this.id = this.id ?? this.requestAsyncId(scheduler, this.id, delay);\n\n return this;\n }\n\n protected requestAsyncId(scheduler: AsyncScheduler, _id?: TimerHandle, delay: number = 0): TimerHandle {\n return intervalProvider.setInterval(scheduler.flush.bind(scheduler, this), delay);\n }\n\n protected recycleAsyncId(_scheduler: AsyncScheduler, id?: TimerHandle, delay: number | null = 0): TimerHandle | undefined {\n // If this action is rescheduled with the same delay time, don't clear the interval id.\n if (delay != null && this.delay === delay && this.pending === false) {\n return id;\n }\n // Otherwise, if the action's delay time is different from the current delay,\n // or the action has been rescheduled before it's executed, clear the interval id\n if (id != null) {\n intervalProvider.clearInterval(id);\n }\n\n return undefined;\n }\n\n /**\n * Immediately executes this action and the `work` it contains.\n * @return {any}\n */\n public execute(state: T, delay: number): any {\n if (this.closed) {\n return new Error('executing a cancelled action');\n }\n\n this.pending = false;\n const error = this._execute(state, delay);\n if (error) {\n return error;\n } else if (this.pending === false && this.id != null) {\n // Dequeue if the action didn't reschedule itself. Don't call\n // unsubscribe(), because the action could reschedule later.\n // For example:\n // ```\n // scheduler.schedule(function doWork(counter) {\n // /* ... I'm a busy worker bee ... */\n // var originalAction = this;\n // /* wait 100ms before rescheduling the action */\n // setTimeout(function () {\n // originalAction.schedule(counter + 1);\n // }, 100);\n // }, 1000);\n // ```\n this.id = this.recycleAsyncId(this.scheduler, this.id, null);\n }\n }\n\n protected _execute(state: T, _delay: number): any {\n let errored: boolean = false;\n let errorValue: any;\n try {\n this.work(state);\n } catch (e) {\n errored = true;\n // HACK: Since code elsewhere is relying on the \"truthiness\" of the\n // return here, we can't have it return \"\" or 0 or false.\n // TODO: Clean this up when we refactor schedulers mid-version-8 or so.\n errorValue = e ? 
e : new Error('Scheduled action threw falsy error');\n }\n if (errored) {\n this.unsubscribe();\n return errorValue;\n }\n }\n\n unsubscribe() {\n if (!this.closed) {\n const { id, scheduler } = this;\n const { actions } = scheduler;\n\n this.work = this.state = this.scheduler = null!;\n this.pending = false;\n\n arrRemove(actions, this);\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, null);\n }\n\n this.delay = null!;\n super.unsubscribe();\n }\n }\n}\n", "import { Action } from './scheduler/Action';\nimport { Subscription } from './Subscription';\nimport { SchedulerLike, SchedulerAction } from './types';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * An execution context and a data structure to order tasks and schedule their\n * execution. Provides a notion of (potentially virtual) time, through the\n * `now()` getter method.\n *\n * Each unit of work in a Scheduler is called an `Action`.\n *\n * ```ts\n * class Scheduler {\n * now(): number;\n * schedule(work, delay?, state?): Subscription;\n * }\n * ```\n *\n * @class Scheduler\n * @deprecated Scheduler is an internal implementation detail of RxJS, and\n * should not be used directly. Rather, create your own class and implement\n * {@link SchedulerLike}. Will be made internal in v8.\n */\nexport class Scheduler implements SchedulerLike {\n public static now: () => number = dateTimestampProvider.now;\n\n constructor(private schedulerActionCtor: typeof Action, now: () => number = Scheduler.now) {\n this.now = now;\n }\n\n /**\n * A getter method that returns a number representing the current time\n * (at the time this function was called) according to the scheduler's own\n * internal clock.\n * @return {number} A number that represents the current time. May or may not\n * have a relation to wall-clock time. May or may not refer to a time unit\n * (e.g. milliseconds).\n */\n public now: () => number;\n\n /**\n * Schedules a function, `work`, for execution. May happen at some point in\n * the future, according to the `delay` parameter, if specified. 
May be passed\n * some context object, `state`, which will be passed to the `work` function.\n *\n * The given arguments will be processed an stored as an Action object in a\n * queue of actions.\n *\n * @param {function(state: ?T): ?Subscription} work A function representing a\n * task, or some unit of work to be executed by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler itself.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @return {Subscription} A subscription in order to be able to unsubscribe\n * the scheduled work.\n */\n public schedule(work: (this: SchedulerAction, state?: T) => void, delay: number = 0, state?: T): Subscription {\n return new this.schedulerActionCtor(this, work).schedule(state, delay);\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Action } from './Action';\nimport { AsyncAction } from './AsyncAction';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncScheduler extends Scheduler {\n public actions: Array> = [];\n /**\n * A flag to indicate whether the Scheduler is currently executing a batch of\n * queued actions.\n * @type {boolean}\n * @internal\n */\n public _active: boolean = false;\n /**\n * An internal ID used to track the latest asynchronous task such as those\n * coming from `setTimeout`, `setInterval`, `requestAnimationFrame`, and\n * others.\n * @type {any}\n * @internal\n */\n public _scheduled: TimerHandle | undefined;\n\n constructor(SchedulerAction: typeof Action, now: () => number = Scheduler.now) {\n super(SchedulerAction, now);\n }\n\n public flush(action: AsyncAction): void {\n const { actions } = this;\n\n if (this._active) {\n actions.push(action);\n return;\n }\n\n let error: any;\n this._active = true;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions.shift()!)); // exhaust the scheduler queue\n\n this._active = false;\n\n if (error) {\n while ((action = actions.shift()!)) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\n/**\n *\n * Async Scheduler\n *\n * Schedule task as if you used setTimeout(task, duration)\n *\n * `async` scheduler schedules tasks asynchronously, by putting them on the JavaScript\n * event loop queue. 
It is best used to delay tasks in time or to schedule tasks repeating\n * in intervals.\n *\n * If you just want to \"defer\" task, that is to perform it right after currently\n * executing synchronous code ends (commonly achieved by `setTimeout(deferredTask, 0)`),\n * better choice will be the {@link asapScheduler} scheduler.\n *\n * ## Examples\n * Use async scheduler to delay task\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * const task = () => console.log('it works!');\n *\n * asyncScheduler.schedule(task, 2000);\n *\n * // After 2 seconds logs:\n * // \"it works!\"\n * ```\n *\n * Use async scheduler to repeat task in intervals\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * function task(state) {\n * console.log(state);\n * this.schedule(state + 1, 1000); // `this` references currently executing Action,\n * // which we reschedule with new state and delay\n * }\n *\n * asyncScheduler.schedule(task, 3000, 0);\n *\n * // Logs:\n * // 0 after 3s\n * // 1 after 4s\n * // 2 after 5s\n * // 3 after 6s\n * ```\n */\n\nexport const asyncScheduler = new AsyncScheduler(AsyncAction);\n\n/**\n * @deprecated Renamed to {@link asyncScheduler}. Will be removed in v8.\n */\nexport const async = asyncScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { Subscription } from '../Subscription';\nimport { QueueScheduler } from './QueueScheduler';\nimport { SchedulerAction } from '../types';\nimport { TimerHandle } from './timerHandle';\n\nexport class QueueAction extends AsyncAction {\n constructor(protected scheduler: QueueScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (delay > 0) {\n return super.schedule(state, delay);\n }\n this.delay = delay;\n this.state = state;\n this.scheduler.flush(this);\n return this;\n }\n\n public execute(state: T, delay: number): any {\n return delay > 0 || this.closed ? super.execute(state, delay) : this._execute(state, delay);\n }\n\n protected requestAsyncId(scheduler: QueueScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n\n if ((delay != null && delay > 0) || (delay == null && this.delay > 0)) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n\n // Otherwise flush the scheduler starting with this action.\n scheduler.flush(this);\n\n // HACK: In the past, this was returning `void`. However, `void` isn't a valid\n // `TimerHandle`, and generally the return value here isn't really used. So the\n // compromise is to return `0` which is both \"falsy\" and a valid `TimerHandle`,\n // as opposed to refactoring every other instanceo of `requestAsyncId`.\n return 0;\n }\n}\n", "import { AsyncScheduler } from './AsyncScheduler';\n\nexport class QueueScheduler extends AsyncScheduler {\n}\n", "import { QueueAction } from './QueueAction';\nimport { QueueScheduler } from './QueueScheduler';\n\n/**\n *\n * Queue Scheduler\n *\n * Put every next task on a queue, instead of executing it immediately\n *\n * `queue` scheduler, when used with delay, behaves the same as {@link asyncScheduler} scheduler.\n *\n * When used without delay, it schedules given task synchronously - executes it right when\n * it is scheduled. 
However when called recursively, that is when inside the scheduled task,\n * another task is scheduled with queue scheduler, instead of executing immediately as well,\n * that task will be put on a queue and wait for current one to finish.\n *\n * This means that when you execute task with `queue` scheduler, you are sure it will end\n * before any other task scheduled with that scheduler will start.\n *\n * ## Examples\n * Schedule recursively first, then do something\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(() => {\n * queueScheduler.schedule(() => console.log('second')); // will not happen now, but will be put on a queue\n *\n * console.log('first');\n * });\n *\n * // Logs:\n * // \"first\"\n * // \"second\"\n * ```\n *\n * Reschedule itself recursively\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(function(state) {\n * if (state !== 0) {\n * console.log('before', state);\n * this.schedule(state - 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * console.log('after', state);\n * }\n * }, 0, 3);\n *\n * // In scheduler that runs recursively, you would expect:\n * // \"before\", 3\n * // \"before\", 2\n * // \"before\", 1\n * // \"after\", 1\n * // \"after\", 2\n * // \"after\", 3\n *\n * // But with queue it logs:\n * // \"before\", 3\n * // \"after\", 3\n * // \"before\", 2\n * // \"after\", 2\n * // \"before\", 1\n * // \"after\", 1\n * ```\n */\n\nexport const queueScheduler = new QueueScheduler(QueueAction);\n\n/**\n * @deprecated Renamed to {@link queueScheduler}. Will be removed in v8.\n */\nexport const queue = queueScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\nimport { SchedulerAction } from '../types';\nimport { animationFrameProvider } from './animationFrameProvider';\nimport { TimerHandle } from './timerHandle';\n\nexport class AnimationFrameAction extends AsyncAction {\n constructor(protected scheduler: AnimationFrameScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n protected requestAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay is greater than 0, request as an async action.\n if (delay !== null && delay > 0) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n // Push the action to the end of the scheduler queue.\n scheduler.actions.push(this);\n // If an animation frame has already been requested, don't request another\n // one. If an animation frame hasn't been requested yet, request one. Return\n // the current animation frame request id.\n return scheduler._scheduled || (scheduler._scheduled = animationFrameProvider.requestAnimationFrame(() => scheduler.flush(undefined)));\n }\n\n protected recycleAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle | undefined {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n if (delay != null ? 
delay > 0 : this.delay > 0) {\n return super.recycleAsyncId(scheduler, id, delay);\n }\n // If the scheduler queue has no remaining actions with the same async id,\n // cancel the requested animation frame and set the scheduled flag to\n // undefined so the next AnimationFrameAction will request its own.\n const { actions } = scheduler;\n if (id != null && actions[actions.length - 1]?.id !== id) {\n animationFrameProvider.cancelAnimationFrame(id as number);\n scheduler._scheduled = undefined;\n }\n // Return undefined so the action knows to request a new async id if it's rescheduled.\n return undefined;\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\nexport class AnimationFrameScheduler extends AsyncScheduler {\n public flush(action?: AsyncAction): void {\n this._active = true;\n // The async id that effects a call to flush is stored in _scheduled.\n // Before executing an action, it's necessary to check the action's async\n // id to determine whether it's supposed to be executed in the current\n // flush.\n // Previous implementations of this method used a count to determine this,\n // but that was unsound, as actions that are unsubscribed - i.e. cancelled -\n // are removed from the actions array and that can shift actions that are\n // scheduled to be executed in a subsequent flush into positions at which\n // they are executed within the current flush.\n const flushId = this._scheduled;\n this._scheduled = undefined;\n\n const { actions } = this;\n let error: any;\n action = action || actions.shift()!;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions[0]) && action.id === flushId && actions.shift());\n\n this._active = false;\n\n if (error) {\n while ((action = actions[0]) && action.id === flushId && actions.shift()) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform task when `window.requestAnimationFrame` would fire\n *\n * When `animationFrame` scheduler is used with delay, it will fall back to {@link asyncScheduler} scheduler\n * behaviour.\n *\n * Without delay, `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure scheduled task will happen just before next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html:
\n * import { animationFrameScheduler } from 'rxjs';\n *\n * const div = document.querySelector('div');\n *\n * animationFrameScheduler.schedule(function(height) {\n * div.style.height = height + \"px\";\n *\n * this.schedule(height + 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * }, 0, 0);\n *\n * // You will see a div element growing in height\n * ```\n */\n\nexport const animationFrameScheduler = new AnimationFrameScheduler(AnimationFrameAction);\n\n/**\n * @deprecated Renamed to {@link animationFrameScheduler}. Will be removed in v8.\n */\nexport const animationFrame = animationFrameScheduler;\n", "import { Observable } from '../Observable';\nimport { SchedulerLike } from '../types';\n\n/**\n * A simple Observable that emits no items to the Observer and immediately\n * emits a complete notification.\n *\n * Just emits 'complete', and nothing else.\n *\n * ![](empty.png)\n *\n * A simple Observable that only emits the complete notification. It can be used\n * for composing with other Observables, such as in a {@link mergeMap}.\n *\n * ## Examples\n *\n * Log complete notification\n *\n * ```ts\n * import { EMPTY } from 'rxjs';\n *\n * EMPTY.subscribe({\n * next: () => console.log('Next'),\n * complete: () => console.log('Complete!')\n * });\n *\n * // Outputs\n * // Complete!\n * ```\n *\n * Emit the number 7, then complete\n *\n * ```ts\n * import { EMPTY, startWith } from 'rxjs';\n *\n * const result = EMPTY.pipe(startWith(7));\n * result.subscribe(x => console.log(x));\n *\n * // Outputs\n * // 7\n * ```\n *\n * Map and flatten only odd numbers to the sequence `'a'`, `'b'`, `'c'`\n *\n * ```ts\n * import { interval, mergeMap, of, EMPTY } from 'rxjs';\n *\n * const interval$ = interval(1000);\n * const result = interval$.pipe(\n * mergeMap(x => x % 2 === 1 ? of('a', 'b', 'c') : EMPTY),\n * );\n * result.subscribe(x => console.log(x));\n *\n * // Results in the following to the console:\n * // x is equal to the count on the interval, e.g. (0, 1, 2, 3, ...)\n * // x will occur every 1000ms\n * // if x % 2 is equal to 1, print a, b, c (each on its own)\n * // if x % 2 is not equal to 1, nothing will be output\n * ```\n *\n * @see {@link Observable}\n * @see {@link NEVER}\n * @see {@link of}\n * @see {@link throwError}\n */\nexport const EMPTY = new Observable((subscriber) => subscriber.complete());\n\n/**\n * @param scheduler A {@link SchedulerLike} to use for scheduling\n * the emission of the complete notification.\n * @deprecated Replaced with the {@link EMPTY} constant or {@link scheduled} (e.g. `scheduled([], scheduler)`). Will be removed in v8.\n */\nexport function empty(scheduler?: SchedulerLike) {\n return scheduler ? emptyScheduled(scheduler) : EMPTY;\n}\n\nfunction emptyScheduled(scheduler: SchedulerLike) {\n return new Observable((subscriber) => scheduler.schedule(() => subscriber.complete()));\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport function isScheduler(value: any): value is SchedulerLike {\n return value && isFunction(value.schedule);\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\nimport { isScheduler } from './isScheduler';\n\nfunction last(arr: T[]): T | undefined {\n return arr[arr.length - 1];\n}\n\nexport function popResultSelector(args: any[]): ((...args: unknown[]) => unknown) | undefined {\n return isFunction(last(args)) ? 
args.pop() : undefined;\n}\n\nexport function popScheduler(args: any[]): SchedulerLike | undefined {\n return isScheduler(last(args)) ? args.pop() : undefined;\n}\n\nexport function popNumber(args: any[], defaultValue: number): number {\n return typeof last(args) === 'number' ? args.pop()! : defaultValue;\n}\n", "export const isArrayLike = ((x: any): x is ArrayLike => x && typeof x.length === 'number' && typeof x !== 'function');", "import { isFunction } from \"./isFunction\";\n\n/**\n * Tests to see if the object is \"thennable\".\n * @param value the object to test\n */\nexport function isPromise(value: any): value is PromiseLike {\n return isFunction(value?.then);\n}\n", "import { InteropObservable } from '../types';\nimport { observable as Symbol_observable } from '../symbol/observable';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being Observable (but not necessary an Rx Observable) */\nexport function isInteropObservable(input: any): input is InteropObservable {\n return isFunction(input[Symbol_observable]);\n}\n", "import { isFunction } from './isFunction';\n\nexport function isAsyncIterable(obj: any): obj is AsyncIterable {\n return Symbol.asyncIterator && isFunction(obj?.[Symbol.asyncIterator]);\n}\n", "/**\n * Creates the TypeError to throw if an invalid object is passed to `from` or `scheduled`.\n * @param input The object that was passed.\n */\nexport function createInvalidObservableTypeError(input: any) {\n // TODO: We should create error codes that can be looked up, so this can be less verbose.\n return new TypeError(\n `You provided ${\n input !== null && typeof input === 'object' ? 'an invalid object' : `'${input}'`\n } where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.`\n );\n}\n", "export function getSymbolIterator(): symbol {\n if (typeof Symbol !== 'function' || !Symbol.iterator) {\n return '@@iterator' as any;\n }\n\n return Symbol.iterator;\n}\n\nexport const iterator = getSymbolIterator();\n", "import { iterator as Symbol_iterator } from '../symbol/iterator';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being an Iterable */\nexport function isIterable(input: any): input is Iterable {\n return isFunction(input?.[Symbol_iterator]);\n}\n", "import { ReadableStreamLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport async function* readableStreamLikeToAsyncGenerator(readableStream: ReadableStreamLike): AsyncGenerator {\n const reader = readableStream.getReader();\n try {\n while (true) {\n const { value, done } = await reader.read();\n if (done) {\n return;\n }\n yield value!;\n }\n } finally {\n reader.releaseLock();\n }\n}\n\nexport function isReadableStreamLike(obj: any): obj is ReadableStreamLike {\n // We don't want to use instanceof checks because they would return\n // false for instances from another Realm, like an +
