Skip to content

Commit

Permalink
Work on building the dependencies on PrgEnv-cray.
Browse files Browse the repository at this point in the history
  • Loading branch information
bvanessen committed Aug 13, 2024
1 parent cd4cec4 commit 60ff967
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 18 deletions.
4 changes: 2 additions & 2 deletions .gitlab/build-and-test-lassen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ include:
clang-16-0-6-gcc-11-2-1-cuda-12-2-2-lassen:
variables:
COMPILER_FAMILY: clang
MODULES: "clang/16.0.6-gcc-11.2.1 spectrum-mpi/rolling-release cuda/12.2.2 cmake/3.29.2 python/3.11.5"
MODULES: "clang/16.0.6-gcc-11.2.1 spectrum-mpi/rolling-release cuda/12.2.2 cmake/3.29.2 python/3.11.5 fftw/3.3.10-gcc-11.2.1"
extends: .build-and-test-on-lassen

clang-16-0-6-gcc-11-2-1-cuda-12-2-2-distconv-lassen:
variables:
COMPILER_FAMILY: clang
MODULES: "clang/16.0.6-gcc-11.2.1 spectrum-mpi/rolling-release cuda/12.2.2 cmake/3.29.2 python/3.11.5"
MODULES: "clang/16.0.6-gcc-11.2.1 spectrum-mpi/rolling-release cuda/12.2.2 cmake/3.29.2 python/3.11.5 fftw/3.3.10-gcc-11.2.1"
WITH_DISTCONV: "1"
extends: .build-and-test-on-lassen

Expand Down
22 changes: 19 additions & 3 deletions .gitlab/build-and-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,20 +95,25 @@ echo "~~~~~ Project dir: ${project_dir}"
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

prefix="${project_dir}/install-deps-${CI_JOB_NAME_SLUG:-${job_unique_id}}"
#dha_prefix=${INSTALL_EXTERNALS_ROOT}/rocm-5.7.1/amd/cray-mpich-8.1.29/dha_with_distconv
dha_prefix=${prefix}

# Just for good measure...
export CMAKE_PREFIX_PATH=${prefix}/aluminum:${prefix}/hydrogen:${prefix}/dihydrogen:${CMAKE_PREFIX_PATH}
export CMAKE_PREFIX_PATH=${dha_prefix}/aluminum:${dha_prefix}/hydrogen:${dha_prefix}/dihydrogen:${CMAKE_PREFIX_PATH}
#export CMAKE_PREFIX_PATH=${prefix}/aluminum:${prefix}/hydrogen:${prefix}/dihydrogen:${CMAKE_PREFIX_PATH}
CMAKE_CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH//:/;}

# Allow a user to force this
rebuild_deps=${REBUILD_DEPS:-""}
#rebuild_deps=0

# Rebuild if the prefix doesn't exist.
if [[ ! -d "${prefix}" ]]
#if [[ ! -d "${prefix}" ]]
if [[ ! -d "${dha_prefix}" ]]
then
rebuild_deps=1
fi
#rebuild_deps=0

# Rebuild if latest hashes don't match
if [[ -z "${rebuild_deps}" ]]
Expand Down Expand Up @@ -220,10 +225,21 @@ echo "~~~~~ Installing Python Packages with PIP"
echo "~~~~~ $(date)"
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

CMD="python3 -m pip install -i https://pypi.org/simple --prefix ${prefix}/lbann protobuf tqdm numpy scipy"
CMD="python3 -m pip install -i https://pypi.org/simple -U --force-reinstall --prefix ${prefix}/lbann protobuf tqdm numpy scipy"
echo ${CMD}
${CMD}

# Torch is only pulled in through pip when running on pascal; on any other
# cluster a notice is printed and no install is attempted.
if [[ "${cluster}" == "pascal" ]]; then
  CMD="python3 -m pip install -i https://pypi.org/simple -U --force-reinstall --prefix ${prefix}/lbann torch"
  # Log the exact command line before running it.
  echo ${CMD}
  ${CMD}
else
  echo "Unable to install torch via pip on ${cluster}"
fi

LBANN_MODFILES_DIR=${build_dir}/install/lbann/etc/modulefiles
#echo "I think that the module is in ${LBANN_MODFILES_DIR}"
ml use ${LBANN_MODFILES_DIR}
Expand Down
19 changes: 7 additions & 12 deletions .gitlab/configure_lbann.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
# Select the LAPACK vendor option passed to the LBANN configure step.
# On lassen the generic BLA vendor is requested and FFT support is switched
# on; everywhere else the option is left empty and build_fft is not touched.
case "${cluster}" in
  lassen)
    # lbann_lapack_opt="-D LBANN_BLA_VENDOR=IBMESSL"
    lbann_lapack_opt="-D BLA_VENDOR=Generic"
    build_fft=ON
    ;;
  *)
    lbann_lapack_opt=""
    ;;
esac

# Just for good measure...
if [[ "$cluster" == "tioga" ]]
then
build_fft=OFF
fi

# The default RPATH rules do not pick up the in-source libraries that live
# under the dependency prefix, so put the aluminum, hydrogen and dihydrogen
# library directories at the front of extra_rpaths. Any value that
# extra_rpaths already holds is kept after them, joined with a ':'; when it is
# empty or unset no trailing separator is added.
extra_rpaths="${dha_prefix}/aluminum/lib64:${dha_prefix}/hydrogen/lib:${dha_prefix}/dihydrogen/lib64${extra_rpaths:+:${extra_rpaths}}"

echo "I have modified the extra rpaths to be ${extra_rpaths}"
# -D CMAKE_BUILD_RPATH="${extra_rpaths//:/\;}" \
# -D CMAKE_INSTALL_RPATH="${extra_rpaths//:/\;}" \

cmake -G Ninja \
-S ${project_dir} \
-B ${build_dir}/build-lbann \
Expand Down Expand Up @@ -57,9 +58,3 @@ cmake -G Ninja \
-D LBANN_WITH_EMBEDDED_PYTHON=ON \
-D LBANN_WITH_PYTHON_FRONTEND=ON \
-D LBANN_WITH_VISION=ON

# -D CMAKE_BUILD_RPATH_USE_ORIGIN=OFF \
# -D CMAKE_BUILD_WITH_INSTALL_RPATH=OFF \
# -D CMAKE_SKIP_BUILD_RPATH=OFF \
# -D CMAKE_SKIP_INSTALL_RPATH=OFF \
# -D CMAKE_SKIP_RPATH=OFF \
2 changes: 2 additions & 0 deletions .gitlab/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ case "${cluster}" in
else
extra_rpaths="${ROCM_PATH}/lib:${ROCM_PATH}/llvm/lib:${extra_rpaths}"
fi
extra_rpaths="/usr/workspace/lbann/ci_stable_dependencies/tioga/rocm-5.7.1/cray/cray-mpich-8.1.30/aws_ofi_rccl/lib:${extra_rpaths}"
rocm_platform=ON
gpu_arch=gfx90a,gfx942
launcher=flux
Expand All @@ -103,6 +104,7 @@ source ${INSTALL_EXTERNALS_ROOT}/${SYSTEM_INSTALL_PREFIX_EXTERNALS}/logs/lbann_s
export CMAKE_PREFIX_PATH=${CI_STABLE_DEPENDENCIES_ROOT}/half-2.1.0:${CMAKE_PREFIX_PATH}
#CMAKE_PREFIX_PATH=${INSTALL_EXTERNALS_ROOT}/${SYSTEM_INSTALL_PREFIX_EXTERNALS}
CMAKE_CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH//:/;}
echo "BVE HERE I think that AWS_OFI_RCCL_LIBRARY=${AWS_OFI_RCCL_LIBRARY}"

CFLAGS=${CFLAGS:-""}
CXXFLAGS=${CXXFLAGS:-""}
Expand Down
3 changes: 2 additions & 1 deletion scripts/superbuild/ci/ci_tioga_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ PE_ENV_lc=$(echo "${PE_ENV}" | tr '[:upper:]' '[:lower:]')
INSTALL_ROOT=/usr/workspace/lbann/ci_stable_dependencies/tioga/${ROCM_VER}/${PE_ENV_lc}
INSTALL_PREFIX_EXTERNALS=${INSTALL_ROOT}/cray-mpich-${CRAY_MPICH_VERSION}

if [[ "${PE_ENV_lc}" == "cray" ]]; then
if [[ "${PE_ENV_lc}" = "cray" ]]; then
# If using PrgEnv-cray add ${CRAYLIBS_X86_64}
EXTRA_RPATHS="${CRAYLIBS_X86_64}|${EXTRA_RPATHS}"
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
fi

# Use an accessible build directory so that the source files are preserved for debugging
Expand Down

0 comments on commit 60ff967

Please sign in to comment.