Skip to content

Commit

Permalink
save
Browse files Browse the repository at this point in the history
  • Loading branch information
barne856 committed Sep 1, 2024
1 parent e3608a1 commit 468394f
Show file tree
Hide file tree
Showing 18 changed files with 975 additions and 280 deletions.
12 changes: 11 additions & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
netpbm \
imagemagick \
ghostscript \
nvidia-cuda-toolkit \
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
&& ln -s /usr/bin/clang-$LLVM_VERSION /usr/bin/clang \
&& ln -s /usr/bin/clang++-$LLVM_VERSION /usr/bin/clang++ \
Expand Down Expand Up @@ -70,6 +69,17 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cm
&& ln -s /opt/cmake-${CMAKE_VERSION}/bin/* /usr/local/bin \
&& rm /tmp/cmake-install.sh

# Install the CUDA 12.6 toolkit from NVIDIA's apt repository.
# The cuda-keyring package is fetched to /tmp, installed, and deleted again,
# and the apt lists are cleared, all inside a single RUN so that neither the
# .deb nor the package index is baked into an image layer.
RUN wget -O /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb \
&& dpkg -i /tmp/cuda-keyring.deb \
&& rm /tmp/cuda-keyring.deb \
&& apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
cuda-toolkit-12-6 \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# add CUDA to the path
ENV PATH=/usr/local/cuda-12.6/bin:/usr/local/cuda-12.6:$PATH

# Clean up
RUN apt-get -y remove wget gnupg software-properties-common

Expand Down
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,7 @@ if(SQUINT_BUILD_DOCUMENTATION)
DESTINATION ${CMAKE_INSTALL_DOCDIR})

endif()

# Executable `main`, built from main.cpp and linked against SQUINT.
add_executable(main main.cpp)
target_link_libraries(main PRIVATE SQUINT)
# Scope the header search path to this target only; include_directories()
# would add it to every target defined in this CMakeLists.txt.
target_include_directories(main PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
18 changes: 9 additions & 9 deletions include/squint/tensor/cuda/cuda_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,32 @@

namespace squint::cuda {

class CudaContext {
class cuda_context {
public:
static auto instance() -> CudaContext & {
static CudaContext instance;
static auto instance() -> cuda_context & {
static cuda_context instance;
return instance;
}

[[nodiscard]] auto cublas_handle() const -> cublasHandle_t { return cublas_handle_; }

// Delete copy constructor and assignment operator
CudaContext(const CudaContext &) = delete;
auto operator=(const CudaContext &) -> CudaContext & = delete;
cuda_context(const cuda_context &) = delete;
auto operator=(const cuda_context &) -> cuda_context & = delete;

// Delete move constructor and assignment operator
CudaContext(CudaContext &&) = delete;
auto operator=(CudaContext &&) -> CudaContext & = delete;
cuda_context(cuda_context &&) = delete;
auto operator=(cuda_context &&) -> cuda_context & = delete;

private:
CudaContext() {
cuda_context() {
cublasStatus_t status = cublasCreate(&cublas_handle_);
if (status != CUBLAS_STATUS_SUCCESS) {
throw std::runtime_error("Failed to create cuBLAS handle");
}
}

~CudaContext() { cublasDestroy(cublas_handle_); }
~cuda_context() { cublasDestroy(cublas_handle_); }

cublasHandle_t cublas_handle_{};
};
Expand Down
17 changes: 11 additions & 6 deletions include/squint/tensor/cuda/element_wise.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Include guard: the macro must be defined right after the #ifndef test,
// otherwise the guard never takes effect on a second inclusion.
#ifndef SQUINT_TENSOR_CUDA_ELEMENT_WISE_HPP
#define SQUINT_TENSOR_CUDA_ELEMENT_WISE_HPP

// uint8_t is used as the output element type of the comparison routines.
#include <cstdint>

template <typename T>
Expand All @@ -7,17 +8,21 @@ void element_wise_addition(T *output, const T *a, const T *b, const unsigned lon

template <typename T>
void element_wise_subtraction(T *output, const T *a, const T *b, const unsigned long *dims,
const unsigned long *strides_out, const unsigned long *strides_a,
const unsigned long *strides_b, unsigned long num_dims, unsigned long total_size);
const unsigned long *strides_out, const unsigned long *strides_a,
const unsigned long *strides_b, unsigned long num_dims, unsigned long total_size);

template <typename T>
void element_wise_equality(T *output, const T *a, const T *b, const unsigned long *dims,
void element_wise_equality(uint8_t *output, const T *a, const T *b, const unsigned long *dims,
const unsigned long *strides_out, const unsigned long *strides_a,
const unsigned long *strides_b, unsigned long num_dims, unsigned long total_size);

template <typename T>
void element_wise_inequality(T *output, const T *a, const T *b, const unsigned long *dims,
const unsigned long *strides_out, const unsigned long *strides_a,
const unsigned long *strides_b, unsigned long num_dims, unsigned long total_size);
void element_wise_inequality(uint8_t *output, const T *a, const T *b, const unsigned long *dims,
const unsigned long *strides_out, const unsigned long *strides_a,
const unsigned long *strides_b, unsigned long num_dims, unsigned long total_size);

/**
 * @brief Declaration of the element-wise negation routine.
 *
 * Only the declaration is visible here; the definition lives elsewhere.
 * NOTE(review): presumably writes -a[i] into output for every element,
 * addressing both buffers through the given dims/strides -- confirm against
 * the definition.
 *
 * @tparam T          Element type of the input and output buffers.
 * @param output      Destination buffer.
 * @param a           Source buffer (read-only).
 * @param dims        Array of dimension sizes (presumably num_dims entries).
 * @param strides_out Array of strides for output (presumably num_dims entries).
 * @param strides_a   Array of strides for a (presumably num_dims entries).
 * @param num_dims    Number of dimensions.
 * @param total_size  Total element count (presumably the product of dims).
 */
template <typename T>
void element_wise_negation(T *output, const T *a, const unsigned long *dims, const unsigned long *strides_out,
const unsigned long *strides_a, unsigned long num_dims, unsigned long total_size);

#endif // SQUINT_TENSOR_CUDA_ELEMENT_WISE_HPP
7 changes: 7 additions & 0 deletions include/squint/tensor/cuda/scalar.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#ifndef SQUINT_TENSOR_CUDA_SCALAR_HPP
#define SQUINT_TENSOR_CUDA_SCALAR_HPP  // without this the guard never takes effect

/**
 * @brief Declaration of the scalar-multiplication routine.
 *
 * Only the declaration is provided by this header; the definition lives
 * elsewhere.
 * NOTE(review): presumably computes output[i] = scalar * a[i] for every
 * element, addressing both buffers through the given dims/strides -- confirm
 * against the definition.
 *
 * @tparam T          Element type of the scalar and of both buffers.
 * @param scalar      Scalar factor.
 * @param output      Destination buffer.
 * @param a           Source buffer (read-only).
 * @param dims        Array of dimension sizes (presumably num_dims entries).
 * @param strides_out Array of strides for output (presumably num_dims entries).
 * @param strides_a   Array of strides for a (presumably num_dims entries).
 * @param num_dims    Number of dimensions.
 * @param total_size  Total element count (presumably the product of dims).
 */
template <typename T>
void scalar_multiplication(T scalar, T *output, const T *a, const unsigned long *dims,
                           const unsigned long *strides_out, const unsigned long *strides_a,
                           unsigned long num_dims, unsigned long total_size);

#endif // SQUINT_TENSOR_CUDA_SCALAR_HPP
Loading

0 comments on commit 468394f

Please sign in to comment.