
Commit

resolving conflicts
ShreyaTalati committed Nov 22, 2023
2 parents 963d52f + 9bf75e8 commit e2b22ca
Showing 59 changed files with 3,518 additions and 1,104 deletions.
33 changes: 15 additions & 18 deletions CMakeLists.txt
@@ -23,25 +23,25 @@ include(CTest)
find_package(
Python
COMPONENTS Interpreter Development.Module
REQUIRED
)
REQUIRED)

set(CYTHON cython)
message(STATUS "Cython: " ${CYTHON})
#Test if cython is installed
# Test if cython is installed
if(NOT CYTHON)
message(
FATAL_ERROR
"Cython is required to build Parla. Please install cython and try again."
)
"Cython is required to build Parla. Please install cython and try again.")
endif()
#Test if cython exists
execute_process(COMMAND ${CYTHON} --version ERROR_QUIET OUTPUT_VARIABLE CYTHON_VERSION)
# Test if cython exists
execute_process(
COMMAND ${CYTHON} --version
ERROR_QUIET
OUTPUT_VARIABLE CYTHON_VERSION)
if(NOT CYTHON_VERSION)
message(
FATAL_ERROR
"Cython is required to build Parla. Please install cython and try again."
)
"Cython is required to build Parla. Please install cython and try again.")
endif()

message(STATUS "Python: " ${Python_EXECUTABLE})
@@ -82,10 +82,9 @@ if(PARLA_ENABLE_CUDA)
endif(PARLA_ENABLE_CUDA)

if(PARLA_ENABLE_HIP)
enable_language(HIP)
enable_language(HIP)
endif(PARLA_ENABLE_HIP)


if(PARLA_ENABLE_LOGGING)
# TODO: figure out binlog cmake support
message(STATUS "Finding binlog...")
@@ -119,11 +118,9 @@ endif(PARLA_ENABLE_LOGGING)
add_subdirectory(src/c/backend)
add_subdirectory(src/python/parla)

# if(PARLA_BUILD_TESTS)
# add_subdirectory(testing)
# endif(PARLA_BUILD_TESTS)
# if(PARLA_BUILD_TESTS) add_subdirectory(testing) endif(PARLA_BUILD_TESTS)

# set(test_path_file ${CMAKE_SOURCE_DIR}/testing/run_tests.sh)
# file(WRITE ${test_path_file} "export PARLA_TESTS=${CMAKE_BINARY_DIR}/testing\n")
# file(APPEND ${test_path_file} "py.test $PARLA_TESTS\n")
# file(APPEND ${test_path_file} "ctest --test-dir $PARLA_TESTS\n")
# set(test_path_file ${CMAKE_SOURCE_DIR}/testing/run_tests.sh) file(WRITE
# ${test_path_file} "export PARLA_TESTS=${CMAKE_BINARY_DIR}/testing\n")
# file(APPEND ${test_path_file} "py.test $PARLA_TESTS\n") file(APPEND
# ${test_path_file} "ctest --test-dir $PARLA_TESTS\n")
8 changes: 4 additions & 4 deletions benchmark/python/benchmark.py
@@ -110,7 +110,7 @@ def reduction_scalinum_gpus(fD_array_bytes, sD_array_bytes, \

run_config = RunConfig(
outer_iterations=1,
inner_iterations=1,
inner_iterations=iter,
verbose=verbose,
logfile=logpath,
num_gpus=num_gpus,
@@ -160,7 +160,7 @@ def independent_scalinum_gpus(fD_array_bytes, sD_array_bytes, num_gpus, \

run_config = RunConfig(
outer_iterations=1,
inner_iterations=1,
inner_iterations=iter,
verbose=verbose,
logfile=logpath,
num_gpus=num_gpus,
@@ -209,7 +209,7 @@ def serial_scalinum_gpus(fD_array_bytes, sD_array_bytes, num_gpus,

run_config = RunConfig(
outer_iterations=1,
inner_iterations=1,
inner_iterations=iter,
verbose=verbose,
num_gpus=num_gpus,
logfile=logpath,
@@ -263,7 +263,7 @@ def reduction_scatter_scalinum_gpus(fD_array_bytes, sD_array_bytes, \

run_config = RunConfig(
outer_iterations=1,
inner_iterations=1,
inner_iterations=iter,
verbose=verbose,
logfile=logpath,
num_gpus=num_gpus,
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -20,7 +20,7 @@ build-dir = "build"
wheel.packages = ["src/python/parla"]
wheel.license-files = []
cmake.minimum-version = "3.22.1"
cmake.build-type = "Debug"
cmake.build-type = "Release"
cmake.verbose = true
ninja.minimum-version = "1.11"

15 changes: 7 additions & 8 deletions src/c/backend/CMakeLists.txt
@@ -76,18 +76,17 @@ if(PARLA_ENABLE_CUDA)

set_target_properties(backend PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(backend PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)

target_compile_definitions(backend PUBLIC PARLA_ENABLE_CUDA)

target_include_directories(backend PUBLIC ${CUDAToolkit_LIBRARY_DIR})
target_include_directories(backend PUBLIC ${CUDAToolkit_LIBRARY_ROOT})
target_include_directories(backend PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
#target_include_directories(backend PUBLIC ${CUDAToolkit_LIBRARY_DIR})
#target_include_directories(backend PUBLIC ${CUDAToolkit_LIBRARY_ROOT})
#target_include_directories(backend PUBLIC ${CUDAToolkit_INCLUDE_DIRS})

target_link_directories(backend PUBLIC ${CUDAToolkit_LIBRARY_ROOT})
target_compile_options(backend PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda -DPROD>)
target_compile_options(backend PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
#target_link_directories(backend PUBLIC ${CUDAToolkit_LIBRARY_ROOT})
#target_compile_options(backend PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda -DPROD>)
#target_compile_options(backend PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

target_link_libraries(backend PUBLIC CUDA::cudart)
#target_link_libraries(backend PUBLIC CUDA::cudart)

set(GPU_ARCH $ENV{CUDA_ARCH})
if(GPU_ARCH)
3 changes: 1 addition & 2 deletions src/c/backend/device.cpp
@@ -3,6 +3,5 @@

const bool
Device::check_resource_availability(DeviceRequirement *dev_req) const {
return get_resource_pool().check_greater<ResourceCategory::All>(
dev_req->res_req());
return get_resource_pool().check_greater<GPUResources>(dev_req->res_req());
}
14 changes: 14 additions & 0 deletions src/c/backend/impl_cuda/utility.cu
@@ -40,3 +40,17 @@ void stream_synchronize(uintptr_t stream_ptr) {
cudaStream_t stream = reinterpret_cast<cudaStream_t>(stream_ptr);
cudaStreamSynchronize(stream);
};

void set_device(int device) { cudaSetDevice(device); }

int get_device() {
int device;
cudaGetDevice(&device);
return device;
}

int get_num_devices() {
int num_devices;
cudaGetDeviceCount(&num_devices);
return num_devices;
}
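
The new host-side helpers wrap the CUDA runtime's device-selection calls (cudaSetDevice, cudaGetDevice, cudaGetDeviceCount). A minimal caller sketch, not part of this commit; the gpu_utility.hpp include path and the main() driver are assumptions for illustration:

#include <cstdio>
#include "gpu_utility.hpp" // assumed header declaring the wrappers above

int main() {
  int n = get_num_devices();      // wraps cudaGetDeviceCount
  for (int d = 0; d < n; ++d) {
    set_device(d);                // wraps cudaSetDevice
    std::printf("active device: %d\n", get_device()); // wraps cudaGetDevice
  }
  return 0;
}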
25 changes: 25 additions & 0 deletions src/c/backend/impl_hip/utility.hip
@@ -41,3 +41,28 @@ void stream_synchronize(uintptr_t stream_ptr) {
hipStream_t stream = reinterpret_cast<hipStream_t>(stream_ptr);
auto res = hipStreamSynchronize(stream);
};

void set_device(int device) {
auto res = hipSetDevice(device);
if (res != hipSuccess) {
throw std::runtime_error("hipSetDevice failed");
}
}

int get_device(){
int device;
auto res = hipGetDevice(&device);
if (res != hipSuccess) {
throw std::runtime_error("hipGetDevice failed");
}
return device;
}

int get_num_devices() {
int num_devices;
auto res = hipGetDeviceCount(&num_devices);
if (res != hipSuccess) {
throw std::runtime_error("hipGetDeviceCount failed");
}
return num_devices;
}
5 changes: 4 additions & 1 deletion src/c/backend/impl_none/utility.cpp
@@ -1,4 +1,4 @@
#include <gpu_utility.hpp>
#include "../include/gpu_utility.hpp"

void gpu_busy_sleep(const int device, const unsigned long t,
uintptr_t stream_ptr) {
@@ -8,3 +8,6 @@ void gpu_busy_sleep(const int device, const unsigned long t,
void event_synchronize(uintptr_t event_ptr){};
void event_wait(uintptr_t event_ptr, uintptr_t stream_ptr){};
void stream_synchronize(uintptr_t stream_ptr){};
void set_device(int device){};
int get_device() { return 0; };
int get_num_devices() { return 0; };
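
With neither CUDA nor HIP enabled, the stubs above keep the same entry points callable: set_device is a no-op and get_device / get_num_devices report zero. A hedged sketch of backend-agnostic calling code; the helper below is hypothetical and not part of this commit:

#include "gpu_utility.hpp" // assumed shared header for the CUDA/HIP/stub backends

// Hypothetical helper (not part of the commit): fall back to host-only work
// when the stub backend reports zero devices.
bool select_first_device_or_cpu() {
  if (get_num_devices() == 0) {
    return false;   // CPU-only build: the stub above returns 0
  }
  set_device(0);    // real backends switch the active device here
  return true;
}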
33 changes: 14 additions & 19 deletions src/c/backend/include/containers.hpp
@@ -3,8 +3,6 @@
*
*
*/

#pragma once
#ifndef PARLA_CONTAINERS_HPP
#define PARLA_CONTAINERS_HPP

@@ -46,36 +44,33 @@ template <typename T> class ProtectedVector {
std::string name;

public:
ProtectedVector() = default;
ProtectedVector() { this->name = "default"; };

ProtectedVector(std::string name) {
this->mtx.lock();
this->name = name;
this->mtx.unlock();
ProtectedVector(const ProtectedVector<T> &other) {
this->name = other.name;
this->vec = other.vec;
this->length.exchange(other.length);
}

ProtectedVector(std::string name) { this->name = name; }

[GitHub Actions / cpp-linter — containers.hpp:55:31, performance-unnecessary-value-param: the parameter 'name' is copied for each invocation but only used as a const reference; consider making it a const reference]

ProtectedVector(std::string name, std::vector<T> vec) {

[GitHub Actions / cpp-linter — containers.hpp:57:31, performance-unnecessary-value-param: the parameter 'name' is copied for each invocation but only used as a const reference; consider making it a const reference]
this->mtx.lock();
this->name = name;
this->vec = vec;
this->mtx.unlock();
}

ProtectedVector(std::string name, size_t size) {

[GitHub Actions / cpp-linter — containers.hpp:62:31, performance-unnecessary-value-param: the parameter 'name' is copied for each invocation but only used as a const reference; consider making it a const reference]
this->mtx.lock();
this->name = name;
this->vec.reserve(size);
this->mtx.unlock();
}

/// Explicit move assignment due to the atomic size member.
ProtectedVector &operator=(ProtectedVector &&other) {
this->length.exchange(other.length);
this->vec = std::move(other.vec);
// The string should be small
this->name = std::move(other.name);
return *this;
}
// ProtectedVector &operator=(const ProtectedVector<T> &&other) {
// this->length.exchange(other.length);
// this->vec = std::move(other.vec);
// // The string should be small
// this->name = std::move(other.name);
// return *this;
// }

void lock() { this->mtx.lock(); }

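For context, the reworked ProtectedVector constructors above (default, copy, name-only, name-plus-vector, name-plus-capacity) can be exercised as below. This is a usage sketch, not part of the commit; the include path is an assumption and unlock() is assumed by symmetry with the lock() shown in the diff:

#include <string>
#include <vector>
#include "containers.hpp" // assumed include path for ProtectedVector

void protected_vector_example() {
  ProtectedVector<int> a;                                     // name defaults to "default"
  ProtectedVector<int> b("ready_tasks");                      // named, empty
  ProtectedVector<int> c("seed", std::vector<int>{1, 2, 3});  // named, pre-filled
  ProtectedVector<int> d("scratch", 16);                      // named, capacity reserved
  ProtectedVector<int> e(c);                                  // the new copy constructor

  b.lock();    // manual locking around compound updates
  b.unlock();  // unlock() assumed by symmetry with lock() shown above
}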
66 changes: 26 additions & 40 deletions src/c/backend/include/device.hpp
@@ -2,7 +2,6 @@
* @brief Provides interface for abstract device object.
*/

#pragma once
#ifndef PARLA_DEVICE_HPP
#define PARLA_DEVICE_HPP

@@ -16,21 +15,28 @@
using DevID_t = uint32_t;
using MemorySz_t = Resource_t;
using VCU_t = Resource_t;
// using ResourcePool_t = ResourcePool<std::atomic<Resource_t>>;
using ResourcePool_t = ResourcePool;

using GPUResources = Resources<Resource::Memory, Resource::VCU, Resource::Copy>;
using GPUResourcePool = ResourcePool<GPUResources>;

using CPUResources = Resources<Resource::Memory, Resource::VCU, Resource::Copy>;
using CPUResourcePool = ResourcePool<CPUResources>;

// TODO(wlr): Temporarily maintain a single resource pool for all devices.
using ResourcePool_t = GPUResourcePool;

class DeviceRequirement;

/**
* @brief Architecture types for devices.
*/
enum class DeviceType { INVALID = -2, All = -1, CPU = 0, CUDA = 1 };
enum class DeviceType { INVALID = -2, All = -1, CPU = 0, GPU = 1 };

inline const constexpr std::array architecture_types{DeviceType::CPU,
DeviceType::CUDA};
DeviceType::GPU};
inline const constexpr int NUM_DEVICE_TYPES = architecture_types.size();
inline const std::array<std::string, NUM_DEVICE_TYPES> architecture_names{
"CPU", "CUDA"};
"CPU", "GPU"};

/// Devices can be distinguished from other devices
/// by a class type and its index.
@@ -43,17 +49,9 @@ class Device {
void *py_dev, int copy_engines = 2)
: py_dev_(py_dev), dev_id_(dev_id), dev_type_(arch) {

res_.set(Resource::VCU, num_vcus);
res_.set(Resource::Memory, mem_sz);
res_.set(Resource::Copy, copy_engines);

reserved_res_.set(Resource::VCU, num_vcus);
reserved_res_.set(Resource::Memory, mem_sz);
reserved_res_.set(Resource::Copy, copy_engines);

mapped_res_.set(Resource::VCU, 0);
mapped_res_.set(Resource::Memory, 0);
mapped_res_.set(Resource::Copy, 0);
res_.set<GPUResources>({mem_sz, num_vcus, copy_engines});
reserved_res_.set<GPUResources>({mem_sz, num_vcus, copy_engines});
mapped_res_.set<GPUResources>({0, 0, 0});
}

/// Return a device id.
@@ -64,16 +62,16 @@ class Device {
std::to_string(dev_id_);
}

const Resource_t query_resource(Resource type) const {
return this->res_.get(type);
template <typename Resource> const Resource_t query_max() const {
return this->res_.get<Resource>();
}

const Resource_t query_reserved_resource(Resource type) const {
return this->reserved_res_.get(type);
template <typename Resource> const Resource_t query_reserved() const {
return this->reserved_res_.get<Resource>();
}

const Resource_t query_mapped_resource(Resource type) const {
return this->mapped_res_.get(type);
template <typename Resource> const Resource_t query_mapped() const {
return this->mapped_res_.get<Resource>();
}

const DeviceType get_type() const { return dev_type_; }
@@ -111,22 +109,10 @@ class Device {
const DevID_t get_global_id() const { return dev_global_id_; }

const MemorySz_t get_memory_size() const {
return res_.get(Resource::Memory);
return res_.get<Resource::Memory>();
}

const VCU_t get_num_vcus() const { return res_.get(Resource::VCU); }

const Resource_t get_max_resource(Resource type) const {
return this->res_.get(type);
}

const Resource_t get_reserved_resource(Resource type) const {
return this->reserved_res_.get(type);
}

const Resource_t get_mapped_resource(Resource type) const {
return this->mapped_res_.get(type);
}
const VCU_t get_num_vcus() const { return res_.get<Resource::VCU>(); }

const bool check_resource_availability(DeviceRequirement *dev_req) const;

@@ -142,10 +128,10 @@
};

///
class CUDADevice : public Device {
class GPUDevice : public Device {
public:
CUDADevice(DevID_t dev_id, size_t mem_sz, size_t num_vcus, void *py_dev)
: Device(DeviceType::CUDA, dev_id, mem_sz, num_vcus, py_dev, 3) {}
GPUDevice(DevID_t dev_id, size_t mem_sz, size_t num_vcus, void *py_dev)
: Device(DeviceType::GPU, dev_id, mem_sz, num_vcus, py_dev, 3) {}

private:
};
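
The resource queries are now templated on a resource tag instead of taking a runtime Resource value, and CUDADevice is renamed GPUDevice. A call-site sketch, not part of this commit: it uses only the accessors visible in the diff above, assumes the Resource::Memory / Resource::VCU / Resource::Copy tags are types as the new templates suggest, and uses arbitrary numbers with nullptr standing in for the opaque Python device handle:

#include "device.hpp" // assumed include path

void device_query_example() {
  GPUDevice dev(/*dev_id=*/0, /*mem_sz=*/16UL << 30, /*num_vcus=*/1000,
                /*py_dev=*/nullptr);

  // Templated queries against the max, reserved, and mapped pools.
  Resource_t total_mem  = dev.query_max<Resource::Memory>();
  Resource_t rsvd_vcus  = dev.query_reserved<Resource::VCU>();
  Resource_t mapped_cpy = dev.query_mapped<Resource::Copy>();

  (void)total_mem;
  (void)rsvd_vcus;
  (void)mapped_cpy;
}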