Skip to content

Commit

Permalink
save
Browse files Browse the repository at this point in the history
  • Loading branch information
barne856 committed Sep 1, 2024
1 parent 468394f commit 9a70aae
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 7 deletions.
48 changes: 41 additions & 7 deletions main.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,49 @@
#include <chrono>
#include <iostream>
#include <squint/squint.hpp>

using namespace squint;
using namespace std::chrono;

/// Benchmark: times `a * b` for two 10000x10000 float tensors on the host,
/// then the same product on the device, reporting host->device and
/// device->host transfer times separately and summed.
///
/// @return 0 on completion.
auto main() -> int {
    // steady_clock is monotonic, so measured intervals cannot be distorted by
    // wall-clock adjustments (high_resolution_clock may alias system_clock).
    using bench_clock = std::chrono::steady_clock;

    // Operands for the benchmark.
    // NOTE(review): presumably arange(start, step, shape) — confirm against squint.
    auto a = tensor<float, dynamic, dynamic>::arange(1, 1, {10000, 10000});
    auto b = tensor<float, dynamic, dynamic>::arange(1, 1, {10000, 10000});

    // Time host multiplication
    const auto start_host = bench_clock::now();
    auto c_host = a * b;
    const auto end_host = bench_clock::now();
    const auto duration_host = duration_cast<milliseconds>(end_host - start_host);

    // Time transfer to device (only the two uploads belong in this region)
    const auto start_transfer_to_device = bench_clock::now();
    auto a_device = a.to_device();
    auto b_device = b.to_device();
    const auto end_transfer_to_device = bench_clock::now();
    const auto duration_transfer_to_device =
        duration_cast<milliseconds>(end_transfer_to_device - start_transfer_to_device);

    // Time device multiplication
    // NOTE(review): if the device operator* returns before the work has finished,
    // this interval covers only the launch and the remainder is attributed to the
    // to_host() call below — confirm whether a synchronization is required here.
    const auto start_device = bench_clock::now();
    auto c_device = a_device * b_device;
    const auto end_device = bench_clock::now();
    const auto duration_device = duration_cast<milliseconds>(end_device - start_device);

    // Time transfer from device
    const auto start_transfer_from_device = bench_clock::now();
    auto c_host_from_device = c_device.to_host();
    const auto end_transfer_from_device = bench_clock::now();
    const auto duration_transfer_from_device =
        duration_cast<milliseconds>(end_transfer_from_device - start_transfer_from_device);

    // Print results
    std::cout << "Host multiplication time: " << duration_host.count() << " ms\n";
    std::cout << "Transfer to device time: " << duration_transfer_to_device.count() << " ms\n";
    std::cout << "Device multiplication time: " << duration_device.count() << " ms\n";
    std::cout << "Transfer from device time: " << duration_transfer_from_device.count() << " ms\n";

    // Calculate and print total device time
    const auto total_device_time =
        duration_transfer_to_device + duration_device + duration_transfer_from_device;
    std::cout << "Total device time (including transfers): " << total_device_time.count() << " ms\n";

    return 0;
}
45 changes: 45 additions & 0 deletions scripts/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import numpy as np
import cupy as cp
import time

def time_function(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and measure how long the call takes.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        A ``(result, elapsed_ms)`` tuple: the value returned by ``func`` and
        the elapsed time of the call in milliseconds.
    """
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted mid-measurement.
    start = time.perf_counter()
    result = func(*args, **kwargs)
    end = time.perf_counter()
    return result, (end - start) * 1000  # Convert to milliseconds

def main():
    """Benchmark a 10000x10000 float32 matrix product on CPU (NumPy) and GPU (CuPy).

    Prints the CPU multiplication time, the host->GPU transfer time, the GPU
    multiplication time, the GPU->host transfer time, and the sum of the three
    GPU-side figures, all in milliseconds.
    """
    # Create large matrices
    size = 10000
    a_cpu = np.arange(1, size*size + 1, dtype=np.float32).reshape(size, size)
    b_cpu = np.arange(1, size*size + 1, dtype=np.float32).reshape(size, size)

    # CPU multiplication
    _, cpu_time = time_function(np.dot, a_cpu, b_cpu)
    print(f"CPU multiplication time: {cpu_time:.2f} ms")

    def sync_gpu():
        # CuPy queues GPU work asynchronously; block until the device is idle
        # so a host-side timer covers the work actually performed.
        cp.cuda.Device().synchronize()

    # Transfer to GPU
    transfer_to_gpu_start = time.perf_counter()
    a_gpu = cp.asarray(a_cpu)
    b_gpu = cp.asarray(b_cpu)
    sync_gpu()
    transfer_to_gpu_end = time.perf_counter()
    transfer_to_gpu_time = (transfer_to_gpu_end - transfer_to_gpu_start) * 1000
    print(f"Transfer to GPU time: {transfer_to_gpu_time:.2f} ms")

    # GPU multiplication (synchronized inside the timed call so the figure
    # reflects the computation, not just the kernel launch)
    def gpu_dot():
        product = cp.dot(a_gpu, b_gpu)
        sync_gpu()
        return product

    c_gpu, gpu_time = time_function(gpu_dot)
    print(f"GPU multiplication time: {gpu_time:.2f} ms")

    # Transfer from GPU: copy the product computed above instead of recomputing
    # it, so this interval measures the transfer only and the total below does
    # not count the multiplication twice.
    transfer_from_gpu_start = time.perf_counter()
    _ = cp.asnumpy(c_gpu)
    transfer_from_gpu_end = time.perf_counter()
    transfer_from_gpu_time = (transfer_from_gpu_end - transfer_from_gpu_start) * 1000
    print(f"Transfer from GPU time: {transfer_from_gpu_time:.2f} ms")

    # Calculate and print total GPU time
    total_gpu_time = transfer_to_gpu_time + gpu_time + transfer_from_gpu_time
    print(f"Total GPU time (including transfers): {total_gpu_time:.2f} ms")

# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit 9a70aae

Please sign in to comment.