Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reverting PlotWriter to previous implementation #383

Open
wants to merge 29 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f91e57e
Ensuring plotting correctness on windows
haorldbchi Aug 15, 2023
e8d0ec2
Fixed issue with indirect uploads.
haorldbchi Aug 15, 2023
58d6cd4
Fixing buffer transfers for windows disk gpu
haorldbchi Aug 17, 2023
b188624
preprocessor def fix
haorldbchi Aug 17, 2023
c27d1e6
Set correct descriptors during transfer setup
haorldbchi Aug 17, 2023
a55bfd1
Trivial fixes
haorldbchi Aug 17, 2023
2eeee84
Supporting 16G instead of 64G - phase 1
haorldbchi Aug 22, 2023
4fc3d97
Phase 3 updates
haorldbchi Aug 22, 2023
16e1310
Fixed Phase3 disk issues for 16G
haorldbchi Aug 24, 2023
e9efc94
Fixed correct buffer usage in GPU stream with disk
haorldbchi Aug 24, 2023
4d30e1d
Some extra guards for disk buffers
haorldbchi Aug 24, 2023
74c5640
Re-configure buffers for 128G windows
haorldbchi Aug 24, 2023
20af47e
Add P7 serialization for 16G mode
haorldbchi Aug 25, 2023
a99fe73
Merge branch 'cuda-windows-fix' of github.com:Chia-Network/bladebit i…
haorldbchi Aug 25, 2023
46029fb
Fixed overflow on park buffers with -z > 1
haorldbchi Aug 29, 2023
a7a81f3
Fix macos build issue.
haorldbchi Aug 30, 2023
c924aef
Adding windows build support
wallentx Aug 30, 2023
9519d2b
Merge pull request #380 from Chia-Network/wallentx/extract-v-win
haorldbchi Aug 30, 2023
7079519
Fix harvester build
haorldbchi Aug 30, 2023
cf7ed4a
Code cleanup
haorldbchi Aug 30, 2023
f4e346f
Attempt to fix Harvester on windows again
haorldbchi Aug 30, 2023
7c4d43c
Fixing EmbedVersion cmd execution
wallentx Aug 30, 2023
ce07925
Trying full powershell cmd name
wallentx Aug 30, 2023
225b11d
Revert to allow git bash on windows, if found
haorldbchi Aug 30, 2023
a12de6f
Merge branch 'cuda-windows-fix' of github.com:Chia-Network/bladebit i…
haorldbchi Aug 30, 2023
8eb8523
Check for git bash on windows
haorldbchi Aug 30, 2023
5db9f94
Fix harvester CI on windows
haorldbchi Aug 30, 2023
9aa55e4
Fix incorrect file name on Harvester sources
haorldbchi Aug 30, 2023
42baec2
Reverting PlotWriter to previous implementation.
haorldbchi Aug 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
"preLaunchTask" : "build_cuda_debug",

"program": "${workspaceFolder}/build/bladebit_cuda",

// "-c", "xch1uf48n3f50xrs7zds0uek9wp9wmyza6crnex6rw8kwm3jnm39y82q5mvps6",
// "-i", "7a709594087cca18cffa37be61bdecf9b6b465de91acb06ecb6dbe0f4a536f73", // Yes overflow
// "--memo", "80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef207d52406afa2b6d7d92ea778f407205bd9dca40816c1b1cacfca2a6612b93eb",
Expand All @@ -140,8 +140,9 @@
// "-w -z 3 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot ~/plot/tmp",

// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot /home/harold/plot",
"-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-128 -t1 /home/harold/plotdisk /home/harold/plot",
// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-64 -t1 /home/harold/plotdisk /home/harold/plot",
// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-128 -t1 /home/harold/plotdisk --no-direct-buffers /home/harold/plot",
// "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-128 -t1 /home/harold/plotdisk /home/harold/plot",
"-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-64 -t1 /home/harold/plotdisk /home/harold/plot",


"windows": {
Expand Down Expand Up @@ -357,8 +358,8 @@

/// Compare
"plotcmp",
"/home/harold/plot/plot-k32-c01-2023-08-09-20-50-0a1b7c85644fcb9c274c5b75060ffd2a718c3c246fa24cba4399e1106d042172.plot.ref",
"/home/harold/plot/plot-k32-c01-2023-08-09-21-33-0a1b7c85644fcb9c274c5b75060ffd2a718c3c246fa24cba4399e1106d042172.plot",
"/home/harold/plot/plot-k32-c01-2023-08-22-16-21-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot",
"/home/harold/plot/plot-k32-c01-2023-08-22-16-21-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot",

// "/home/harold/plot/plot-k32-c01-2023-08-03-22-59-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
// "/home/harold/plot/jmplot-c01-c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835.plot"
Expand Down
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@
],
// "cmake.buildArgs": [],
"cmake.configureSettings": {
"BB_ENABLE_TESTS": "ON"
"BB_ENABLE_TESTS": "ON",
"BB_CUDA_USE_NATIVE": "ON"
},
"C_Cpp.dimInactiveRegions": false,
// "cmake.generator": "Unix Makefiles"
Expand Down
6 changes: 5 additions & 1 deletion Bladebit.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
add_library(bladebit_core src/plotting/DiskBuffer.h src/plotting/DiskBufferBase.cpp src/plotting/DiskBufferBase.h)
add_library(bladebit_core)
target_link_libraries(bladebit_core PUBLIC bladebit_config)

target_include_directories(bladebit_core PUBLIC
Expand Down Expand Up @@ -294,9 +294,13 @@ set(src_bladebit

src/plotting/DiskQueue.h
src/plotting/DiskQueue.cpp
src/plotting/DiskBuffer.h
src/plotting/DiskBuffer.cpp
src/plotting/DiskBucketBuffer.h
src/plotting/DiskBucketBuffer.cpp
src/plotting/DiskBufferBase.h
src/plotting/DiskBufferBase.cpp

src/util/MPMCQueue.h
src/util/CommandQueue.h
)
Expand Down
7 changes: 3 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ cmake_minimum_required(VERSION 3.19 FATAL_ERROR)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.16)

set(CMAKE_CONFIGURATION_TYPES Release Debug)

Expand All @@ -19,7 +18,7 @@ if(POLICY CMP0091)
cmake_policy(SET CMP0091 NEW)
endif()

set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "macOS minimum supported version.")
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.16" CACHE STRING "macOS minimum supported version.")
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>" CACHE STRING "MSVC Runtime Library")

project(bladebit LANGUAGES C CXX ASM)
Expand Down Expand Up @@ -85,7 +84,7 @@ endif()
# NOTE: These are mostly sandbox test environment, not proper tests
option(BB_ENABLE_TESTS "Enable tests." OFF)
option(NO_CUDA_HARVESTER "Explicitly disable CUDA in the bladebit_harvester target." OFF)
option(BB_NO_EMBED_VERSION "Disable embedding the version when building locally (non-CI)." ON)
option(BB_NO_EMBED_VERSION "Disable embedding the version when building locally (non-CI)." OFF)
option(BB_HARVESTER_ONLY "Enable only the harvester target." OFF)
option(BB_HARVESTER_STATIC "Build the harvester target as a static library." OFF)
option(BB_CUDA_USE_NATIVE "Only build the native CUDA architecture when in release mode." OFF)
Expand Down Expand Up @@ -146,7 +145,7 @@ endif()
include(Config.cmake)

if(NOT ${BB_HARVESTER_ONLY})
if(NOT BB_IS_DEPENDENCY AND (NOT BB_NO_EMBED_VERSION))
if((NOT BB_IS_DEPENDENCY) AND (NOT BB_NO_EMBED_VERSION))
include(cmake_modules/EmbedVersion.cmake)
endif()

Expand Down
10 changes: 8 additions & 2 deletions Harvester.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
if(NOT ${BB_HARVESTER_STATIC})
add_library(bladebit_harvester SHARED)
add_library(bladebit_harvester SHARED src/harvesting/HarvesterDummy.cpp)
else()
add_library(bladebit_harvester STATIC)
endif()
Expand Down Expand Up @@ -82,9 +82,15 @@ target_sources(bladebit_harvester PRIVATE
cuda/CudaF1.cu
cuda/CudaMatch.cu
cuda/CudaPlotUtil.cu
cuda/GpuQueue.cu

# TODO: Remove this, ought not be needed in harvester
# TODO: Does this have to be here?
cuda/GpuStreams.cu
cuda/GpuDownloadStream.cu
src/plotting/DiskBuffer.cpp
src/plotting/DiskBucketBuffer.cpp
src/plotting/DiskBufferBase.cpp
src/plotting/DiskQueue.cpp
>

$<$<NOT:${have_cuda}>:
Expand Down
27 changes: 18 additions & 9 deletions cmake_modules/EmbedVersion.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,25 @@
if((NOT DEFINED ENV{CI}) AND (NOT DEFINED CACHE{bb_version_embedded}))
message("Embedding local build version")

set(bb_version_embedded on CACHE BOOL "Version embedding has already happened.")

set(cmd_ver bash)
set(cmd_shell bash)
set(cmd_ext sh)
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
set(cmd_ver bash.exe)

find_program(bash_path NAMES bash.exe NO_CACHE)

if(${bash_path} MATCHES "-NOTFOUND")
set(cmd_shell powershell)
set(cmd_ext ps1)
else()
set(cmd_shell "${bash_path}")
endif()
endif()

execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh major OUTPUT_VARIABLE bb_ver_maj WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh minor OUTPUT_VARIABLE bb_ver_min WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh revision OUTPUT_VARIABLE bb_ver_rev WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh suffix OUTPUT_VARIABLE bb_ver_suffix WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_ver} ${CMAKE_SOURCE_DIR}/extract-version.sh commit OUTPUT_VARIABLE bb_ver_commit WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} major OUTPUT_VARIABLE bb_ver_maj WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} minor OUTPUT_VARIABLE bb_ver_min WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} revision OUTPUT_VARIABLE bb_ver_rev WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} suffix OUTPUT_VARIABLE bb_ver_suffix WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
execute_process(COMMAND ${cmd_shell} ${CMAKE_SOURCE_DIR}/extract-version.${cmd_ext} commit OUTPUT_VARIABLE bb_ver_commit WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)

# Remove trailing whitespace incurred in windows gitbash
string(STRIP "${bb_ver_maj}" bb_ver_maj)
Expand All @@ -39,3 +46,5 @@ if(NOT DEFINED ENV{CI})
add_compile_definitions(BLADEBIT_VERSION_SUFFIX="${bb_ver_suffix}")
add_compile_definitions(BLADEBIT_GIT_COMMIT="${bb_ver_commit}")
endif()

set(bb_version_embedded on CACHE BOOL "Version embedding has already happened.")
1 change: 1 addition & 0 deletions cuda/CudaPlotConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ static_assert( BBCU_BUCKET_ALLOC_ENTRY_COUNT / BBCU_BUCKET_COUNT == BBCU_MAX_SLI
// #define DBG_BBCU_P2_WRITE_MARKS 1

// #define DBG_BBCU_P2_COUNT_PRUNED_ENTRIES 1
// #define DBG_BBCU_KEEP_TEMP_FILES 1


#define _ASSERT_DOES_NOT_OVERLAP( b0, b1, size ) ASSERT( (b1+size) <= b0 || b1 >= (b0+size) )
Expand Down
27 changes: 22 additions & 5 deletions cuda/CudaPlotContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,24 @@ struct CudaK32ParkContext

struct CudaK32HybridMode
{
// For clarity, these are the file names for the disk buffers
// whose disk space will be shared for temp data in both phase 1 and phase 3.
// The name indicates their usage and in which phase.
static constexpr std::string_view Y_DISK_BUFFER_FILE_NAME = "p1y-p3index.tmp";
static constexpr std::string_view META_DISK_BUFFER_FILE_NAME = "p1meta-p3rmap.tmp";
static constexpr std::string_view LPAIRS_DISK_BUFFER_FILE_NAME = "p1unsortedx-p1lpairs-p3lp-p3-lmap.tmp";

static constexpr std::string_view P3_RMAP_DISK_BUFFER_FILE_NAME = META_DISK_BUFFER_FILE_NAME;
static constexpr std::string_view P3_INDEX_DISK_BUFFER_FILE_NAME = Y_DISK_BUFFER_FILE_NAME;
static constexpr std::string_view P3_LP_AND_LMAP_DISK_BUFFER_FILE_NAME = LPAIRS_DISK_BUFFER_FILE_NAME;

DiskQueue* temp1Queue; // Tables Queue
DiskQueue* temp2Queue; // Metadata Queue (could be the same as temp1Queue)

DiskBucketBuffer* metaBuffer; // Enabled in 64G mode
DiskBucketBuffer* unsortedXs; // Unsorted Xs are written to disk (uint64 entries)
DiskBucketBuffer* metaBuffer; // Enabled in < 128G mode
DiskBucketBuffer* yBuffer; // Enabled in < 128G mode
DiskBucketBuffer* unsortedL; // Unsorted Xs (or L pairs in < 128G) are written to disk (uint64 entries)
DiskBucketBuffer* unsortedR; // Unsorted R pairs in < 128G mode

DiskBuffer* tablesL[7];
DiskBuffer* tablesR[7];
Expand All @@ -58,8 +71,11 @@ struct CudaK32HybridMode

struct
{
DiskBucketBuffer* lpOut;
DiskBucketBuffer* indexOut;
// #NOTE: These buffers share the same file-backed storage as
// other buffers in phase 1.
DiskBucketBuffer* rMapBuffer; // Step 1
DiskBucketBuffer* indexBuffer; // X-step/Step 2
DiskBucketBuffer* lpAndLMapBuffer; // X-step/Step 2 (LP) | Step 3 (LMap)

} phase3;
};
Expand Down Expand Up @@ -142,6 +158,7 @@ struct CudaK32Phase3
GpuUploadBuffer lMapIn; // Output map (uint64) from the previous table run. Or, when L table is the first stored table, it is inlined x values
GpuDownloadBuffer lpOut; // Output line points (uint64)
GpuDownloadBuffer indexOut; // Output source line point index (uint32) (taken from the rMap source value)
GpuDownloadBuffer parksOut; // Output P7 parks on the last table
uint32* devLTable[2]; // Unpacked L table bucket

uint32 prunedBucketSlices[BBCU_BUCKET_COUNT][BBCU_BUCKET_COUNT];
Expand All @@ -151,7 +168,7 @@ struct CudaK32Phase3
struct {
GpuUploadBuffer lpIn; // Line points from step 2
GpuUploadBuffer indexIn; // Indices from step 2
GpuDownloadBuffer mapOut; // lTable for next step 1
GpuDownloadBuffer mapOut; // lTable for next step 2
GpuDownloadBuffer parksOut; // Downloads park buffers to host

uint32* hostParkOverrunCount;
Expand Down
2 changes: 1 addition & 1 deletion cuda/CudaPlotPhase2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ void CudaK32PlotPhase2AllocateBuffers( CudaK32PlotContext& cx, CudaK32AllocConte
desc.sliceAlignment = cx.diskContext->temp1Queue->BlockSize();
}

if( cx.cfg.disableDirectDownloads )
if( !cx.downloadDirect )
desc.pinnedAllocator = acx.pinnedAllocator;

CudaK32Phase2& p2 = *cx.phase2;
Expand Down
Loading