[WIP] clean memory on miner shutdown #1885

Open · wants to merge 3 commits into base: dev
30 changes: 30 additions & 0 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -607,6 +607,36 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
    return 0;
}

void FinalizeOpenCL(GpuContext* ctx)
{
    xmrstak_algo miner_algo[2] = {
        ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo(),
        ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()
    };
    int num_algos = miner_algo[0] == miner_algo[1] ? 1 : 2;

    // release the per-algorithm kernels: the primary slot holds seven
    // (cn0..cn2 plus the four result-hash kernels), the root slot only
    // the three cryptonight steps
    for(int ii = 0; ii < num_algos; ++ii)
    {
        if(ii == 0)
        {
            for(int i = 0; i < 7; ++i)
                clReleaseKernel(ctx->Kernels[ii][i]);
        }
        else
        {
            for(int i = 0; i < 3; ++i)
                clReleaseKernel(ctx->Kernels[ii][i]);
        }
    }

    for(size_t i = 0; i < 6; ++i)
        clReleaseMemObject(ctx->ExtraBuffers[i]);
    clReleaseMemObject(ctx->InputBuffer);
    clReleaseMemObject(ctx->OutputBuffer);

    for(size_t i = 0; i < 2; ++i)
        clReleaseProgram(ctx->Program[i]);

    clReleaseCommandQueue(ctx->CommandQueues);
    clReleaseDevice(ctx->DeviceID);
}

const cl_platform_info attributeTypes[5] = {
    CL_PLATFORM_NAME,
    CL_PLATFORM_VENDOR,
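For orientation, here is a minimal sketch of how a caller might pair the new teardown with the existing setup. The helper `run_one_gpu`, the stack-constructed `GpuContext`, and the zero-means-success check are assumptions drawn from the surrounding code, not part of this PR:

```cpp
#include "xmrstak/backend/amd/amd_gpu/gpu.hpp"

// Hypothetical caller: set up one GPU context, mine, release everything.
void run_one_gpu(size_t platform_idx)
{
    GpuContext ctx;
    // InitOpenCL fills in the queue, programs, kernels and buffers.
    if(InitOpenCL(&ctx, 1 /*num_gpus*/, platform_idx) != 0)
        return; // setup failed, nothing was acquired

    // ... XMRSetJob(...) / XMRRunJob(...) mining loop ...

    // Release kernels, buffers, programs, queue and device — the
    // counterpart this PR adds for the shutdown path.
    FinalizeOpenCL(&ctx);
}
```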
1 change: 1 addition & 0 deletions xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -52,6 +52,7 @@ int getAMDPlatformIdx();
 std::vector<GpuContext> getAMDDevices(int index);

 size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx);
+void FinalizeOpenCL(GpuContext* ctx);
 size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo);
 size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo);
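Since `work_main` below only reaches the cleanup through explicit try/catch blocks, an RAII guard is an alternative worth noting. This is a hypothetical refactoring sketch, not something the PR does:

```cpp
#include "xmrstak/backend/amd/amd_gpu/gpu.hpp"

// Hypothetical RAII wrapper: releases the OpenCL resources on every exit
// path, including exceptions, without a try/catch at each return point.
struct OpenCLGuard
{
    GpuContext* ctx;
    explicit OpenCLGuard(GpuContext* c) : ctx(c) {}
    ~OpenCLGuard()
    {
        if(ctx != nullptr)
            FinalizeOpenCL(ctx);
    }
    // non-copyable: the guard owns the release exactly once
    OpenCLGuard(const OpenCLGuard&) = delete;
    OpenCLGuard& operator=(const OpenCLGuard&) = delete;
};
```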
187 changes: 103 additions & 84 deletions xmrstak/backend/amd/minethd.cpp
@@ -162,118 +162,137 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor

void minethd::work_main()
{
    cryptonight_ctx* cpu_ctx = nullptr;
    // the whole mining loop runs inside try/catch so the cleanup below is
    // reached even if a worker thread throws
    try
    {
        if(affinity >= 0) //-1 means no affinity
            bindMemoryToNUMANode(affinity);

        order_fix.set_value();
        std::unique_lock<std::mutex> lck(thd_aff_set);
        lck.release();
        std::this_thread::yield();

        uint64_t iCount = 0;
        cpu_ctx = cpu::minethd::minethd_alloc_ctx();

        if(cpu_ctx == nullptr)
        {
            printer::inst()->print_msg(L0, "ERROR: miner was not able to allocate memory, miner will be stopped.");
            win_exit(1);
        }
        // start with root algorithm and switch later if fork version is reached
        auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
        cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);

        uint8_t version = 0;
        size_t lastPoolId = 0;

        while(!bQuit)
        {
            if(oWork.bStall)
            {
                /* We are stalled here because the executor didn't find a job for us yet,
                 * either because of network latency, or a socket problem. Since we are
                 * the raison d'être of this software, it is sensible to just wait until
                 * we have something.
                 */

                while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
                    std::this_thread::sleep_for(std::chrono::milliseconds(100));

                globalStates::inst().consume_work(oWork, iJobNo);
                continue;
            }

            uint8_t new_version = oWork.getVersion();
            if(new_version != version || oWork.iPoolId != lastPoolId)
            {
                coinDescription coinDesc = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(oWork.iPoolId);
                if(new_version >= coinDesc.GetMiningForkVersion())
                {
                    miner_algo = coinDesc.GetMiningAlgo();
                    hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
                }
                else
                {
                    miner_algo = coinDesc.GetMiningAlgoRoot();
                    hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
                }
                lastPoolId = oWork.iPoolId;
                version = new_version;
            }

            uint32_t h_per_round = pGpuCtx->rawIntensity;
            size_t round_ctr = 0;

            assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
            uint64_t target = oWork.iTarget;

            XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target, miner_algo);

            if(oWork.bNiceHash)
                pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);

            globalStates::inst().consume_work(oWork, iJobNo);

            // the inner loop now also honours bQuit, so a shutdown request is
            // noticed between GPU rounds and not only on a job switch
            while(!bQuit && globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
            {
                //Allocate a new nonce every 16 rounds
                if((round_ctr++ & 0xF) == 0)
                {
                    globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
                    // check if the job is still valid, there is a small possibility that the job is switched
                    if(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) != iJobNo)
                        break;
                }

                cl_uint results[0x100];
                memset(results, 0, sizeof(cl_uint) * 0x100);

                XMRRunJob(pGpuCtx, results, miner_algo);

                for(size_t i = 0; i < results[0xFF]; i++)
                {
                    uint8_t bWorkBlob[112];
                    uint8_t bResult[32];

                    memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
                    memset(bResult, 0, sizeof(job_result::bResult));

                    *(uint32_t*)(bWorkBlob + 39) = results[i];

                    hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx);
                    if((*((uint64_t*)(bResult + 24))) < oWork.iTarget)
                        executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo, miner_algo), oWork.iPoolId));
                    else
                        executor::inst()->push_event(ex_event("AMD Invalid Result", pGpuCtx->deviceIdx, oWork.iPoolId));
                }

                iCount += pGpuCtx->rawIntensity;
                uint64_t iStamp = get_timestamp_ms();
                iHashCount.store(iCount, std::memory_order_relaxed);
                iTimestamp.store(iStamp, std::memory_order_relaxed);
                std::this_thread::yield();
            }
        }
    }
    catch(...)
    {
        win_exit(1);
    }

    // free the CPU-side scratchpad and all OpenCL resources before the
    // thread reports that it has finished shutting down
    try
    {
        cryptonight_free_ctx(cpu_ctx);
        FinalizeOpenCL(pGpuCtx);
    }
    catch(...)
    {
        win_exit(1);
    }
    shutdownFinished = true;
}

} // namespace amd
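The executor side of the shutdown is not shown in this diff, so here is a standalone model of the handshake the new code implies: a controller flips `bQuit`, the worker drains its loops, frees its resources, then acknowledges via `shutdownFinished`. The `std::atomic<bool>` types are an assumption for the sketch; the diff's plain `bool` members do not by themselves guarantee this cross-thread visibility:

```cpp
#include <atomic>
#include <chrono>
#include <thread>

std::atomic<bool> bQuit{false};
std::atomic<bool> shutdownFinished{false};

void worker()
{
    while(!bQuit)
    {
        // one mining round; inner loops also test !bQuit, as in the diff
    }
    // cleanup would go here: cryptonight_free_ctx(...), FinalizeOpenCL(...)
    shutdownFinished = true;
}

void request_shutdown()
{
    bQuit = true;
    // wait until the worker has released its memory before exiting
    while(!shutdownFinished)
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
```

Note that the hunk below removes `bQuit` from minethd.hpp, presumably hoisting it into the shared `iBackend` base so the executor can set it for every backend.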
3 changes: 1 addition & 2 deletions xmrstak/backend/amd/minethd.hpp
@@ -40,11 +40,10 @@ class minethd : public iBackend
     std::thread oWorkThd;
     int64_t affinity;

-    bool bQuit;
     bool bNoPrefetch;

     //Mutable ptr to vector below, different for each thread
-    GpuContext* pGpuCtx;
+    GpuContext* pGpuCtx = nullptr;

     // WARNING - this vector (but not its contents) must be immutable
     // once the threads are started
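The `= nullptr` default is what gives the shutdown path a defined value to test if the thread exits before a GPU context is ever assigned. A hypothetical guard the initializer enables (the PR's `work_main` above calls `FinalizeOpenCL` unconditionally):

```cpp
// Only release OpenCL state if a context was actually acquired;
// without the nullptr default this read would be undefined behaviour.
if(pGpuCtx != nullptr)
    FinalizeOpenCL(pGpuCtx);
```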