diff --git a/.gitignore b/.gitignore index 4a4c233bb..26d278f0b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,11 @@ xmr-stak.kdev4 cmake-build-release/ cmake-build-debug/ \.idea/ + +# MacOS files +.DS_Store +.AppleDouble +.LSOverride + +# Thumbnails +._* diff --git a/doc/compile_Linux.md b/doc/compile_Linux.md index 79e036ef7..ebf115430 100644 --- a/doc/compile_Linux.md +++ b/doc/compile_Linux.md @@ -2,10 +2,17 @@ ## Install Dependencies -### AMD APP SDK 3.0 (only needed to use AMD GPUs) +### AMD Driver (only needed to use AMD GPUs) -- download and install the latest version from http://debian.nullivex.com/amd/AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.tar.bz2 (see https://github.com/fireice-uk/xmr-stak/issues/1511#issuecomment-385120692) - (do not wonder why it is a link to a dropbox but AMD has removed the SDK downloads, see https://community.amd.com/thread/228059) +- the AMD APP SDK is no longer needed (everything is included in the driver package) +- download & unzip the AMD driver: https://www.amd.com/en/support +- run `./amdgpu-pro-install --opencl=legacy,pal` from the unzipped folder +- set the OpenCL environment variable: `export AMDAPPSDKROOT=/opt/amdgpu-pro/` + +**ATTENTION** The Linux driver 18.3 is creating invalid shares. +If you have an issue with `invalid shares` please downgrade your driver or switch to ROCm. + +On Linux, the open-source driver ROCm 1.9.X+ is also a well-working alternative, see https://rocm.github.io/ROCmInstall.html ### Cuda 8.0+ (only needed to use NVIDIA GPUs) diff --git a/doc/compile_Windows.md b/doc/compile_Windows.md index add5fbfd0..1b5787879 100644 --- a/doc/compile_Windows.md +++ b/doc/compile_Windows.md @@ -30,7 +30,12 @@ - CUDA/Runtime - Driver components -### AMD APP SDK 3.0 (only needed for AMD GPUs) +### AMD DRIVER/APP SDK 3.0 (only needed for AMD GPUs) + +- Download & install the AMD driver: https://www.amd.com/en/support + +**ATTENTION** Many Windows drivers 18.5+ are creating invalid shares.
+If you have an issue with `invalid shares` please downgrade your driver. - Download and install the latest version from http://amd-dev.wpengine.netdna-cdn.com/app-sdk/installers/APPSDKInstaller/3.0.130.135-GA/full/AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe (do not wonder why it is a link to a netdna-cdn.com but AMD has removed the SDK downloads, see https://community.amd.com/thread/222855) diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index 7c7aff788..6b0d13d79 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -420,6 +420,11 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ options += " -DMEMORY=" + std::to_string(hashMemSize); options += " -DALGO=" + std::to_string(miner_algo[ii]); options += " -DCN_UNROLL=" + std::to_string(ctx->unroll); + /* AMD driver output is something like: `1445.5 (VM)` + * and is mapped to `14` only. The value is only used for a compiler + * workaround. 
+ */ + options += " -DOPENCL_DRIVER_MAJOR=" + std::to_string(std::stoi(openCLDriverVer.data()) / 100); /* create a hash for the compile time cache * used data: @@ -928,13 +933,6 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) // create a directory for the OpenCL compile cache create_directory(get_home() + "/.openclcache"); - // check if cryptonight_monero_v8 is selected for the user or dev pool - bool useCryptonight_v8 = - ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_monero_v8 || - ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot() == cryptonight_monero_v8 || - ::jconf::inst()->GetCurrentCoinSelection().GetDescription(0).GetMiningAlgo() == cryptonight_monero_v8 || - ::jconf::inst()->GetCurrentCoinSelection().GetDescription(0).GetMiningAlgoRoot() == cryptonight_monero_v8; - for(int i = 0; i < num_gpus; ++i) { const std::string backendName = xmrstak::params::inst().openCLVendor; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl b/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl index 607806b7a..1ef1dead4 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl @@ -2,6 +2,10 @@ R"===( /* * @author SChernykh */ + +// cryptonight_monero_v8 +#if(ALGO==11) + static const __constant uint RCP_C[256] = { 0xfe01be73u,0xfd07ff01u,0xfa118c5au,0xf924fb13u,0xf630cddbu,0xf558f73cu,0xf25f2934u,0xf1a3f37bu, @@ -68,9 +72,21 @@ inline uint2 fast_div_v2(const __local uint *RCP, ulong a, uint b) const ulong k = mul_hi(as_uint2(a).s0, r) + ((ulong)(r) * as_uint2(a).s1) + a; ulong q; - ((uint*)&q)[0] = as_uint2(k).s1;; - ((uint*)&q)[1] = (k < a) ? 
1 : 0; - + ((uint*)&q)[0] = as_uint2(k).s1; + +#if defined(cl_amd_device_attribute_query) && (OPENCL_DRIVER_MAJOR == 14) + /* The AMD driver 14.XX is not able to compile `(k < a)` + * https://github.com/fireice-uk/xmr-stak/issues/1922 + * This is a workaround for the broken compiler. + */ + ulong whyAMDwhy; + ((uint*)&whyAMDwhy)[0] = as_uint2(k).s0; + ((uint*)&whyAMDwhy)[1] = as_uint2(k).s1; + ((uint*)&q)[1] = (whyAMDwhy < a) ? 1U : 0U; +#else + ((uint*)&q)[1] = (k < a) ? 1U : 0U; +#endif + const long tmp = a - q * b; const bool overshoot = (tmp < 0); const bool undershoot = (tmp >= b); @@ -105,4 +121,7 @@ inline uint fast_sqrt_v2(const ulong n1) return result; } + +#endif + )===" diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index a9f18d1b0..55879110a 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -296,6 +296,7 @@ bool minethd::self_test() bResult = bResult && memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0; hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_lite); + hashf("This is a test This is a test This is a test", 44, out, ctx); bResult = bResult && memcmp(out, "\x5a\x24\xa0\x29\xde\x1c\x39\x3f\x3d\x52\x7a\x2f\x9b\x39\xdc\x3d\xb3\xbc\x87\x11\x8b\x84\x52\x9b\x9f\x0\x88\x49\x25\x4b\x5\xce", 32) == 0; } else if(algo == cryptonight_monero) diff --git a/xmrstak/misc/coinDescription.hpp b/xmrstak/misc/coinDescription.hpp index 73d9a9548..55e86f4e2 100644 --- a/xmrstak/misc/coinDescription.hpp +++ b/xmrstak/misc/coinDescription.hpp @@ -10,9 +10,9 @@ namespace xmrstak { struct coinDescription { - xmrstak_algo algo; - xmrstak_algo algo_root; - uint8_t fork_version; + xmrstak_algo algo = xmrstak_algo::invalid_algo; + xmrstak_algo algo_root = xmrstak_algo::invalid_algo; + uint8_t fork_version = 0u; coinDescription() = default; diff --git 
a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index 4fcce1f97..a303b34cd 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -627,8 +627,12 @@ void executor::ex_main() break; case EV_GPU_RES_ERROR: - log_result_error(std::string(ev.oGpuError.error_str + std::string(" GPU ID ") + std::to_string(ev.oGpuError.idx))); + { + std::string err_msg = std::string(ev.oGpuError.error_str) + " GPU ID " + std::to_string(ev.oGpuError.idx); + printer::inst()->print_msg(L0, err_msg.c_str()); + log_result_error(std::move(err_msg)); break; + } case EV_PERF_TICK: for (i = 0; i < pvThreads->size(); i++) diff --git a/xmrstak/misc/telemetry.cpp b/xmrstak/misc/telemetry.cpp index 197da8eca..47442df09 100644 --- a/xmrstak/misc/telemetry.cpp +++ b/xmrstak/misc/telemetry.cpp @@ -36,6 +36,7 @@ telemetry::telemetry(size_t iThd) ppHashCounts = new uint64_t*[iThd]; ppTimestamps = new uint64_t*[iThd]; iBucketTop = new uint32_t[iThd]; + mtx = new std::mutex[iThd]; for (size_t i = 0; i < iThd; i++) { @@ -49,8 +50,7 @@ telemetry::telemetry(size_t iThd) double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread) { - std::unique_lock lk(mtx); - uint64_t iTimeNow = get_timestamp_ms(); + uint64_t iEarliestHashCnt = 0; uint64_t iEarliestStamp = 0; @@ -58,6 +58,9 @@ double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread) uint64_t iLatestHashCnt = 0; bool bHaveFullSet = false; + std::unique_lock lk(mtx[iThread]); + uint64_t iTimeNow = get_timestamp_ms(); + //Start at 1, buckettop points to next empty for (size_t i = 1; i < iBucketSize; i++) { @@ -81,6 +84,7 @@ double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread) iEarliestStamp = ppTimestamps[iThread][idx]; iEarliestHashCnt = ppHashCounts[iThread][idx]; } + lk.unlock(); if (!bHaveFullSet || iEarliestStamp == 0 || iLatestStamp == 0) return nan(""); @@ -99,7 +103,7 @@ double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread) void 
telemetry::push_perf_value(size_t iThd, uint64_t iHashCount, uint64_t iTimestamp) { - std::unique_lock lk(mtx); + std::unique_lock lk(mtx[iThd]); size_t iTop = iBucketTop[iThd]; ppHashCounts[iThd][iTop] = iHashCount; ppTimestamps[iThd][iTop] = iTimestamp; diff --git a/xmrstak/misc/telemetry.hpp b/xmrstak/misc/telemetry.hpp index 1813c00e6..580565de2 100644 --- a/xmrstak/misc/telemetry.hpp +++ b/xmrstak/misc/telemetry.hpp @@ -15,7 +15,7 @@ class telemetry double calc_telemetry_data(size_t iLastMillisec, size_t iThread); private: - mutable std::mutex mtx; + std::mutex* mtx; constexpr static size_t iBucketSize = 2 << 11; //Power of 2 to simplify calculations constexpr static size_t iBucketMask = iBucketSize - 1; uint32_t* iBucketTop; diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index 80c25c6b8..0e78283e6 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -18,7 +18,7 @@ #endif #define XMR_STAK_NAME "xmr-stak" -#define XMR_STAK_VERSION "2.5.0" +#define XMR_STAK_VERSION "2.5.1" #if defined(_WIN32) #define OS_TYPE "win"