Skip to content

Commit

Permalink
Merge pull request #2673 from fireice-uk/xmr-stak-rx-dev
Browse files Browse the repository at this point in the history
[RX] release 1.0.5-rx
  • Loading branch information
fireice-uk authored May 6, 2020
2 parents 65ade74 + cd2e233 commit af03d89
Show file tree
Hide file tree
Showing 28 changed files with 772 additions and 538 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ option(CMAKE_LINK_STATIC "link as much as possible libraries static" OFF)
#option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF)
#set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "Use the static version of the CUDA runtime library if available" FORCE)

option(CUDA_ENABLE "Enable or disable CUDA support (NVIDIA backend)" ON)
option(CUDA_ENABLE "Enable or disable CUDA support (NVIDIA backend)" OFF)
if(CUDA_ENABLE)
find_package(CUDA 9.0)

Expand Down Expand Up @@ -205,7 +205,7 @@ endif()
# Find OpenCL
###############################################################################

option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" ON)
option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" OFF)
if(OpenCL_ENABLE)
# try to find AMD OpenCL before NVIDIA OpenCL
find_path(OpenCL_INCLUDE_DIR
Expand Down
7 changes: 7 additions & 0 deletions xmrstak/backend/cpu/cpuType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,12 @@ Model getModel()
return result;
}

/** Probe the CPU for BMI2 support via CPUID.
 *
 * BMI2 is reported in CPUID leaf 7, sub-leaf 0, EBX bit 8.
 * Assumes cpuid() fills the array as {EAX, EBX, ECX, EDX} - TODO confirm
 * against the helper defined earlier in this file.
 *
 * @return true if the CPU reports BMI2, false otherwise (including CPUs
 *         that do not implement CPUID leaf 7 at all)
 */
bool firstHasBMI2()
{
	int32_t cpu_info[4];

	// Leaf 0 returns the highest supported basic leaf in EAX. Asking for a
	// leaf above that maximum yields the data of the highest basic leaf
	// instead, so bit 8 of EBX would be unrelated to BMI2.
	cpuid(0, 0, cpu_info);
	if(cpu_info[0] < 7)
		return false;

	cpuid(7, 0, cpu_info);
	return has_feature(cpu_info[1], 8);
}

} // namespace cpu
} // namespace xmrstak
8 changes: 8 additions & 0 deletions xmrstak/backend/cpu/cpuType.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,13 @@ Model getModel();
*/
int32_t get_masked(int32_t val, int32_t h, int32_t l);

bool firstHasBMI2();

/** Cached BMI2 query.
 *
 * The first call runs firstHasBMI2(); the result is kept in a
 * function-local static and returned by every later call.
 */
inline bool hasBMI2()
{
	static const bool cachedResult = firstHasBMI2();
	return cachedResult;
}

} // namespace cpu
} // namespace xmrstak
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/crypto/cryptonight.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ struct randomX_global_ctx
}
printer::inst()->print_msg(LDEBUG,"allocate dataset/cache for numa %u", uint32_t(numaId));
#ifdef __linux__
randomx_dataset* dataset = randomx_alloc_dataset(static_cast<randomx_flags>(RANDOMX_FLAG_LARGE_PAGES | RANDOMX_FLAG_LARGE_PAGES_1G));
randomx_dataset* dataset = randomx_alloc_dataset(static_cast<randomx_flags>(RANDOMX_FLAG_LARGE_PAGES | RANDOMX_FLAG_1GB_PAGES));
if (!dataset)
{
printer::inst()->print_msg(LDEBUG,"Warning: dataset allocation with 1 GiB pages failed");
Expand Down
4 changes: 4 additions & 0 deletions xmrstak/backend/cpu/crypto/cryptonight_aesni.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ struct RandomX_generator
randomx_apply_config(RandomX_WowneroConfig);
else if(ALGO == randomX_arqma)
randomx_apply_config(RandomX_ArqmaConfig);
else if(ALGO == randomX_safex)
randomx_apply_config(RandomX_SafexConfig);
else if(ALGO == randomX_keva)
randomx_apply_config(RandomX_KevaConfig);
}

for(size_t i = 0; i < N; i++)
Expand Down
23 changes: 20 additions & 3 deletions xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);

constexpr int PREFETCH_DISTANCE = 4096;
constexpr int PREFETCH_DISTANCE = 7168;
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
scratchpadEnd -= PREFETCH_DISTANCE;

Expand All @@ -258,8 +258,25 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi

rx_prefetch_t0(prefetchPtr);

scratchpadPtr += 64;
prefetchPtr += 64;
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));

fill_state0 = aesdec<softAes>(fill_state0, key0);
fill_state1 = aesenc<softAes>(fill_state1, key1);
fill_state2 = aesdec<softAes>(fill_state2, key2);
fill_state3 = aesenc<softAes>(fill_state3, key3);

rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);

rx_prefetch_t0(prefetchPtr + 64);

scratchpadPtr += 128;
prefetchPtr += 128;
}
prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
;# save VM register values
add rsp, 40
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
Expand Down
20 changes: 10 additions & 10 deletions xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load.inc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
lea rcx, [rsi+rax]
push rcx
mov [rsp+16], rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
Expand All @@ -9,7 +9,7 @@
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
lea rcx, [rsi+rdx]
push rcx
mov [rsp+24], rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
Expand All @@ -18,11 +18,11 @@
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14
andpd xmm4, xmm13
andpd xmm5, xmm13
andpd xmm6, xmm13
andpd xmm7, xmm13
orpd xmm4, xmm14
orpd xmm5, xmm14
orpd xmm6, xmm14
orpd xmm7, xmm14
24 changes: 24 additions & 0 deletions xmrstak/backend/cpu/crypto/randomx/asm/program_loop_load_xop.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
;# Loop-start load, XOP variant (uses vpcmov for the final masking step).
;#
;# First address: rsi+rax. It is kept in the stack slot [rsp+16] and the
;# 64 bytes found there are XORed into the integer registers r8-r15.
;# NOTE(review): rsi is presumably the scratchpad base and rax/rdx the two
;# per-iteration offsets - confirm against the surrounding JIT code.
lea rcx, [rsi+rax]
mov [rsp+16], rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
;# Second address: rsi+rdx, kept in the stack slot [rsp+24].
lea rcx, [rsi+rdx]
mov [rsp+24], rcx
;# Each cvtdq2pd reads 8 bytes (two packed signed 32-bit integers) and
;# converts them to two packed doubles; xmm0-xmm7 consume 64 bytes in total.
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
;# Bitwise select on xmm4-xmm7 with xmm13 as the selector:
;#   xmmN = (xmmN AND xmm13) OR (xmm14 AND NOT xmm13)
;# NOTE(review): xmm13 presumably holds the mantissa mask and xmm14 the
;# exponent bits to force in - confirm where these registers are loaded.
vpcmov xmm4, xmm4, xmm14, xmm13
vpcmov xmm5, xmm5, xmm14, xmm13
vpcmov xmm6, xmm6, xmm14, xmm13
vpcmov xmm7, xmm7, xmm14, xmm13
4 changes: 2 additions & 2 deletions xmrstak/backend/cpu/crypto/randomx/asm/program_loop_store.inc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pop rcx
mov rcx, [rsp+24]
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
Expand All @@ -7,7 +7,7 @@
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
mov rcx, [rsp+16]
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mantissaMask:
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
db 0, 0, 192, 255, 255, 255, 255, 0, 0, 0, 192, 255, 255, 255, 255, 0
exp240:
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
scaleMask:
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/crypto/randomx/blake2/blake2b.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static const uint64_t blake2b_IV[8] = {
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };

static const unsigned int blake2b_sigma[12][16] = {
static const uint8_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/crypto/randomx/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_DATASET_MAX_SIZE 2181038080

// Increase it if some configs use larger programs
#define RANDOMX_PROGRAM_MAX_SIZE 512
#define RANDOMX_PROGRAM_MAX_SIZE 320

// Increase it if some configs use larger scratchpad
#define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152
3 changes: 2 additions & 1 deletion xmrstak/backend/cpu/crypto/randomx/jit_compiler_fallback.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ namespace randomx {
JitCompilerFallback() {
throw std::runtime_error("JIT compilation is not supported on this platform");
}
void generateProgram(Program&, ProgramConfiguration&) {
void prepare() {}
void generateProgram(Program&, ProgramConfiguration&, uint32_t) {

}
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
Expand Down
Loading

0 comments on commit af03d89

Please sign in to comment.