Skip to content

Commit

Permalink
2.4.5
Browse files Browse the repository at this point in the history
  • Loading branch information
IndeedMiners committed Apr 22, 2018
1 parent b830038 commit 7d09a6d
Show file tree
Hide file tree
Showing 35 changed files with 362 additions and 293 deletions.
44 changes: 29 additions & 15 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
}

size_t scratchPadSize = std::max(
cn_select_memory(::jconf::inst()->GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot())
);

size_t g_thd = ctx->rawIntensity;
Expand Down Expand Up @@ -376,8 +376,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
}

xmrstak_algo miner_algo[2] = {
::jconf::inst()->GetMiningAlgo(),
::jconf::inst()->GetMiningAlgoRoot()
::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo(),
::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot()
};
int num_algos = miner_algo[0] == miner_algo[1] ? 1 : 2;

Expand Down Expand Up @@ -675,11 +675,18 @@ std::vector<GpuContext> getAMDDevices(int index)
}

std::string devVendor(devVendorVec.data());
if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos)

bool isAMDDevice = devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos;
bool isNVIDIADevice = devVendor.find("NVIDIA Corporation") != std::string::npos || devVendor.find("NVIDIA") != std::string::npos;

std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
if((isAMDDevice && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
{
GpuContext ctx;
std::vector<char> devNameVec(1024);
size_t maxMem;
if( devVendor.find("NVIDIA Corporation") != std::string::npos)
ctx.isNVIDIA = true;

if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS)
{
Expand All @@ -699,6 +706,10 @@ std::vector<GpuContext> getAMDDevices(int index)
continue;
}

// the allocation for NVIDIA OpenCL is not limited to 1/4 of the GPU memory per allocation
if(ctx.isNVIDIA)
maxMem = ctx.freeMem;

if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
Expand Down Expand Up @@ -747,13 +758,15 @@ int getAMDPlatformIdx()

clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
std::string platformName(platformNameVec.data());
if( platformName.find("Advanced Micro Devices") != std::string::npos ||

bool isAMDOpenCL = platformName.find("Advanced Micro Devices") != std::string::npos ||
platformName.find("Apple") != std::string::npos ||
platformName.find("Mesa") != std::string::npos
)
platformName.find("Mesa") != std::string::npos;
bool isNVIDIADevice = platformName.find("NVIDIA Corporation") != std::string::npos || platformName.find("NVIDIA") != std::string::npos;
std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
if((isAMDOpenCL && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
{

printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str());
printer::inst()->print_msg(L0,"Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i , platformName.c_str());
if(platformName.find("Mesa") != std::string::npos)
mesaPlatform = i;
else
Expand Down Expand Up @@ -819,7 +832,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
std::vector<char> platformNameVec(infoSize);
clGetPlatformInfo(PlatformIDList[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL);
std::string platformName(platformNameVec.data());
if( platformName.find("Advanced Micro Devices") == std::string::npos)
if(xmrstak::params::inst().openCLVendor == "AMD" && platformName.find("Advanced Micro Devices") == std::string::npos)
{
printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
}
Expand Down Expand Up @@ -907,7 +920,8 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
{
size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize;
ctx[i].rawIntensity = reduced_intensity;
printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity));
const std::string backendName = xmrstak::params::inst().openCLVendor;
printer::inst()->print_msg(L0, "WARNING %s: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", backendName.c_str(), ctx[i].deviceIdx, int(reduced_intensity));
}

if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS)
Expand All @@ -922,7 +936,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, xmrstak_algo miner_algo)
{
// switch to the kernel storage
int kernel_storage = miner_algo == ::jconf::inst()->GetMiningAlgo() ? 0 : 1;
int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1;

cl_int ret;

Expand Down Expand Up @@ -990,7 +1004,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
return(ERR_OCL_API);
}

if(miner_algo == cryptonight_ipbc || miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon )
if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon )
{
// Input
if ((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
Expand Down Expand Up @@ -1087,7 +1101,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
{
// switch to the kernel storage
int kernel_storage = miner_algo == ::jconf::inst()->GetMiningAlgo() ? 0 : 1;
int kernel_storage = miner_algo == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() ? 0 : 1;

cl_int ret;
cl_uint zero = 0;
Expand Down
1 change: 1 addition & 0 deletions xmrstak/backend/amd/amd_gpu/gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct GpuContext
size_t workSize;
int stridedIndex;
int memChunk;
bool isNVIDIA = false;
int compMode;

/*Output vars*/
Expand Down
22 changes: 8 additions & 14 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ R"===(
* The implemented function is modified because the last is in our case always a scalar.
* We can ignore the bitwise AND operation.
*/

inline uint2 __attribute__((overloadable)) amd_bitalign( const uint2 src0, const uint2 src1, const uint src2)
inline uint2 amd_bitalign( const uint2 src0, const uint2 src1, const uint src2)
{
uint2 result;
result.s0 = (uint) (((((long)src0.s0) << 32) | (long)src1.s0) >> (src2));
Expand All @@ -60,7 +59,7 @@ inline uint2 __attribute__((overloadable)) amd_bitalign( const uint2 src0, const
* dst.s0 = src0.s0 >> offset;
* similar operation applied to other components of the vectors
*/
inline uint __attribute__((overloadable)) amd_bfe(const uint src0, const uint offset, const uint width)
inline int amd_bfe(const uint src0, const uint offset, const uint width)
{
/* casts are removed because we can implement everything as uint
* int offset = src1;
Expand Down Expand Up @@ -554,8 +553,8 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,

__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states, ulong Threads
// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc
#if(ALGO == 3 || ALGO == 5 || ALGO == 6)
// cryptonight_monero || cryptonight_aeon
#if(ALGO == 3 || ALGO == 5)
, __global ulong *input
#endif
)
Expand All @@ -575,7 +574,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
}

barrier(CLK_LOCAL_MEM_FENCE);
#if(ALGO == 3 || ALGO == 5 || ALGO == 6)
#if(ALGO == 3 || ALGO == 5)
uint2 tweak1_2;
#endif
uint4 b_x;
Expand All @@ -599,7 +598,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
b[1] = states[3] ^ states[7];

b_x = ((uint4 *)b)[0];
#if(ALGO == 3 || ALGO == 5 || ALGO == 6)
#if(ALGO == 3 || ALGO == 5)
tweak1_2 = as_uint2(input[4]);
tweak1_2.s0 >>= 24;
tweak1_2.s0 |= tweak1_2.s1 << 8;
Expand All @@ -626,7 +625,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]);

b_x ^= ((uint4 *)c)[0];
#if(ALGO == 3 || ALGO == 5 || ALGO == 6)
#if(ALGO == 3 || ALGO == 5)
uint table = 0x75310U;
uint index = ((b_x.s2 >> 26) & 12) | ((b_x.s2 >> 23) & 2);
b_x.s2 ^= ((table >> index) & 0x30U) << 24;
Expand All @@ -640,18 +639,13 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
a[0] += mul_hi(c[0], as_ulong2(tmp).s0);


#if(ALGO == 3 || ALGO == 5 || ALGO == 6)
#if(ALGO == 3 || ALGO == 5)
((uint2 *)&(a[1]))[0] ^= tweak1_2;
Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0];
((uint2 *)&(a[1]))[0] ^= tweak1_2;
#else
Scratchpad[IDX((c[0] & MASK) >> 4)] = ((uint4 *)a)[0];
#endif
#if(ALGO == 6)
long prev = *((__global long*)(Scratchpad + (IDX((c[0] & MASK) >> 4))));
long cur = *((__global long*)(Scratchpad + (IDX((c[0] & MASK) >> 4))) + 1);
*((__global long*)(Scratchpad + (IDX((c[0] & MASK) >> 4))) + 1) = prev ^ cur;
#endif

((uint4 *)a)[0] ^= tmp;
idx0 = a[0];
Expand Down
29 changes: 23 additions & 6 deletions xmrstak/backend/amd/autoAdjust.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,14 @@ class autoAdjust
constexpr size_t byteToMiB = 1024u * 1024u;

size_t hashMemSize = std::max(
cn_select_memory(::jconf::inst()->GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot())
);

std::string conf;
for(auto& ctx : devVec)
{
size_t minFreeMem = 128u * byteToMiB;
/* 1000 is a magic selected limit, the reason is that more than 2GiB memory
* sowing down the memory performance because of TLB cache misses
*/
Expand All @@ -112,12 +113,26 @@ class autoAdjust
*/
maxThreads = 2024u;
}

// NVIDIA optimizations
if(
ctx.isNVIDIA && (
ctx.name.find("P100") != std::string::npos ||
ctx.name.find("V100") != std::string::npos
)
)
{
// do not limit the number of threads
maxThreads = 40000u;
minFreeMem = 512u * byteToMiB;
}

// increase all intensity limits by two for aeon
if(::jconf::inst()->GetMiningAlgo() == cryptonight_lite)
if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_lite)
maxThreads *= 2u;

// keep 128MiB memory free (value is randomly chosen)
size_t availableMem = ctx.freeMem - (128u * byteToMiB);
size_t availableMem = ctx.freeMem - minFreeMem;
// 224byte extra memory is used per thread for meta data
size_t perThread = hashMemSize + 224u;
size_t maxIntensity = availableMem / perThread;
Expand All @@ -138,7 +153,7 @@ class autoAdjust
// set 8 threads per block (this is a good value for the most gpus)
conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" +
" \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" +
" \"affine_to_cpu\" : false, \"strided_index\" : 1, \"mem_chunk\" : 2,\n"
" \"affine_to_cpu\" : false, \"strided_index\" : " + (ctx.isNVIDIA ? "0" : "1") + ", \"mem_chunk\" : 2,\n"
" \"comp_mode\" : true\n" +
" },\n";
}
Expand All @@ -151,7 +166,9 @@ class autoAdjust
configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex));
configTpl.replace("GPUCONFIG",conf);
configTpl.write(params::inst().configFileAMD);
printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", params::inst().configFileAMD.c_str());

const std::string backendName = xmrstak::params::inst().openCLVendor;
printer::inst()->print_msg(L0, "%s: GPU (OpenCL) configuration stored in file '%s'", backendName.c_str(), params::inst().configFileAMD.c_str());
}

std::vector<GpuContext> devVec;
Expand Down
23 changes: 16 additions & 7 deletions xmrstak/backend/amd/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,19 @@ std::vector<iBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_wor
for (i = 0; i < n; i++)
{
jconf::inst()->GetThreadConfig(i, cfg);

const std::string backendName = xmrstak::params::inst().openCLVendor;

if(cfg.cpu_aff >= 0)
{
#if defined(__APPLE__)
printer::inst()->print_msg(L1, "WARNING on macOS thread affinity is only advisory.");
#endif

printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, affinity: %d.", i, (int)cfg.cpu_aff);
printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, affinity: %d.", backendName.c_str(), i, (int)cfg.cpu_aff);
}
else
printer::inst()->print_msg(L1, "Starting AMD GPU thread %d, no affinity.", i);
printer::inst()->print_msg(L1, "Starting %s GPU (OpenCL) thread %d, no affinity.", backendName.c_str(), i);

minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i], cfg);
pvThreads->push_back(thd);
Expand Down Expand Up @@ -193,12 +195,13 @@ void minethd::work_main()
cpu_ctx = cpu::minethd::minethd_alloc_ctx();

// start with root algorithm and switch later if fork version is reached
auto miner_algo = ::jconf::inst()->GetMiningAlgoRoot();
auto miner_algo = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot();
cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);

globalStates::inst().iConsumeCnt++;

uint8_t version = 0;
size_t lastPoolId = 0;

while (bQuit == 0)
{
Expand All @@ -217,14 +220,20 @@ void minethd::work_main()
}

uint8_t new_version = oWork.getVersion();
if (::jconf::inst()->GetMiningAlgo() == cryptonight_ipbc) new_version = oWork.bWorkBlob[1];
if(new_version != version)
if(new_version != version || oWork.iPoolId != lastPoolId)
{
if(new_version >= ::jconf::inst()->GetMiningForkVersion())
coinDescription coinDesc = ::jconf::inst()->GetCurrentCoinSelection().GetDescription(oWork.iPoolId);
if(new_version >= coinDesc.GetMiningForkVersion())
{
miner_algo = coinDesc.GetMiningAlgo();
hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
}
else
{
miner_algo = ::jconf::inst()->GetMiningAlgo();
miner_algo = coinDesc.GetMiningAlgoRoot();
hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
}
lastPoolId = oWork.iPoolId;
version = new_version;
}

Expand Down
5 changes: 3 additions & 2 deletions xmrstak/backend/backendConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,12 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
#ifndef CONF_NO_OPENCL
if(params::inst().useAMD)
{
plugin amdplugin("AMD", "xmrstak_opencl_backend");
const std::string backendName = xmrstak::params::inst().openCLVendor;
plugin amdplugin(backendName, "xmrstak_opencl_backend");
std::vector<iBackend*>* amdThreads = amdplugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, environment::inst());
pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads));
if(amdThreads->size() == 0)
printer::inst()->print_msg(L0, "WARNING: backend AMD disabled.");
printer::inst()->print_msg(L0, "WARNING: backend %s (OpenCL) disabled.", backendName.c_str());
}
#endif

Expand Down
4 changes: 2 additions & 2 deletions xmrstak/backend/cpu/autoAdjust.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ class autoAdjust
{

const size_t hashMemSizeKB = std::max(
cn_select_memory(::jconf::inst()->GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot())
) / 1024u;
const size_t halfHashMemSizeKB = hashMemSizeKB / 2u;

Expand Down
4 changes: 2 additions & 2 deletions xmrstak/backend/cpu/autoAdjustHwloc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ class autoAdjust
autoAdjust()
{
hashMemSize = std::max(
cn_select_memory(::jconf::inst()->GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetMiningAlgoRoot())
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()),
cn_select_memory(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgoRoot())
);
halfHashMemSize = hashMemSize / 2u;
}
Expand Down
Loading

0 comments on commit 7d09a6d

Please sign in to comment.