Skip to content

Commit

Permalink
Newest XMR-STAK sources + AEON changes
Browse files Browse the repository at this point in the history
  • Loading branch information
IndeedMiners committed Oct 24, 2017
1 parent b6f0637 commit f1f8c06
Show file tree
Hide file tree
Showing 26 changed files with 180 additions and 172 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
project(xmr-stak)
project(aeon-stak)

cmake_minimum_required(VERSION 3.0.1)

Expand Down Expand Up @@ -178,7 +178,7 @@ list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}")
# Find OpenCL
###############################################################################

option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" OFF)
option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" ON)
if(OpenCL_ENABLE)
# try to find AMD OpenCL before NVIDIA OpenCL
find_path(OpenCL_INCLUDE_DIR
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# XMR-Stak - Monero All-in-One Mining Software

XMR-Stak is a universal Stratum pool miner. This miner supports CPUs as well as AMD and NVIDIA GPUs.
AEON changes based on barretts/xmr-stak

## HTML reports
<img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-hashrate.png" width="260"> <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-results.png" width="260"> <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-connection.png" width="260">
Expand Down
9 changes: 5 additions & 4 deletions doc/tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,16 @@ In the most cases a `worksize` of `16` or `8` is optimal.

### Add More GPUs

To add a new GPU you need to add a new config set to `gpu_threads_conf` and increase `gpu_thread_num` to the number of GPUs (entries in `gpu_threads_conf`).
`index` is the number of the gpu.
To add a new GPU you need to add a new config set to `gpu_threads_conf`. `index` is the OpenCL index of the GPU.
`platform_index` is the index of the OpenCL platform (Intel / AMD / Nvidia).
If you are unsure of either the GPU or the platform index value, you can use the `clinfo` tool that comes with the AMD APP SDK to dump the values.

```
"gpu_thread_num" : 2,
"gpu_threads_conf" :
[
{ "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false },
{ "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false },
],
"platform_index" : 0,
```
8 changes: 5 additions & 3 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
}
}*/

if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
size_t tmpNonce = ctx->Nonce;
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
return ERR_OCL_API;
Expand Down Expand Up @@ -896,8 +897,9 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
// round up to next multiple of w_size
BranchNonces[i] = ((BranchNonces[i] + w_size - 1u) / w_size) * w_size;
// number of global threads must be a multiple of the work group size (w_size)
assert(BranchNonces%w_size == 0);
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
assert(BranchNonces[i]%w_size == 0);
size_t tmpNonce = ctx->Nonce;
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &tmpNonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
return ERR_OCL_API;
Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/amd/amd_gpu/gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct GpuContext
int computeUnits;
std::string name;

size_t Nonce;
uint32_t Nonce;

};

Expand Down
11 changes: 1 addition & 10 deletions xmrstak/backend/amd/jconf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ using namespace rapidjson;
/*
* This enum needs to match index in oConfigValues, otherwise we will get a runtime error
*/
enum configEnum { iGpuThreadNum, aGpuThreadsConf, iPlatformIdx };
enum configEnum { aGpuThreadsConf, iPlatformIdx };

struct configVal {
configEnum iName;
Expand All @@ -58,7 +58,6 @@ struct configVal {

//Same order as in configEnum, as per comment above
configVal oConfigValues[] = {
{ iGpuThreadNum, "gpu_thread_num", kNumberType },
{ aGpuThreadsConf, "gpu_threads_conf", kArrayType },
{ iPlatformIdx, "platform_index", kNumberType }
};
Expand Down Expand Up @@ -235,14 +234,6 @@ bool jconf::parse_config(const char* sFilename)
}

size_t n_thd = prv->configValues[aGpuThreadsConf]->Size();
if(prv->configValues[iGpuThreadNum]->GetUint64() != n_thd)
{
printer::inst()->print_msg(L0,
"Invalid config file. Your GPU config array has %llu members, while you want to use %llu threads.",
int_port(n_thd), int_port(prv->configValues[iGpuThreadNum]->GetUint64()));
return false;
}

thd_cfg c;
for(size_t i=0; i < n_thd; i++)
{
Expand Down
18 changes: 11 additions & 7 deletions xmrstak/backend/amd/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,10 @@ void minethd::consume_work()
void minethd::work_main()
{
uint64_t iCount = 0;

cryptonight_ctx* cpu_ctx;
cpu_ctx = cpu::minethd::minethd_alloc_ctx();
cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);

globalStates::inst().iConsumeCnt++;
uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);

while (bQuit == 0)
{
Expand All @@ -190,17 +187,24 @@ void minethd::work_main()
continue;
}

if(oWork.bNiceHash)
pGpuCtx->Nonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
else
pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
uint32_t h_per_round = pGpuCtx->rawIntensity;
size_t round_ctr = 0;

assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
uint32_t target = oWork.iTarget32;
XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);

if(oWork.bNiceHash)
pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);

while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
{
//Allocate a new nonce every 16 rounds
if((round_ctr++ & 0xF) == 0)
{
globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
}

cl_uint results[0x100];
memset(results,0,sizeof(cl_uint)*(0x100));

Expand Down
7 changes: 3 additions & 4 deletions xmrstak/backend/cpu/autoAdjust.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class autoAdjust
if(L3KB_size <= 0)
break;

double_mode = L3KB_size / 2048 > (int32_t)(corecnt-i);
double_mode = L3KB_size / 1024 > (int32_t)(corecnt-i);

conf += std::string(" { \"low_power_mode\" : ");
conf += std::string(double_mode ? "true" : "false");
Expand All @@ -93,10 +93,9 @@ class autoAdjust
aff_id++;

if(double_mode)

L3KB_size -= 2048;
else
L3KB_size -= 1024;
else
L3KB_size -= 1024;
}
}

Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/cpu/crypto/cryptonight.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ extern "C" {
#include <stddef.h>
#include <inttypes.h>

#define MEMORY 2097152/2
/* 2097152 / 2 = 1048576 (1 MiB). The replacement list is parenthesized so the
 * macro expands as a single value inside larger expressions: without the
 * parentheses, `x / MEMORY` would expand to `x / 2097152 / 2`. */
#define MEMORY (2097152 / 2)

typedef struct {
uint8_t hash_state[224]; // Need only 200, explicit align
Expand Down
64 changes: 36 additions & 28 deletions xmrstak/backend/cpu/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ bool minethd::self_test()
cryptonight_free_ctx(ctx0);
return false;
}
if((ctx2 = minethd_alloc_ctx()) == nullptr)
if((ctx2 = minethd_alloc_ctx()) == nullptr)
{
cryptonight_free_ctx(ctx0);
cryptonight_free_ctx(ctx1);
Expand Down Expand Up @@ -227,20 +227,20 @@ bool minethd::self_test()

hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false);
hashf("This is a test", 14, out, ctx0);
// bResult = memcmp(out, "\x88\xe5\xe6\x84\xdb\x17\x8c\x82\x5e\x4c\xe3\x80\x9c\xcc\x1c\xda\x79\xcc\x2a\xdb\x44\x06\xbf\xf9\x3d\xeb\xea\xf2\x0a\x8b\xeb\xd9", 32) == 0;
// bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;

hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true);
hashf("This is a test", 14, out, ctx0);
// bResult &= memcmp(out, "\x88\xe5\xe6\x84\xdb\x17\x8c\x82\x5e\x4c\xe3\x80\x9c\xcc\x1c\xda\x79\xcc\x2a\xdb\x44\x06\xbf\xf9\x3d\xeb\xea\xf2\x0a\x8b\xeb\xd9", 32) == 0;
// bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;

hashdf = func_dbl_selector(::jconf::inst()->HaveHardwareAes(), false);
// hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
// bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
//bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
// "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;

hashdf = func_dbl_selector(::jconf::inst()->HaveHardwareAes(), true);
// hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
// bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
//bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
// "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;

cryptonight_free_ctx(ctx0);
Expand All @@ -253,7 +253,7 @@ bool minethd::self_test()
printer::inst()->print_msg(L0,
"Cryptonight hash self-test failed. This might be caused by bad compiler optimizations.");

return true;
return bResult;
}

std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work& pWork)
Expand All @@ -271,7 +271,7 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
{
win_exit();
}


//Launch the requested number of single and double threads, to distribute
//load evenly we need to alternate single and double threads
Expand Down Expand Up @@ -359,31 +359,36 @@ void minethd::work_main()
either because of network latency, or a socket problem. Since mining work is the
raison d'etre of this software, it is sensible to just wait until we have something*/

while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

consume_work();
continue;
}

if(oWork.bNiceHash)
result.iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
else
result.iNonce = calc_start_nonce(oWork.iResumeCnt);
size_t nonce_ctr = 0;
constexpr size_t nonce_chunk = 4096; // Needs to be a power of 2

assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
memcpy(result.sJobID, oWork.sJobID, sizeof(job_result::sJobID));

while(globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
if(oWork.bNiceHash)
result.iNonce = *piNonce;

while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
{
if ((iCount & 0x7) == 0) //Store stats every 16 hashes
if ((iCount++ & 0x7) == 0) //Store stats every 16 hashes
{
using namespace std::chrono;
uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
iHashCount.store(iCount, std::memory_order_relaxed);
iTimestamp.store(iStamp, std::memory_order_relaxed);
}
iCount++;

if((nonce_ctr++ & (nonce_chunk-1)) == 0)
{
globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk);
}

*piNonce = ++result.iNonce;

Expand Down Expand Up @@ -470,24 +475,23 @@ void minethd::double_work_main()
either because of network latency, or a socket problem. Since mining work is the
raison d'etre of this software, it is sensible to just wait until we have something*/

while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

consume_work();
memcpy(bDoubleWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
memcpy(bDoubleWorkBlob + oWork.iWorkSize, oWork.bWorkBlob, oWork.iWorkSize);
piNonce1 = prep_double_work(bDoubleWorkBlob);
continue;
}

if(oWork.bNiceHash)
iNonce = calc_nicehash_nonce(*piNonce0, oWork.iResumeCnt);
else
iNonce = calc_start_nonce(oWork.iResumeCnt);
size_t nonce_ctr = 0;
constexpr size_t nonce_chunk = 4096; //Needs to be a power of 2

assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));

while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
if(oWork.bNiceHash)
iNonce = *piNonce0;

while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
{
if ((iCount & 0x7) == 0) //Store stats every 16 hashes
{
Expand All @@ -496,8 +500,14 @@ void minethd::double_work_main()
iHashCount.store(iCount, std::memory_order_relaxed);
iTimestamp.store(iStamp, std::memory_order_relaxed);
}

iCount += 2;


if((nonce_ctr++ & (nonce_chunk/2 - 1)) == 0)
{
globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, nonce_chunk);
}


*piNonce0 = ++iNonce;
*piNonce1 = ++iNonce;
Expand All @@ -514,8 +524,6 @@ void minethd::double_work_main()
}

consume_work();
memcpy(bDoubleWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
memcpy(bDoubleWorkBlob + oWork.iWorkSize, oWork.bWorkBlob, oWork.iWorkSize);
piNonce1 = prep_double_work(bDoubleWorkBlob);
}

Expand Down
7 changes: 6 additions & 1 deletion xmrstak/backend/globalStates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace xmrstak
{


void globalStates::switch_work(miner_work& pWork)
void globalStates::switch_work(miner_work& pWork, pool_data& dat)
{
// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
// faster than threads can consume them. This should never happen in real life.
Expand All @@ -43,6 +43,11 @@ void globalStates::switch_work(miner_work& pWork)
while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
std::this_thread::sleep_for(std::chrono::milliseconds(100));

size_t xid = dat.pool_id;
dat.pool_id = pool_id;
pool_id = xid;

dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst);
oGlobalWork = pWork;
iConsumeCnt.store(0, std::memory_order_seq_cst);
iGlobalJobNo++;
Expand Down
Loading

0 comments on commit f1f8c06

Please sign in to comment.