diff --git a/CMakeLists.txt b/CMakeLists.txt
index b385d8584..c3c50688a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-project(xmr-stak)
+project(aeon-stak)
 
 cmake_minimum_required(VERSION 3.0.1)
 
@@ -178,7 +178,7 @@ list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}")
 # Find OpenCL
 ###############################################################################
 
-option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" OFF)
+option(OpenCL_ENABLE "Enable or disable OpenCL spport (AMD GPU support)" ON)
 if(OpenCL_ENABLE)
     # try to find AMD OpenCL before NVIDIA OpenCL
     find_path(OpenCL_INCLUDE_DIR
diff --git a/README.md b/README.md
index 926e85091..cea9cd471 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,6 @@
 # XMR-Stak - Monero All-in-One Mining Software
 
 XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NVIDIA gpus.
-AEON changes based on barretts/xmr-stak
 
 ## HTML reports
 <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-hashrate.png" width="260"> <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-results.png" width="260"> <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-connection.png" width="260">
diff --git a/doc/tuning.md b/doc/tuning.md
index 806f05915..645923559 100644
--- a/doc/tuning.md
+++ b/doc/tuning.md
@@ -53,15 +53,16 @@ In the most cases a `worksize` of `16` or `8` is optimal.
 
 ### Add More GPUs
 
-To add a new GPU you need to add a new config set to `gpu_threads_conf` and increase `gpu_thread_num"` to the number of gpus (entries in `gpu_threads_conf`).
-`index` is the number of the gpu.
+To add a new GPU you need to add a new config set to `gpu_threads_conf`. `index` is the OpenCL index of the GPU.
+`platform_index` is the index of the OpenCL platform (Intel / AMD / Nvidia).
+If you are unsure of either the GPU or the platform index value, you can use the `clinfo` tool that comes with the AMD APP SDK to dump the values.
 
 ```
-"gpu_thread_num" : 2,
-
 "gpu_threads_conf" :
 [
     { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false },
     { "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false },
 ],
+
+"platform_index" : 0,
 ```
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 392903aad..69ef18b84 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -844,7 +844,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 		}
 	}*/
 
-	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+	size_t tmpNonce = ctx->Nonce;
+	if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 	{
 		printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
 		return ERR_OCL_API;
@@ -896,8 +897,9 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput)
 			// round up to next multiple of w_size
 			BranchNonces[i] = ((BranchNonces[i] + w_size - 1u) / w_size) * w_size;
 			// number of global threads must be a multiple of the work group size (w_size)
-			assert(BranchNonces%w_size == 0);
-			if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
+			assert(BranchNonces[i]%w_size == 0);
+			size_t tmpNonce = ctx->Nonce;
+			if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &tmpNonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
 			{
 				printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
 				return ERR_OCL_API;
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index 5ff7ea17b..c2d708d90 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -36,7 +36,7 @@ struct GpuContext
 	int computeUnits;
 	std::string name;
 
-	size_t Nonce;
+	uint32_t Nonce;
 
 };
 
diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp
index c2bf1fa19..0617aeb2f 100644
--- a/xmrstak/backend/amd/jconf.cpp
+++ b/xmrstak/backend/amd/jconf.cpp
@@ -48,7 +48,7 @@ using namespace rapidjson;
 /*
  * This enum needs to match index in oConfigValues, otherwise we will get a runtime error
  */
-enum configEnum { iGpuThreadNum, aGpuThreadsConf, iPlatformIdx };
+enum configEnum { aGpuThreadsConf, iPlatformIdx };
 
 struct configVal {
 	configEnum iName;
@@ -58,7 +58,6 @@ struct configVal {
 
 //Same order as in configEnum, as per comment above
 configVal oConfigValues[] = {
-	{ iGpuThreadNum, "gpu_thread_num", kNumberType },
 	{ aGpuThreadsConf, "gpu_threads_conf", kArrayType },
 	{ iPlatformIdx, "platform_index", kNumberType }
 };
@@ -235,14 +234,6 @@ bool jconf::parse_config(const char* sFilename)
 	}
 
 	size_t n_thd = prv->configValues[aGpuThreadsConf]->Size();
-	if(prv->configValues[iGpuThreadNum]->GetUint64() != n_thd)
-	{
-		printer::inst()->print_msg(L0,
-			"Invalid config file. Your GPU config array has %llu members, while you want to use %llu threads.",
-			int_port(n_thd), int_port(prv->configValues[iGpuThreadNum]->GetUint64()));
-		return false;
-	}
-
 	thd_cfg c;
 	for(size_t i=0; i < n_thd; i++)
 	{
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index 5ca10d3c0..9d18860f1 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -167,13 +167,10 @@ void minethd::consume_work()
 void minethd::work_main()
 {
 	uint64_t iCount = 0;
-
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
-	
 	globalStates::inst().iConsumeCnt++;
-	uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
 	
 	while (bQuit == 0)
 	{
@@ -190,17 +187,24 @@ void minethd::work_main()
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			pGpuCtx->Nonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
+		uint32_t h_per_round = pGpuCtx->rawIntensity;
+		size_t round_ctr = 0;
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		uint32_t target = oWork.iTarget32;
 		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target);
 
+		if(oWork.bNiceHash)
+			pGpuCtx->Nonce = *(uint32_t*)(oWork.bWorkBlob + 39);
+
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
+			//Allocate a new nonce every 16 rounds
+			if((round_ctr++ & 0xF) == 0)
+			{
+				globalStates::inst().calc_start_nonce(pGpuCtx->Nonce, oWork.bNiceHash, h_per_round * 16);
+			}
+
 			cl_uint results[0x100];
 			memset(results,0,sizeof(cl_uint)*(0x100));
 
diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp
index e1d0b2586..e88d272e2 100644
--- a/xmrstak/backend/cpu/autoAdjust.hpp
+++ b/xmrstak/backend/cpu/autoAdjust.hpp
@@ -74,7 +74,7 @@ class autoAdjust
 				if(L3KB_size <= 0)
 					break;
 
-				double_mode = L3KB_size / 2048 > (int32_t)(corecnt-i);
+				double_mode = L3KB_size / 1024 > (int32_t)(corecnt-i);
 
 				conf += std::string("    { \"low_power_mode\" : ");
 				conf += std::string(double_mode ? "true" : "false");
@@ -93,10 +93,9 @@ class autoAdjust
 					aff_id++;
 
 				if(double_mode)
-
 					L3KB_size -= 2048;
-  			else
-		  		L3KB_size -= 1024;          
+				else
+					L3KB_size -= 1024;
 			}
 		}
 
diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h
index 3b6c0c56a..d47b79f87 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight.h
@@ -8,7 +8,7 @@ extern "C" {
 #include <stddef.h>
 #include <inttypes.h>
 
-#define MEMORY  2097152/2
+#define MEMORY  2097152 / 2
 
 typedef struct {
 	uint8_t hash_state[224]; // Need only 200, explicit align
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index c85291d46..05ed0ceb9 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -197,7 +197,7 @@ bool minethd::self_test()
 		cryptonight_free_ctx(ctx0);
 		return false;
 	}
-  	if((ctx2 = minethd_alloc_ctx()) == nullptr)
+	if((ctx2 = minethd_alloc_ctx()) == nullptr)
 	{
 		cryptonight_free_ctx(ctx0);
 		cryptonight_free_ctx(ctx1);
@@ -227,20 +227,20 @@ bool minethd::self_test()
 
 	hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false);
 	hashf("This is a test", 14, out, ctx0);
-	// bResult = memcmp(out, "\x88\xe5\xe6\x84\xdb\x17\x8c\x82\x5e\x4c\xe3\x80\x9c\xcc\x1c\xda\x79\xcc\x2a\xdb\x44\x06\xbf\xf9\x3d\xeb\xea\xf2\x0a\x8b\xeb\xd9", 32) == 0;
+	// bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 
 	hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true);
 	hashf("This is a test", 14, out, ctx0);
-	// bResult &= memcmp(out, "\x88\xe5\xe6\x84\xdb\x17\x8c\x82\x5e\x4c\xe3\x80\x9c\xcc\x1c\xda\x79\xcc\x2a\xdb\x44\x06\xbf\xf9\x3d\xeb\xea\xf2\x0a\x8b\xeb\xd9", 32) == 0;
+	// bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 
 	hashdf = func_dbl_selector(::jconf::inst()->HaveHardwareAes(), false);
-	// hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
-	// bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
+	hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
+	//bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
 	//	                   "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
 
 	hashdf = func_dbl_selector(::jconf::inst()->HaveHardwareAes(), true);
-	// hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
-	// bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
+	hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1);
+	//bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59"
 	//	                   "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0;
 
 	cryptonight_free_ctx(ctx0);
@@ -253,7 +253,7 @@ bool minethd::self_test()
 		printer::inst()->print_msg(L0,
 		    "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations.");
 
-	return true;
+	return bResult;
 }
 
 std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work& pWork)
@@ -271,7 +271,7 @@ std::vector<iBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work
 	{
 		win_exit();
 	}
-
+	
 
 	//Launch the requested number of single and double threads, to distribute
 	//load evenly we need to alternate single and double threads
@@ -359,31 +359,36 @@ void minethd::work_main()
 			    either because of network latency, or a socket problem. Since we are
 			    raison d'etre of this software it us sensible to just wait until we have something*/
 
-			while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			result.iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			result.iNonce = calc_start_nonce(oWork.iResumeCnt);
+		size_t nonce_ctr = 0;
+		constexpr size_t nonce_chunk = 4096; // Needs to be a power of 2
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 		memcpy(result.sJobID, oWork.sJobID, sizeof(job_result::sJobID));
 
-		while(globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		if(oWork.bNiceHash)
+			result.iNonce = *piNonce;
+
+		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
-			if ((iCount & 0x7) == 0) //Store stats every 16 hashes
+			if ((iCount++ & 0x7) == 0) //Store stats every 16 hashes
 			{
 				using namespace std::chrono;
 				uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
 				iHashCount.store(iCount, std::memory_order_relaxed);
 				iTimestamp.store(iStamp, std::memory_order_relaxed);
 			}
-			iCount++;
+
+			if((nonce_ctr++ & (nonce_chunk-1)) == 0)
+			{
+				globalStates::inst().calc_start_nonce(result.iNonce, oWork.bNiceHash, nonce_chunk);
+			}
 
 			*piNonce = ++result.iNonce;
 
@@ -470,24 +475,23 @@ void minethd::double_work_main()
 			either because of network latency, or a socket problem. Since we are
 			raison d'etre of this software it us sensible to just wait until we have something*/
 
-			while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+			while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 				std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
 			consume_work();
-			memcpy(bDoubleWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
-			memcpy(bDoubleWorkBlob + oWork.iWorkSize, oWork.bWorkBlob, oWork.iWorkSize);
 			piNonce1 = prep_double_work(bDoubleWorkBlob);
 			continue;
 		}
 
-		if(oWork.bNiceHash)
-			iNonce = calc_nicehash_nonce(*piNonce0, oWork.iResumeCnt);
-		else
-			iNonce = calc_start_nonce(oWork.iResumeCnt);
+		size_t nonce_ctr = 0;
+		constexpr size_t nonce_chunk = 4096; //Needs to be a power of 2
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 
-		while (globalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		if(oWork.bNiceHash)
+			iNonce = *piNonce0;
+
+		while (globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
 			if ((iCount & 0x7) == 0) //Store stats every 16 hashes
 			{
@@ -496,8 +500,14 @@ void minethd::double_work_main()
 				iHashCount.store(iCount, std::memory_order_relaxed);
 				iTimestamp.store(iStamp, std::memory_order_relaxed);
 			}
-
 			iCount += 2;
+			
+			
+			if((nonce_ctr++ & (nonce_chunk/2 - 1)) == 0)
+			{
+				globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, nonce_chunk);
+			}
+
 
 			*piNonce0 = ++iNonce;
 			*piNonce1 = ++iNonce;
@@ -514,8 +524,6 @@ void minethd::double_work_main()
 		}
 
 		consume_work();
-		memcpy(bDoubleWorkBlob, oWork.bWorkBlob, oWork.iWorkSize);
-		memcpy(bDoubleWorkBlob + oWork.iWorkSize, oWork.bWorkBlob, oWork.iWorkSize);
 		piNonce1 = prep_double_work(bDoubleWorkBlob);
 	}
 
diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp
index 910404062..78823c53c 100644
--- a/xmrstak/backend/globalStates.cpp
+++ b/xmrstak/backend/globalStates.cpp
@@ -34,7 +34,7 @@ namespace xmrstak
 {
 
 
-void globalStates::switch_work(miner_work& pWork)
+void globalStates::switch_work(miner_work& pWork, pool_data& dat)
 {
 	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
 	// faster than threads can consume them. This should never happen in real life.
@@ -43,6 +43,11 @@ void globalStates::switch_work(miner_work& pWork)
 	while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
 		std::this_thread::sleep_for(std::chrono::milliseconds(100));
 
+	size_t xid = dat.pool_id;
+	dat.pool_id = pool_id;
+	pool_id = xid;
+
+	dat.iSavedNonce = iGlobalNonce.exchange(dat.iSavedNonce, std::memory_order_seq_cst);
 	oGlobalWork = pWork;
 	iConsumeCnt.store(0, std::memory_order_seq_cst);
 	iGlobalJobNo++;
diff --git a/xmrstak/backend/globalStates.hpp b/xmrstak/backend/globalStates.hpp
index 73ccf743e..1c28d5cf1 100644
--- a/xmrstak/backend/globalStates.hpp
+++ b/xmrstak/backend/globalStates.hpp
@@ -2,6 +2,7 @@
 
 #include "miner_work.hpp"
 #include "xmrstak/misc/environment.hpp"
+#include "xmrstak/misc/console.hpp"
 
 #include <atomic>
 
@@ -9,9 +10,18 @@
 namespace xmrstak
 {
 
-struct globalStates
+struct pool_data // State passed in-out to globalStates::switch_work().
+{
+	uint32_t iSavedNonce; // Exchanged with globalStates::iGlobalNonce on each switch_work() call.
+	size_t   pool_id;     // Swapped with globalStates::pool_id on each switch_work() call.
+ // A default-constructed pool_data starts with nonce 0 and pool id 0.
+	pool_data() : iSavedNonce(0), pool_id(0)
+	{
+	}
+};
 
+struct globalStates
+{
 	static inline globalStates& inst()
 	{
 		auto& env = environment::inst();
@@ -20,19 +30,28 @@ struct globalStates
 		return *env.pglobalStates;
 	}
 
-	void switch_work(miner_work& pWork);
+	//pool_data is in-out winapi style
+	void switch_work(miner_work& pWork, pool_data& dat);
+
+	inline void calc_start_nonce(uint32_t& nonce, bool use_nicehash, uint32_t reserve_count) // Advances the shared iGlobalNonce by reserve_count and writes its previous value into nonce.
+	{
+		if(use_nicehash) // Keep the top byte already stored in nonce; only the bits below it come from the counter.
+			nonce = (nonce & 0xFF000000) | iGlobalNonce.fetch_add(reserve_count); // NOTE(review): the counter is not masked to 24 bits, so values >= 0x01000000 also set bits in the kept top byte.
+		else
+			nonce = iGlobalNonce.fetch_add(reserve_count); // fetch_add returns the value held before the addition.
+	}
 
 	miner_work oGlobalWork;
 	std::atomic<uint64_t> iGlobalJobNo;
 	std::atomic<uint64_t> iConsumeCnt;
+	std::atomic<uint32_t> iGlobalNonce;
 	uint64_t iThreadCount;
+	size_t pool_id;
 
-	private:
-
+private:
 	globalStates() : iThreadCount(0)
 	{
 	}
-	
 };
 
 } // namepsace xmrstak
diff --git a/xmrstak/backend/iBackend.hpp b/xmrstak/backend/iBackend.hpp
index 0be8f0a0e..ab964ceb6 100644
--- a/xmrstak/backend/iBackend.hpp
+++ b/xmrstak/backend/iBackend.hpp
@@ -9,38 +9,8 @@
 
 namespace xmrstak
 {
-	// only allowed for unsigned value \todo add static assert
-	template<typename T>
-	T reverseBits(T value)
-	{
-		/* init with value (to get LSB) */
-		T result = value;
-		/* extra shift needed at end */
-		int s = sizeof(T) * CHAR_BIT - 1;
-		for (value >>= 1; value; value >>= 1)
-		{
-			result <<= 1;
-			result |= value & 1;
-			s--;
-		}
-		/* shift when values highest bits are zero */
-		result <<= s;
-		return result;
-	}
-
 	struct iBackend
 	{
-		inline uint32_t calc_start_nonce(uint32_t resume)
-		{
-			return reverseBits<uint32_t>(static_cast<uint32_t>(iThreadNo + globalStates::inst().iThreadCount * resume));
-		}
-
-		// Limited version of the nonce calc above
-		inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume)
-		{
-			return start | ( calc_start_nonce(resume) >> 8u );
-		}
-
 		std::atomic<uint64_t> iHashCount;
 		std::atomic<uint64_t> iTimestamp;
 		uint32_t iThreadNo;
diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp
index aecbd7041..6b5720c54 100644
--- a/xmrstak/backend/miner_work.hpp
+++ b/xmrstak/backend/miner_work.hpp
@@ -15,7 +15,6 @@ namespace xmrstak
 		char        sJobID[64];
 		uint8_t     bWorkBlob[112];
 		uint32_t    iWorkSize;
-		uint32_t    iResumeCnt;
 		uint64_t    iTarget;
 		// \todo remove workaround needed for amd
 		uint32_t    iTarget32;
@@ -25,8 +24,8 @@ namespace xmrstak
 
 		miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(0) { }
 
-		miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize, uint32_t iResumeCnt,
-			uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize), iResumeCnt(iResumeCnt),
+		miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize,
+			uint64_t iTarget, bool bNiceHash, size_t iPoolId) : iWorkSize(iWorkSize),
 			iTarget(iTarget), bNiceHash(bNiceHash), bStall(false), iPoolId(iPoolId)
 		{
 			assert(iWorkSize <= sizeof(bWorkBlob));
@@ -41,7 +40,6 @@ namespace xmrstak
 			assert(this != &from);
 
 			iWorkSize = from.iWorkSize;
-			iResumeCnt = from.iResumeCnt;
 			iTarget = from.iTarget;
 			iTarget32 = from.iTarget32;
 			bNiceHash = from.bNiceHash;
@@ -68,7 +66,6 @@ namespace xmrstak
 			assert(this != &from);
 
 			iWorkSize = from.iWorkSize;
-			iResumeCnt = from.iResumeCnt;
 			iTarget = from.iTarget;
 			iTarget32 = from.iTarget32;
 			bNiceHash = from.bNiceHash;
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index fcd01cd66..faca32718 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -34,6 +34,7 @@
 
 #include <assert.h>
 #include <cmath>
+#include <cstdlib>
 #include <chrono>
 #include <thread>
 #include <bitset>
@@ -88,12 +89,12 @@ bool minethd::self_test()
 	if(::jconf::inst()->HaveHardwareAes())
 	{
 		//cryptonight_hash_ctx("This is a test", 14, out, ctx0);
-		bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
+	//	bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 	}
 	else
 	{
 		//cryptonight_hash_ctx_soft("This is a test", 14, out, ctx0);
-		bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
+	//	bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0;
 	}
 	delete ctx0;
 
@@ -192,11 +193,10 @@ void minethd::consume_work()
 void minethd::work_main()
 {
 	uint64_t iCount = 0;
-	uint32_t iNonce;
 	cryptonight_ctx* cpu_ctx;
 	cpu_ctx = cpu::minethd::minethd_alloc_ctx();
 	cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/);
-	uint32_t* piNonce = (uint32_t*)(oWork.bWorkBlob + 39);
+	uint32_t iNonce;
 
 	globalStates::inst().iConsumeCnt++;
 
@@ -222,16 +222,23 @@ void minethd::work_main()
 		}
 
 		cryptonight_extra_cpu_set_data(&ctx, oWork.bWorkBlob, oWork.iWorkSize);
-		if(oWork.bNiceHash)
-			iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt);
-		else
-			iNonce = calc_start_nonce(oWork.iResumeCnt);
+
+		uint32_t h_per_round = ctx.device_blocks * ctx.device_threads;
+		size_t round_ctr = 0;
 
 		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
 
+		if(oWork.bNiceHash)
+			iNonce = *(uint32_t*)(oWork.bWorkBlob + 39);
+
 		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
-
+			//Allocate a new nonce every 16 rounds
+			if((round_ctr++ & 0xF) == 0)
+			{
+				globalStates::inst().calc_start_nonce(iNonce, oWork.bNiceHash, h_per_round * 16);
+			}
+			
 			uint32_t foundNonce[10];
 			uint32_t foundCount;
 
@@ -257,8 +264,8 @@ void minethd::work_main()
 					executor::inst()->log_result_error("NVIDIA Invalid Result");
 			}
 
-			iCount += ctx.device_blocks * ctx.device_threads;
-			iNonce += ctx.device_blocks * ctx.device_threads;
+			iCount += h_per_round;
+			iNonce += h_per_round;
 
 			using namespace std::chrono;
 			uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
index 07ae16969..340174c89 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp
@@ -2,7 +2,8 @@
 
 typedef struct {
 	uint32_t h[8], s[4], t[2];
-	int buflen, nullt;
+	uint32_t buflen;
+	int nullt;
 	uint8_t buf[64];
 } blake_state;
 
@@ -50,7 +51,7 @@ __constant__ uint32_t d_blake_cst[16]
 	0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
 };
 
-__device__ void cn_blake_compress(blake_state * __restrict__ S, const uint8_t * __restrict__ block)
+__device__ void cn_blake_compress(blake_state *  S, const uint8_t *  block)
 {
 	uint32_t v[16], m[16], i;
 
@@ -89,12 +90,12 @@ __device__ void cn_blake_compress(blake_state * __restrict__ S, const uint8_t *
 	for (i = 0; i < 8;  ++i) S->h[i] ^= S->s[i % 4];
 }
 
-__device__ void cn_blake_update(blake_state * __restrict__ S, const uint8_t * __restrict__ data, uint64_t datalen)
+__device__ void cn_blake_update(blake_state *  S, const uint8_t *  data, uint64_t datalen)
 {
-	int left = S->buflen >> 3;
-	int fill = 64 - left;
+	uint32_t left = S->buflen >> 3;
+	uint32_t fill = 64 - left;
 
-	if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) 
+	if (left && (((datalen >> 3) & 0x3F) >= fill)) 
 	{
 		memcpy((void *) (S->buf + left), (void *) data, fill);
 		S->t[0] += 512;
@@ -125,7 +126,7 @@ __device__ void cn_blake_update(blake_state * __restrict__ S, const uint8_t * __
 	}
 }
 
-__device__ void cn_blake_final(blake_state * __restrict__ S, uint8_t * __restrict__ digest)
+__device__ void cn_blake_final(blake_state *  S, uint8_t *  digest)
 {
 	const uint8_t padding[] = 
 	{
@@ -177,7 +178,7 @@ __device__ void cn_blake_final(blake_state * __restrict__ S, uint8_t * __restric
 	U32TO8(digest + 28, S->h[7]);
 }
 
-__device__ void cn_blake(const uint8_t * __restrict__ in, uint64_t inlen, uint8_t * __restrict__ out)
+__device__ void cn_blake(const uint8_t *  in, uint64_t inlen, uint8_t *  out)
 {
 	blake_state bs;
 	blake_state *S = (blake_state *)&bs;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 910970308..247255bea 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -102,7 +102,7 @@ __global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int parti
 	const int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 3;
 	const int sub = ( threadIdx.x & 7 ) << 2;
 
-	const int batchsize = 0x80000 >> bfactor;
+	const int batchsize = 0x40000 >> bfactor;
 	const int start = partidx * batchsize;
 	const int end = start + batchsize;
 
@@ -156,7 +156,11 @@ __forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_
 #else
     unusedVar( ptr );
     unusedVar( sub );
-    return __shfl( val, src, 4 );
+#   if(__CUDACC_VER_MAJOR__ >= 9)
+    return __shfl_sync(0xFFFFFFFF, val, src, 4 );
+#	else
+	return __shfl( val, src, 4 );
+#	endif
 #endif
 }
 
@@ -203,7 +207,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 		#pragma unroll 2
 		for ( int x = 0; x < 2; ++x )
 		{
-			j = ( ( shuffle(sPtr,sub, a, 0) & 0x0FFFF0 ) >> 2 ) + sub;
+			j = ( ( shuffle(sPtr,sub, a, 0) & 0xFFFF0 ) >> 2 ) + sub;
 
 			const uint32_t x_0 = loadGlobal32<uint32_t>( long_state + j );
 			const uint32_t x_1 = shuffle(sPtr,sub, x_0, sub + 1);
@@ -221,8 +225,8 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti
 			//long_state[j] = d[0] ^ d[1];
 			storeGlobal32( long_state + j, d[0] ^ d[1] );
 
-			//MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & 0x0FFFF0]);
-			j = ( ( *t1 & 0x0FFFF0 ) >> 2 ) + sub;
+			//MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & 0xFFFF0]);
+			j = ( ( *t1 & 0xFFFF0 ) >> 2 ) + sub;
 
 			uint32_t yy[2];
 			*( (uint64_t*) yy ) = loadGlobal64<uint64_t>( ( (uint64_t *) long_state )+( j >> 1 ) );
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
index 98caa9387..b3366d940 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp
@@ -18,8 +18,8 @@ struct uint3  blockDim;
 #define __shfl(a,b,c) 1
 #endif
 
-#define MEMORY         (1 << 20) // 1 MiB / 1048576 B
-#define ITER           (1 << 19) // 524288
+#define MEMORY         (1 << 20) // 1 MiB / 1048576 B
+#define ITER           (1 << 19) // 524288
 #define AES_BLOCK_SIZE  16
 #define AES_KEY_SIZE    32
 #define INIT_SIZE_BLK   8
@@ -36,8 +36,8 @@ __forceinline__ __device__ uint64_t cuda_ROTL64(const uint64_t value, const int
 	{
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
-	}
-	else
+	} 
+	else 
 	{
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
 		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp
index 0ebc1f070..e8e327896 100644
--- a/xmrstak/cli/cli-miner.cpp
+++ b/xmrstak/cli/cli-miner.cpp
@@ -297,7 +297,7 @@ int main(int argc, char *argv[])
 	printer::inst()->print_str("'h' - hashrate\n");
 	printer::inst()->print_str("'r' - results\n");
 	printer::inst()->print_str("'c' - connection\n");
-  printer::inst()->print_str("-----------------------------Compiled by Indeed Miners-----------------------------\n");
+  printer::inst()->print_str("-----------------------------AEON port by Indeed Miners-----------------------------\n");
   printer::inst()->print_str("88 88b 88 8888b.  888888 888888 8888b.      8b    d8 88 88b 88 888888 88''Yb .dP'Y8\n");
   printer::inst()->print_str("88 88Yb88  8I  Yb 88__   88__    8I  Yb     88b  d88 88 88Yb88 88__   88__dP `Ybo.'\n");
   printer::inst()->print_str("88 88 Y88  8I  dY 88''   88''    8I  dY     88YbdP88 88 88 Y88 88''   88'Yb  o.`Y8b\n");
@@ -351,7 +351,7 @@ void do_benchmark()
 	printer::inst()->print_msg(L0, "Running a 60 second benchmark...");
 
 	uint8_t work[76] = {0};
-	xmrstak::miner_work oWork = xmrstak::miner_work("", work, sizeof(work), 0, 0, false, 0);
+	xmrstak::miner_work oWork = xmrstak::miner_work("", work, sizeof(work), 0, false, 0);
 	pvThreads = xmrstak::BackendConnector::thread_starter(oWork);
 
 	uint64_t iStartStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
@@ -359,7 +359,8 @@ void do_benchmark()
 	std::this_thread::sleep_for(std::chrono::seconds(60));
 
 	oWork = xmrstak::miner_work();
-	xmrstak::globalStates::inst().switch_work(oWork);
+	xmrstak::pool_data dat;
+	xmrstak::globalStates::inst().switch_work(oWork, dat);
 
 	double fTotalHps = 0.0;
 	for (uint32_t i = 0; i < pvThreads->size(); i++)
diff --git a/xmrstak/config.tpl b/xmrstak/config.tpl
index 3eddb30e2..60d85cdc1 100644
--- a/xmrstak/config.tpl
+++ b/xmrstak/config.tpl
@@ -12,8 +12,8 @@ R"===(
 
 /*
  * Network timeouts.
- * Because of the way this client is written it doesn't need to constantly talk (keep-alive) to the server to make
- * sure it is there. We detect a buggy / overloaded server by the call timeout. The default values will be ok for
+ * Because of the way this client is written it doesn't need to constantly talk (keep-alive) to the server to make 
+ * sure it is there. We detect a buggy / overloaded server by the call timeout. The default values will be ok for 
  * nearly all cases. If they aren't the pool has most likely overload issues. Low call timeout values are preferable -
  * long timeouts mean that we waste hashes on potentially stale jobs. Connection report will tell you how long the
  * server usually takes to process our calls.
@@ -157,20 +157,5 @@ R"===(
  */
 "prefer_ipv4" : true,
 
-/*
- * Dev donation.
- * Percentage of your hashing power that you want to donate to the developer, can be 0 if you don't want to do that.
- * Example of how it works for the default setting of 2:
- * You miner will mine into your usual pool for 98 minutes, then switch to the developer's pool for 2 minute.
- * Switching is instant, and only happens after a successful connection, so you never loose any hashes.
- *
- * If you plan on changing this setting to 0 please consider making a one off donation to our wallets:
- * fireice-uk:
- * 4581HhZkQHgZrZjKeCfCJxZff9E3xCgHGF25zABZz7oR71TnbbgiS7sK9jveE6Dx6uMs2LwszDuvQJgRZQotdpHt1fTdDhk
- * psychocrypt:
- * 43NoJVEXo21hGZ6tDG6Z3g4qimiGdJPE6GRxAmiWwm26gwr62Lqo7zRiCJFSBmbkwTGNuuES9ES5TgaVHceuYc4Y75txCTU
- *
- */
-"donation_level" : 2,
-
 )==="
+		
\ No newline at end of file
diff --git a/xmrstak/donate-level.hpp b/xmrstak/donate-level.hpp
index ce503cde3..f054f1832 100644
--- a/xmrstak/donate-level.hpp
+++ b/xmrstak/donate-level.hpp
@@ -15,4 +15,4 @@
  *
  */
 
-constexpr double fDevDonationLevel = 1.0 / 100.0;
+constexpr double fDevDonationLevel = 1.5 / 100.0;
diff --git a/xmrstak/http/webdesign.cpp b/xmrstak/http/webdesign.cpp
index 57ec351aa..c6fcea682 100644
--- a/xmrstak/http/webdesign.cpp
+++ b/xmrstak/http/webdesign.cpp
@@ -97,7 +97,7 @@ extern const char sHtmlCommonHeader [] =
 	"<link rel='stylesheet' href='style.css' /><title>%s</title></head>"
 	"<body>"
 	"<div class='all'>"
-	"<div class='header'><span style='color: rgb(255, 160, 0)'>AEON</span>-Stak Monero Miner</div>"
+	"<div class='header'><span style='color: rgb(255, 160, 0)'>XMR</span>-Stak Monero Miner</div>"
 
 	"<div class='flex-container'>"
 		"<div class='links flex-item'>"
diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp
index 5eb9f07ef..18da71224 100644
--- a/xmrstak/misc/executor.cpp
+++ b/xmrstak/misc/executor.cpp
@@ -121,7 +121,9 @@ void executor::sched_reconnect()
 		rt, int_port(iReconnectAttempts));
 
 	auto work = xmrstak::miner_work();
-	xmrstak::globalStates::inst().switch_work(work);
+	xmrstak::pool_data dat;
+
+	xmrstak::globalStates::inst().switch_work(work, dat);
 
 	push_timed_event(ex_event(EV_RECONNECT, usr_pool_id), rt);
 }
@@ -166,7 +168,8 @@ void executor::log_result_ok(uint64_t iActualDiff)
 
 jpsock* executor::pick_pool_by_id(size_t pool_id)
 {
-	assert(pool_id != invalid_pool_id);
+	if(pool_id == invalid_pool_id)
+		return nullptr;
 
 	if(pool_id == dev_pool_id)
 		return dev_pool;
@@ -233,14 +236,23 @@ void executor::on_pool_have_job(size_t pool_id, pool_job& oPoolJob)
 
 	jpsock* pool = pick_pool_by_id(pool_id);
 
-	xmrstak::miner_work oWork(oPoolJob.sJobID, oPoolJob.bWorkBlob,
-		oPoolJob.iWorkLen, oPoolJob.iResumeCnt, oPoolJob.iTarget,
-		pool_id != dev_pool_id && ::jconf::inst()->NiceHashMode(),
-		pool_id);
+	xmrstak::miner_work oWork(oPoolJob.sJobID, oPoolJob.bWorkBlob, oPoolJob.iWorkLen, oPoolJob.iTarget,
+		pool_id != dev_pool_id && ::jconf::inst()->NiceHashMode(), pool_id);
 
 	oWork.iTarget32 = oPoolJob.iTarget32;
 	
-	xmrstak::globalStates::inst().switch_work(oWork);
+	xmrstak::pool_data dat;
+	dat.iSavedNonce = oPoolJob.iSavedNonce;
+	dat.pool_id = pool_id;
+
+	xmrstak::globalStates::inst().switch_work(oWork, dat);
+
+	if(dat.pool_id != pool_id)
+	{
+		jpsock* prev_pool;
+		if((prev_pool = pick_pool_by_id(dat.pool_id)) != nullptr)
+			prev_pool->save_nonce(dat.iSavedNonce);
+	}
 
 	if(pool_id == dev_pool_id)
 		return;
@@ -251,7 +263,10 @@ void executor::on_pool_have_job(size_t pool_id, pool_job& oPoolJob)
 		printer::inst()->print_msg(L2, "Difficulty changed. Now: %llu.", int_port(iPoolDiff));
 	}
 
-	printer::inst()->print_msg(L3, "New block detected.");
+	if(dat.pool_id == pool_id)
+		printer::inst()->print_msg(L3, "New block detected.");
+	else
+		printer::inst()->print_msg(L3, "Pool switched.");
 }
 
 void executor::on_miner_result(size_t pool_id, job_result& oResult)
@@ -356,13 +371,7 @@ void executor::on_switch_pool(size_t pool_id)
 			return;
 		}
 
-		xmrstak::miner_work oWork(oPoolJob.sJobID, oPoolJob.bWorkBlob,
-			oPoolJob.iWorkLen, oPoolJob.iResumeCnt, oPoolJob.iTarget,
-			::jconf::inst()->NiceHashMode(), pool_id);
-
-		oWork.iTarget32 = oPoolJob.iTarget32;
-
-		xmrstak::globalStates::inst().switch_work(oWork);
+		on_pool_have_job(current_pool_id, oPoolJob);
 
 		if(dev_pool->is_running())
 			push_timed_event(ex_event(EV_DEV_POOL_EXIT), 5);
diff --git a/xmrstak/net/jpsock.cpp b/xmrstak/net/jpsock.cpp
index ae20e5221..773592177 100644
--- a/xmrstak/net/jpsock.cpp
+++ b/xmrstak/net/jpsock.cpp
@@ -566,6 +566,12 @@ bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bRes
 	return cmd_ret_wait(cmd_buffer, oResult);
 }
 
+void jpsock::save_nonce(uint32_t nonce) // stores the nonce to resume from in the current job
+{
+	std::unique_lock<std::mutex> lck(job_mutex); // guard must be named: "unique_lock<mutex>(job_mutex);" declares an empty local and locks nothing
+	oCurrentJob.iSavedNonce = nonce;
+}
+
 bool jpsock::get_current_job(pool_job& job)
 {
 	std::unique_lock<std::mutex>(job_mutex);
@@ -573,7 +579,6 @@ bool jpsock::get_current_job(pool_job& job)
 	if(oCurrentJob.iWorkLen == 0)
 		return false;
 
-	oCurrentJob.iResumeCnt++;
 	job = oCurrentJob;
 	return true;
 }
diff --git a/xmrstak/net/jpsock.hpp b/xmrstak/net/jpsock.hpp
index c2194ad0d..e94eaad22 100644
--- a/xmrstak/net/jpsock.hpp
+++ b/xmrstak/net/jpsock.hpp
@@ -50,6 +50,7 @@ class jpsock
 
 	inline uint64_t get_current_diff() { return iJobDiff; }
 
+	void save_nonce(uint32_t nonce);
 	bool get_current_job(pool_job& job);
 
 	size_t pool_id;
diff --git a/xmrstak/net/msgstruct.hpp b/xmrstak/net/msgstruct.hpp
index 05d15fe24..8e8254bdc 100644
--- a/xmrstak/net/msgstruct.hpp
+++ b/xmrstak/net/msgstruct.hpp
@@ -15,11 +15,11 @@ struct pool_job
 	// \todo remove workaround needed for amd
 	uint32_t	iTarget32;
 	uint32_t	iWorkLen;
-	uint32_t	iResumeCnt;
+	uint32_t	iSavedNonce;
 
-	pool_job() : iWorkLen(0), iResumeCnt(0) {}
+	pool_job() : iWorkLen(0), iSavedNonce(0) {}
 	pool_job(const char* sJobID, uint64_t iTarget, const uint8_t* bWorkBlob, uint32_t iWorkLen) :
-		iTarget(iTarget), iWorkLen(iWorkLen), iResumeCnt(0)
+		iTarget(iTarget), iWorkLen(iWorkLen), iSavedNonce(0)
 	{
 		assert(iWorkLen <= sizeof(pool_job::bWorkBlob));
 		memcpy(this->sJobID, sJobID, sizeof(pool_job::sJobID));