From aaaf38722210048e7a5e80d97872661d48de13c9 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 24 Apr 2014 13:05:02 +0200 Subject: [PATCH 01/63] handle mining.set_extranonce method --- util.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/util.c b/util.c index 97ad9a79d..36866e5c2 100644 --- a/util.c +++ b/util.c @@ -1664,6 +1664,48 @@ static bool parse_diff(struct pool *pool, json_t *val) return true; } +static bool parse_extranonce(struct pool *pool, json_t *val) +{ + char s[RBUFSIZE], *nonce1; + int n2size; + int id = json_integer_value(json_object_get(val, "id")); + json_t *params = json_object_get(val, "params"); + + if (!id) + return false; + + nonce1 = json_array_string(params, 0); + if (!nonce1) { +// applog(LOG_INFO, "Failed to get nonce1 in "); + return false; + } + n2size = json_integer_value(json_array_get(params, 1)); + if (!n2size) { +// applog(LOG_INFO, "Failed to get n2size in "); + free(nonce1); + return false; + } + + cg_wlock(&pool->data_lock); + pool->nonce1 = nonce1; + pool->n1_len = strlen(nonce1) / 2; + free(pool->nonce1bin); + pool->nonce1bin = (unsigned char *)calloc(pool->n1_len, 1); + if (unlikely(!pool->nonce1bin)) + quithere(1, "Failed to calloc pool->nonce1bin"); + hex2bin(pool->nonce1bin, pool->nonce1, pool->n1_len); + pool->n2size = n2size; + cg_wunlock(&pool->data_lock); + + sprintf(s, "{\"id\": %d, \"result\": \"true\", \"error\": null}", id); + if (!stratum_send(pool, s, strlen(s))) + return false; + + applog(LOG_NOTICE, "%s coin change requested", get_pool_name(pool)); + + return true; +} + static void __suspend_stratum(struct pool *pool) { clear_sockbuf(pool); @@ -1807,6 +1849,12 @@ bool parse_method(struct pool *pool, char *s) return ret; } + if (!strncasecmp(buf, "mining.set_extranonce", 21) && parse_extranonce(pool, val)) { + ret = true; + json_decref(val); + return ret; + } + if (!strncasecmp(buf, "client.reconnect", 16) && parse_reconnect(pool, params)) { ret = true; 
json_decref(val); From b742ced4ebc243410bf6d8a75a852cdb25212be6 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 24 Apr 2014 14:18:38 +0200 Subject: [PATCH 02/63] subscribe for mining.extranonce --- sgminer.c | 11 ++++++++++- util.c | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/sgminer.c b/sgminer.c index 980b4e39f..3bdfb0301 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5204,6 +5204,15 @@ static bool parse_stratum_response(struct pool *pool, char *s) id = json_integer_value(id_val); + if (err_val && !json_is_null(err_val)) { + char *ss; + ss = (char *)json_string_value(json_array_get(err_val, 1)); + if (strcmp(ss, "Method 'subscribe' not found for service 'mining.extranonce'") == 0) { + applog(LOG_INFO, "Cannot subscribe to mining.extranonce on %s", get_pool_name(pool)); + goto out; + } + } + mutex_lock(&sshare_lock); HASH_FIND_INT(stratum_shares, &id, sshare); if (sshare) { @@ -5673,7 +5682,7 @@ static bool pool_active(struct pool *pool, bool pinging) bool init = pool_tset(pool, &pool->stratum_init); if (!init) { - bool ret = initiate_stratum(pool) && auth_stratum(pool); + bool ret = initiate_stratum(pool) && auth_stratum(pool) && subscribe_extranonce(pool); if (ret) init_stratum_threads(pool); diff --git a/util.c b/util.c index 36866e5c2..0d08b5851 100644 --- a/util.c +++ b/util.c @@ -1876,6 +1876,22 @@ bool parse_method(struct pool *pool, char *s) return ret; } +bool subscribe_extranonce(struct pool *pool) +{ + char s[RBUFSIZE]; + bool ret = false; + + sprintf(s, "{\"id\": %d, \"method\": \"mining.extranonce.subscribe\", \"params\": []}", + swork_id++); + + if (!stratum_send(pool, s, strlen(s))) + return ret; + + ret = true; + applog(LOG_INFO, "Stratum extranonce subscribe for %s", get_pool_name(pool)); + return ret; +} + bool auth_stratum(struct pool *pool) { json_t *val = NULL, *res_val, *err_val; @@ -2506,6 +2522,8 @@ bool restart_stratum(struct pool *pool) return false; if (!auth_stratum(pool)) return false; + if 
(!subscribe_extranonce(pool)) + return false; return true; } From 4ad62b9a78a1adfbff7850e2265076ad6e696199 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 24 Apr 2014 15:44:50 +0200 Subject: [PATCH 03/63] Allow disable extranonce subscribe --- miner.h | 1 + sgminer.c | 8 ++++++-- util.c | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/miner.h b/miner.h index 7db088fdf..be1be447f 100644 --- a/miner.h +++ b/miner.h @@ -1078,6 +1078,7 @@ extern int nDevs; extern int hw_errors; extern bool use_syslog; extern bool opt_quiet; +extern bool opt_extranonce_subscribe; extern struct thr_info *control_thr; extern struct thr_info **mining_thr; extern struct cgpu_info gpus[MAX_GPUDEVICES]; diff --git a/sgminer.c b/sgminer.c index 3bdfb0301..0765cba8b 100644 --- a/sgminer.c +++ b/sgminer.c @@ -151,6 +151,7 @@ bool opt_api_network; bool opt_delaynet; bool opt_disable_pool; bool opt_disable_client_reconnect = false; +bool opt_extranonce_subscribe = true; static bool no_work; bool opt_worktime; #if defined(HAVE_LIBCURL) && defined(CURL_HAS_KEEPALIVE) @@ -1327,6 +1328,9 @@ static struct opt_table opt_config_table[] = { OPT_WITHOUT_ARG("--no-submit-stale", opt_set_invbool, &opt_submit_stale, "Don't submit shares if they are detected as stale"), + OPT_WITHOUT_ARG("--no-extranonce-subscribe", + opt_set_invbool, &opt_extranonce_subscribe, + "Disable 'extranonce' stratum subscribe"), OPT_WITH_ARG("--pass|-p", set_pass, NULL, NULL, "Password for bitcoin JSON-RPC server"), @@ -5207,7 +5211,7 @@ static bool parse_stratum_response(struct pool *pool, char *s) if (err_val && !json_is_null(err_val)) { char *ss; ss = (char *)json_string_value(json_array_get(err_val, 1)); - if (strcmp(ss, "Method 'subscribe' not found for service 'mining.extranonce'") == 0) { + if (opt_extranonce_subscribe && strcmp(ss, "Method 'subscribe' not found for service 'mining.extranonce'") == 0) { applog(LOG_INFO, "Cannot subscribe to mining.extranonce on %s", get_pool_name(pool)); goto out; } @@ 
-5682,7 +5686,7 @@ static bool pool_active(struct pool *pool, bool pinging) bool init = pool_tset(pool, &pool->stratum_init); if (!init) { - bool ret = initiate_stratum(pool) && auth_stratum(pool) && subscribe_extranonce(pool); + bool ret = initiate_stratum(pool) && auth_stratum(pool) && (!opt_extranonce_subscribe || subscribe_extranonce(pool)); if (ret) init_stratum_threads(pool); diff --git a/util.c b/util.c index 0d08b5851..5de9669c7 100644 --- a/util.c +++ b/util.c @@ -2522,7 +2522,7 @@ bool restart_stratum(struct pool *pool) return false; if (!auth_stratum(pool)) return false; - if (!subscribe_extranonce(pool)) + if (opt_extranonce_subscribe && !subscribe_extranonce(pool)) return false; return true; From 281bcd75e92cd9824a3b62f90821c6c5ddb89083 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 24 Apr 2014 16:34:00 +0200 Subject: [PATCH 04/63] Send subscribe request before auth --- sgminer.c | 2 +- util.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/sgminer.c b/sgminer.c index 0765cba8b..4b3bf7cf4 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5686,7 +5686,7 @@ static bool pool_active(struct pool *pool, bool pinging) bool init = pool_tset(pool, &pool->stratum_init); if (!init) { - bool ret = initiate_stratum(pool) && auth_stratum(pool) && (!opt_extranonce_subscribe || subscribe_extranonce(pool)); + bool ret = initiate_stratum(pool) && (!opt_extranonce_subscribe || subscribe_extranonce(pool)) && auth_stratum(pool); if (ret) init_stratum_threads(pool); diff --git a/util.c b/util.c index 5de9669c7..c6179c1fa 100644 --- a/util.c +++ b/util.c @@ -1878,7 +1878,9 @@ bool parse_method(struct pool *pool, char *s) bool subscribe_extranonce(struct pool *pool) { - char s[RBUFSIZE]; + json_t *val = NULL, *res_val, *err_val; + char s[RBUFSIZE], *sret = NULL; + json_error_t err; bool ret = false; sprintf(s, "{\"id\": %d, \"method\": \"mining.extranonce.subscribe\", \"params\": []}", @@ -1887,8 +1889,54 
@@ bool subscribe_extranonce(struct pool *pool) if (!stratum_send(pool, s, strlen(s))) return ret; + /* Parse all data in the queue and anything left should be auth */ + while (42) { + if (!socket_full(pool, DEFAULT_SOCKWAIT)) { + applog(LOG_DEBUG, "Timed out waiting for response extranonce.subscribe"); + /* some pool doesnt send anything, so this is normal */ + ret = true; + goto out; + } + + sret = recv_line(pool); + if (!sret) + return ret; + if (parse_method(pool, sret)) + free(sret); + else + break; + } + + val = JSON_LOADS(sret, &err); + free(sret); + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + + if (!res_val || json_is_false(res_val) || (err_val && !json_is_null(err_val))) { + char *ss; + + if (err_val) { + ss = (char *)json_string_value(json_array_get(err_val, 1)); + if (opt_extranonce_subscribe && strcmp(ss, "Method 'subscribe' not found for service 'mining.extranonce'") == 0) { + applog(LOG_INFO, "Cannot subscribe to mining.extranonce on %s", get_pool_name(pool)); + ret = true; + goto out; + } + ss = json_dumps(err_val, JSON_INDENT(3)); + } + else + ss = strdup("(unknown reason)"); + applog(LOG_INFO, "%s JSON stratum auth failed: %s", get_pool_name(pool), ss); + free(ss); + + goto out; + } + ret = true; applog(LOG_INFO, "Stratum extranonce subscribe for %s", get_pool_name(pool)); + +out: + json_decref(val); return ret; } @@ -2520,10 +2568,10 @@ bool restart_stratum(struct pool *pool) suspend_stratum(pool); if (!initiate_stratum(pool)) return false; - if (!auth_stratum(pool)) - return false; if (opt_extranonce_subscribe && !subscribe_extranonce(pool)) return false; + if (!auth_stratum(pool)) + return false; return true; } From e1dcbfab15db334620a8eca8f14c5cd2265a46b7 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 24 Apr 2014 20:38:37 +0200 Subject: [PATCH 05/63] Change mining.set_extranonce method as stratum notification --- util.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git 
a/util.c b/util.c index c6179c1fa..8f0bcbf54 100644 --- a/util.c +++ b/util.c @@ -1668,18 +1668,13 @@ static bool parse_extranonce(struct pool *pool, json_t *val) { char s[RBUFSIZE], *nonce1; int n2size; - int id = json_integer_value(json_object_get(val, "id")); - json_t *params = json_object_get(val, "params"); - - if (!id) - return false; - nonce1 = json_array_string(params, 0); + nonce1 = json_array_string(val, 0); if (!nonce1) { // applog(LOG_INFO, "Failed to get nonce1 in "); return false; } - n2size = json_integer_value(json_array_get(params, 1)); + n2size = json_integer_value(json_array_get(val, 1)); if (!n2size) { // applog(LOG_INFO, "Failed to get n2size in "); free(nonce1); @@ -1697,10 +1692,6 @@ static bool parse_extranonce(struct pool *pool, json_t *val) pool->n2size = n2size; cg_wunlock(&pool->data_lock); - sprintf(s, "{\"id\": %d, \"result\": \"true\", \"error\": null}", id); - if (!stratum_send(pool, s, strlen(s))) - return false; - applog(LOG_NOTICE, "%s coin change requested", get_pool_name(pool)); return true; @@ -1849,7 +1840,7 @@ bool parse_method(struct pool *pool, char *s) return ret; } - if (!strncasecmp(buf, "mining.set_extranonce", 21) && parse_extranonce(pool, val)) { + if (!strncasecmp(buf, "mining.set_extranonce", 21) && parse_extranonce(pool, params)) { ret = true; json_decref(val); return ret; From 75a86fbf1049c707e2a67b4661427551c99f44d3 Mon Sep 17 00:00:00 2001 From: Elbandi Date: Thu, 24 Apr 2014 20:57:09 +0200 Subject: [PATCH 06/63] Set extranonce.subscribe timeout to 2sec --- util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util.c b/util.c index 8f0bcbf54..ce9c122ad 100644 --- a/util.c +++ b/util.c @@ -1882,7 +1882,7 @@ bool subscribe_extranonce(struct pool *pool) /* Parse all data in the queue and anything left should be auth */ while (42) { - if (!socket_full(pool, DEFAULT_SOCKWAIT)) { + if (!socket_full(pool, DEFAULT_SOCKWAIT / 30)) { applog(LOG_DEBUG, "Timed out waiting for response 
extranonce.subscribe"); /* some pool doesnt send anything, so this is normal */ ret = true; From f9a2c8b929d4ed6bfed76c4a604b7e426468ba17 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sat, 26 Apr 2014 00:46:01 +0200 Subject: [PATCH 07/63] Protect pool name with lock --- api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api.c b/api.c index da6e2fc60..50b75287b 100644 --- a/api.c +++ b/api.c @@ -1818,7 +1818,9 @@ static void poolstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m lp = (char *)NO; root = api_add_int(root, "POOL", &i, false); - root = api_add_string(root, "Name", get_pool_name(pool), false); + mutex_lock(&pool->stratum_lock); + root = api_add_string(root, "Name", get_pool_name(pool), true); + mutex_unlock(&pool->stratum_lock); root = api_add_escape(root, "URL", pool->rpc_url, false); root = api_add_string(root, "Algorithm", pool->algorithm.name, false); root = api_add_string(root, "Description", pool->description, false); From 77545de0e8b0d0a6e57cf518920de5e6b818e290 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sat, 26 Apr 2014 00:47:53 +0200 Subject: [PATCH 08/63] Disconnect from pool, if auth is failed --- util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util.c b/util.c index 97ad9a79d..128a0479f 100644 --- a/util.c +++ b/util.c @@ -1867,6 +1867,8 @@ bool auth_stratum(struct pool *pool) applog(LOG_INFO, "%s JSON stratum auth failed: %s", get_pool_name(pool), ss); free(ss); + suspend_stratum(pool); + goto out; } From e7bca3194177e5ba799a963dd360effef3041474 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sat, 26 Apr 2014 01:17:14 +0200 Subject: [PATCH 09/63] Some tweaks for extranonce --- sgminer.c | 9 --------- util.c | 6 ++---- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/sgminer.c b/sgminer.c index 4b3bf7cf4..e3854ad36 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5208,15 +5208,6 @@ static bool parse_stratum_response(struct pool *pool, char *s) id = json_integer_value(id_val); - if (err_val 
&& !json_is_null(err_val)) { - char *ss; - ss = (char *)json_string_value(json_array_get(err_val, 1)); - if (opt_extranonce_subscribe && strcmp(ss, "Method 'subscribe' not found for service 'mining.extranonce'") == 0) { - applog(LOG_INFO, "Cannot subscribe to mining.extranonce on %s", get_pool_name(pool)); - goto out; - } - } - mutex_lock(&sshare_lock); HASH_FIND_INT(stratum_shares, &id, sshare); if (sshare) { diff --git a/util.c b/util.c index 85557f13b..692b92a22 100644 --- a/util.c +++ b/util.c @@ -1671,12 +1671,10 @@ static bool parse_extranonce(struct pool *pool, json_t *val) nonce1 = json_array_string(val, 0); if (!nonce1) { -// applog(LOG_INFO, "Failed to get nonce1 in "); return false; } n2size = json_integer_value(json_array_get(val, 1)); if (!n2size) { -// applog(LOG_INFO, "Failed to get n2size in "); free(nonce1); return false; } @@ -1692,7 +1690,7 @@ static bool parse_extranonce(struct pool *pool, json_t *val) pool->n2size = n2size; cg_wunlock(&pool->data_lock); - applog(LOG_NOTICE, "%s coin change requested", get_pool_name(pool)); + applog(LOG_NOTICE, "%s extranonce change requested", get_pool_name(pool)); return true; } @@ -1880,7 +1878,7 @@ bool subscribe_extranonce(struct pool *pool) if (!stratum_send(pool, s, strlen(s))) return ret; - /* Parse all data in the queue and anything left should be auth */ + /* Parse all data in the queue and anything left should be the response */ while (42) { if (!socket_full(pool, DEFAULT_SOCKWAIT / 30)) { applog(LOG_DEBUG, "Timed out waiting for response extranonce.subscribe"); From 91be2e9e079dd85a4d6a9d5965946f4562046b4c Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 27 Apr 2014 19:22:41 +0200 Subject: [PATCH 10/63] Handle pool response, who send errors in invalid format --- util.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/util.c b/util.c index 692b92a22..a7e2432ec 100644 --- a/util.c +++ b/util.c @@ -1905,8 +1905,15 @@ bool subscribe_extranonce(struct pool *pool) char *ss; if 
(err_val) { - ss = (char *)json_string_value(json_array_get(err_val, 1)); - if (opt_extranonce_subscribe && strcmp(ss, "Method 'subscribe' not found for service 'mining.extranonce'") == 0) { + ss = __json_array_string(err_val, 1); + if (!ss) + ss = (char *)json_string_value(err_val); + if (ss && (strcmp(ss, "Method 'subscribe' not found for service 'mining.extranonce'") == 0)) { + applog(LOG_INFO, "Cannot subscribe to mining.extranonce on %s", get_pool_name(pool)); + ret = true; + goto out; + } + if (ss && (strcmp(ss, "Unrecognized request provided") == 0)) { applog(LOG_INFO, "Cannot subscribe to mining.extranonce on %s", get_pool_name(pool)); ret = true; goto out; From a32ec1616941b616ad809784e10852aed3f9e134 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 27 Apr 2014 19:31:05 +0200 Subject: [PATCH 11/63] Move no-extranonce-subscribe parameter to a pool config --- miner.h | 1 + sgminer.c | 18 +++++++++++++++--- util.c | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/miner.h b/miner.h index be1be447f..f7227e4a1 100644 --- a/miner.h +++ b/miner.h @@ -1175,6 +1175,7 @@ struct pool { char *name; char *description; int prio; + bool extranonce_subscribe; int accepted, rejected; int seq_rejects; int seq_getfails; diff --git a/sgminer.c b/sgminer.c index e3854ad36..d827c2859 100644 --- a/sgminer.c +++ b/sgminer.c @@ -151,7 +151,6 @@ bool opt_api_network; bool opt_delaynet; bool opt_disable_pool; bool opt_disable_client_reconnect = false; -bool opt_extranonce_subscribe = true; static bool no_work; bool opt_worktime; #if defined(HAVE_LIBCURL) && defined(CURL_HAS_KEEPALIVE) @@ -563,6 +562,7 @@ struct pool *add_pool(void) pool->rpc_proxy = NULL; pool->quota = 1; adjust_quota_gcd(); + pool->extranonce_subscribe = true; pool->description = ""; @@ -951,6 +951,16 @@ static char *set_userpass(const char *arg) return NULL; } +static char *set_no_extranonce_subscribe(char *arg) +{ + struct pool *pool = get_current_pool(); + + applog(LOG_DEBUG, "Disable 
extranonce subscribe on %d", pool->pool_no); + opt_set_invbool(&pool->extranonce_subscribe); + + return NULL; +} + static char *set_pool_priority(char *arg) { struct pool *pool = get_current_pool(); @@ -1329,7 +1339,7 @@ static struct opt_table opt_config_table[] = { opt_set_invbool, &opt_submit_stale, "Don't submit shares if they are detected as stale"), OPT_WITHOUT_ARG("--no-extranonce-subscribe", - opt_set_invbool, &opt_extranonce_subscribe, + set_no_extranonce_subscribe, NULL, "Disable 'extranonce' stratum subscribe"), OPT_WITH_ARG("--pass|-p", set_pass, NULL, NULL, @@ -4292,6 +4302,8 @@ void write_config(FILE *fcfg) pool->rpc_proxy ? "|" : "", json_escape(pool->rpc_url)); } + if (!pool->extranonce_subscribe) + fputs("\n\t\t\"no-extranonce-subscribe\" : true,", fcfg); fprintf(fcfg, "\n\t\t\"user\" : \"%s\",", json_escape(pool->rpc_user)); fprintf(fcfg, "\n\t\t\"pass\" : \"%s\",", json_escape(pool->rpc_pass)); /* Using get_pool_name() here is unsafe if opt_incognito is true. */ @@ -5677,7 +5689,7 @@ static bool pool_active(struct pool *pool, bool pinging) bool init = pool_tset(pool, &pool->stratum_init); if (!init) { - bool ret = initiate_stratum(pool) && (!opt_extranonce_subscribe || subscribe_extranonce(pool)) && auth_stratum(pool); + bool ret = initiate_stratum(pool) && (!pool->extranonce_subscribe || subscribe_extranonce(pool)) && auth_stratum(pool); if (ret) init_stratum_threads(pool); diff --git a/util.c b/util.c index a7e2432ec..690c589df 100644 --- a/util.c +++ b/util.c @@ -2566,7 +2566,7 @@ bool restart_stratum(struct pool *pool) suspend_stratum(pool); if (!initiate_stratum(pool)) return false; - if (opt_extranonce_subscribe && !subscribe_extranonce(pool)) + if (pool->extranonce_subscribe && !subscribe_extranonce(pool)) return false; if (!auth_stratum(pool)) return false; From c2af5fec1dc870156bcb8aca0d81b07d75c2f599 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 27 Apr 2014 20:17:30 +0200 Subject: [PATCH 12/63] Include pool.h --- api.c | 1 + 1 file 
changed, 1 insertion(+) diff --git a/api.c b/api.c index 50b75287b..8a6a28c14 100644 --- a/api.c +++ b/api.c @@ -27,6 +27,7 @@ #include "compat.h" #include "miner.h" +#include "pool.h" #include "util.h" // BUFSIZ varies on Windows and Linux From c72cd9160e04bf6a953e291c158dd847503f0ac3 Mon Sep 17 00:00:00 2001 From: elbandi Date: Mon, 28 Apr 2014 18:10:19 +0200 Subject: [PATCH 13/63] export PKG_CONFIG_PATH variable --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index e7eed7412..4665da207 100644 --- a/configure.ac +++ b/configure.ac @@ -258,6 +258,7 @@ if test "x$have_sgminer_sdk" = "xtrue"; then fi PKG_CONFIG="${PKG_CONFIG:-pkg-config} --define-variable=arch=$ARCH_DIR --define-variable=target=$target --define-variable=sgminersdkdir=$SGMINER_SDK" PKG_CONFIG_PATH="$SGMINER_SDK/lib/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" + export PKG_CONFIG_PATH fi AC_ARG_ENABLE([libcurl], From 28ff05e3271cdf456e865f58385c0c3caa2cc088 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 27 Apr 2014 17:17:49 +0200 Subject: [PATCH 14/63] Set the pool idle and switch pool, if need --- miner.h | 1 + sgminer.c | 10 ++++++++++ util.c | 4 +++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/miner.h b/miner.h index f7227e4a1..6094e7f2a 100644 --- a/miner.h +++ b/miner.h @@ -1420,6 +1420,7 @@ extern bool log_curses_only(int prio, const char *datetime, const char *str); extern void clear_logwin(void); extern void logwin_update(void); extern bool pool_tclear(struct pool *pool, bool *var); +extern void pool_failed(struct pool *pool); extern struct thread_q *tq_new(void); extern void tq_free(struct thread_q *tq); extern bool tq_push(struct thread_q *tq, void *data); diff --git a/sgminer.c b/sgminer.c index d827c2859..d187e7b98 100644 --- a/sgminer.c +++ b/sgminer.c @@ -3619,6 +3619,16 @@ struct work *copy_work_noffset(struct work *base_work, int noffset) return work; } +void pool_failed(struct pool *pool) +{ + if (!pool_tset(pool, 
&pool->idle)) { + cgtime(&pool->tv_idle); + if (pool == current_pool()) { + switch_pools(NULL); + } + } +} + static void pool_died(struct pool *pool) { if (!pool_tset(pool, &pool->idle)) { diff --git a/util.c b/util.c index 690c589df..8d78481cf 100644 --- a/util.c +++ b/util.c @@ -1743,8 +1743,10 @@ static bool parse_reconnect(struct pool *pool, json_t *val) free(tmp); mutex_unlock(&pool->stratum_lock); - if (!restart_stratum(pool)) + if (!restart_stratum(pool)) { + pool_failed(pool); return false; + } return true; } From c00e4bcaf6212eb7547ffccce9bd5b187ec7e7d2 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sat, 3 May 2014 20:12:39 +0200 Subject: [PATCH 15/63] Put nicehash to version string --- configure.ac | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4665da207..2266045e7 100644 --- a/configure.ac +++ b/configure.ac @@ -3,8 +3,9 @@ m4_define([v_maj], [4]) m4_define([v_min], [1]) m4_define([v_mic], [271]) +m4_define([v_rev], [nicehash]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## -m4_define([v_ver], [v_maj.v_min.v_mic]) +m4_ifdef([v_rev], [m4_define([v_ver], [v_maj.v_min.v_mic-v_rev])], [m4_define([v_ver], [v_maj.v_min.v_mic])]) m4_define([lt_rev], m4_eval(v_maj + v_min)) m4_define([lt_cur], v_mic) m4_define([lt_age], v_min) @@ -30,6 +31,7 @@ m4_ifdef([v_rel], , [m4_define([v_rel], [])]) AC_DEFINE_UNQUOTED(SGMINER_MAJOR_VERSION, [v_maj], [Major version]) AC_DEFINE_UNQUOTED(SGMINER_MINOR_VERSION, [v_min], [Minor version]) AC_DEFINE_UNQUOTED(SGMINER_MINOR_SUBVERSION, [v_mic], [Micro version]) +AC_DEFINE_UNQUOTED(SGMINER_REVISON, [v_rev], [Revison]) version_info="lt_rev:lt_cur:lt_age" release_info="v_rel" AC_SUBST(version_info) From 08b7ab1dd4d12c5fd5420a0e22e26d57bf684fe2 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 4 May 2014 01:29:28 +0200 Subject: [PATCH 16/63] Add build_number to version string --- Makefile.am | 4 ++++ api.c | 2 +- configure.ac | 7 +++++++ miner.h | 11 +++++++++++ 
sgminer.c | 9 ++------- util.c | 6 +++--- 6 files changed, 28 insertions(+), 11 deletions(-) diff --git a/Makefile.am b/Makefile.am index 95b8fd41b..57bb2ad7d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -31,6 +31,10 @@ GIT_VERSION := $(shell sh -c 'git describe --abbrev=4 --dirty') sgminer_CPPFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" endif +if USE_BUILD_NUMBER +sgminer_CPPFLAGS += -DBUILD_NUMBER=\"$(BUILD_NUMBER)\" +endif + sgminer_SOURCES := sgminer.c sgminer_SOURCES += api.c sgminer_SOURCES += elist.h miner.h compat.h bench_block.h diff --git a/api.c b/api.c index 8a6a28c14..e2cb6965f 100644 --- a/api.c +++ b/api.c @@ -1555,7 +1555,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m message(io_data, MSG_VERSION, 0, NULL, isjson); io_open = io_add(io_data, isjson ? COMSTR JSON_VERSION : _VERSION COMSTR); - root = api_add_string(root, "SGMiner", VERSION, false); + root = api_add_string(root, "SGMiner", CGMINER_VERSION, false); root = api_add_const(root, "API", APIVERSION, false); root = print_data(root, buf, isjson, false); diff --git a/configure.ac b/configure.ac index 2266045e7..221e3652e 100644 --- a/configure.ac +++ b/configure.ac @@ -219,6 +219,13 @@ AC_ARG_ENABLE([git-version], [Disable use of git version in version string even if available]), [wantgitver=$enableval], [wantgitver=yes]) +AC_ARG_WITH([build_number], + [AC_HELP_STRING([--with-build-number], [Specify a build number.])], + [BUILD_NUMBER="$withval"] + ) +AC_SUBST([BUILD_NUMBER]) +AM_CONDITIONAL([USE_BUILD_NUMBER], [test x$BUILD_NUMBER != x]) + curses="auto" AC_ARG_WITH([curses], diff --git a/miner.h b/miner.h index 6094e7f2a..109714d31 100644 --- a/miner.h +++ b/miner.h @@ -3,6 +3,17 @@ #include "config.h" +#ifdef GIT_VERSION +#undef VERSION +#define VERSION GIT_VERSION +#endif + +#ifdef BUILD_NUMBER +#define CGMINER_VERSION VERSION "-" BUILD_NUMBER +#else +#define CGMINER_VERSION VERSION +#endif + #include "algorithm.h" #include diff --git a/sgminer.c 
b/sgminer.c index d187e7b98..50d9d4bfc 100644 --- a/sgminer.c +++ b/sgminer.c @@ -65,11 +65,6 @@ char *curly = ":D"; #include #endif -#ifdef GIT_VERSION -#undef VERSION -#define VERSION GIT_VERSION -#endif - struct strategies strategies[] = { { "Failover" }, { "Round Robin" }, @@ -2271,7 +2266,7 @@ static void curses_print_status(void) unsigned short int line = 0; wattron(statuswin, A_BOLD); - cg_mvwprintw(statuswin, line, 0, PACKAGE " " VERSION " - Started: %s", datestamp); + cg_mvwprintw(statuswin, line, 0, PACKAGE " " CGMINER_VERSION " - Started: %s", datestamp); wattroff(statuswin, A_BOLD); mvwhline(statuswin, ++line, 0, '-', 80); @@ -8007,7 +8002,7 @@ int main(int argc, char *argv[]) /* We use the getq mutex as the staged lock */ stgd_lock = &getq->mutex; - snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, VERSION); + snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, CGMINER_VERSION); #ifndef WIN32 signal(SIGPIPE, SIG_IGN); diff --git a/util.c b/util.c index 8d78481cf..a7d241070 100644 --- a/util.c +++ b/util.c @@ -1759,7 +1759,7 @@ static bool send_version(struct pool *pool, json_t *val) if (!id) return false; - sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"VERSION"\", \"error\": null}", id); + sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"CGMINER_VERSION"\", \"error\": null}", id); if (!stratum_send(pool, s, strlen(s))) return false; @@ -2443,9 +2443,9 @@ bool initiate_stratum(struct pool *pool) sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": []}", swork_id++); } else { if (pool->sessionid) - sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\", \"%s\"]}", swork_id++, pool->sessionid); + sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\", \"%s\"]}", swork_id++, pool->sessionid); else - sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\"]}", swork_id++); + sprintf(s, 
"{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\"]}", swork_id++); } if (__stratum_send(pool, s, strlen(s)) != SEND_OK) { From a8d683cda9052d9e04fcb8fa062358afa8994f56 Mon Sep 17 00:00:00 2001 From: elbandi Date: Wed, 7 May 2014 00:02:10 +0200 Subject: [PATCH 17/63] Mark pool as idle if stratum restart is failed --- sgminer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sgminer.c b/sgminer.c index 50d9d4bfc..e15a2e34f 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5440,6 +5440,7 @@ static void *stratum_rthread(void *userdata) if (!restart_stratum(pool)) { pool_died(pool); while (!restart_stratum(pool)) { + pool_failed(pool); if (pool->removed) goto out; cgsleep_ms(30000); @@ -5480,6 +5481,7 @@ static void *stratum_rthread(void *userdata) pool_died(pool); while (!restart_stratum(pool)) { + pool_failed(pool); if (pool->removed) goto out; cgsleep_ms(30000); From f8261b12156081d70548d208166def60836a3620 Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 6 Jun 2014 19:39:52 +0200 Subject: [PATCH 18/63] Fix nonce1 and sessionid memory leak --- util.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util.c b/util.c index a7d241070..307f7b5ee 100644 --- a/util.c +++ b/util.c @@ -1680,6 +1680,7 @@ static bool parse_extranonce(struct pool *pool, json_t *val) } cg_wlock(&pool->data_lock); + free(pool->nonce1); pool->nonce1 = nonce1; pool->n1_len = strlen(nonce1) / 2; free(pool->nonce1bin); @@ -2508,6 +2509,8 @@ bool initiate_stratum(struct pool *pool) } cg_wlock(&pool->data_lock); + free(pool->nonce1); + free(pool->sessionid); pool->sessionid = sessionid; pool->nonce1 = nonce1; pool->n1_len = strlen(nonce1) / 2; From 5b37b52e243900dc2e6c779c4696a62bfacc79d4 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 18 Jun 2015 13:36:20 +0200 Subject: [PATCH 19/63] store the next difficulty --- miner.h | 1 + util.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/miner.h b/miner.h index 109714d31..4bcce4730 
100644 --- a/miner.h +++ b/miner.h @@ -1306,6 +1306,7 @@ struct pool { unsigned char *coinbase; size_t nonce2_offset; unsigned char header_bin[128]; + double next_diff; int merkle_offset; struct timeval tv_lastwork; diff --git a/util.c b/util.c index 307f7b5ee..a551a8974 100644 --- a/util.c +++ b/util.c @@ -1550,6 +1550,7 @@ static bool parse_notify(struct pool *pool, json_t *val) pool->swork.nbit = nbit; pool->swork.ntime = ntime; pool->swork.clean = clean; + pool->swork.diff = pool->next_diff; alloc_len = pool->swork.cb_len = cb1_len + pool->n1_len + pool->n2size + cb2_len; pool->nonce2_offset = cb1_len + pool->n1_len; @@ -1647,8 +1648,8 @@ static bool parse_diff(struct pool *pool, json_t *val) return false; cg_wlock(&pool->data_lock); - old_diff = pool->swork.diff; - pool->swork.diff = diff; + old_diff = pool->next_diff; + pool->next_diff = diff; cg_wunlock(&pool->data_lock); if (old_diff != diff) { @@ -2531,7 +2532,7 @@ bool initiate_stratum(struct pool *pool) if (!pool->stratum_url) pool->stratum_url = pool->sockaddr_url; pool->stratum_active = true; - pool->swork.diff = 1; + pool->next_diff = pool->swork.diff = 1; if (opt_protocol) { applog(LOG_DEBUG, "%s confirmed mining.subscribe with extranonce1 %s extran2size %d", get_pool_name(pool), pool->nonce1, pool->n2size); From 580676affbc7b5a2b0cc60356e643174bd3a3861 Mon Sep 17 00:00:00 2001 From: elbandi Date: Mon, 9 Nov 2015 17:15:15 +0100 Subject: [PATCH 20/63] Get some algos from 'master' of https://github.com/djm34/sgminer --- Makefile.am | 3 + algorithm.c | 206 ++++- algorithm.h | 6 +- algorithm/credits.c | 148 ++++ algorithm/credits.h | 10 + algorithm/lyra2.c | 22 +- algorithm/lyra2.h | 8 - algorithm/lyra2_old.c | 208 ++++++ algorithm/lyra2_old.h | 50 ++ algorithm/lyra2re.c | 25 +- algorithm/lyra2re.h | 2 + algorithm/lyra2re_old.c | 169 +++++ algorithm/lyra2re_old.h | 10 + algorithm/pluck.h | 2 + algorithm/sponge.c | 22 +- algorithm/sponge.h | 8 +- algorithm/sponge_old.c | 405 ++++++++++ 
algorithm/sponge_old.h | 98 +++ algorithm/sysendian.h | 140 ++++ algorithm/yescrypt-opt.c | 1364 ++++++++++++++++++++++++++++++++++ algorithm/yescrypt.c | 128 ++++ algorithm/yescrypt.h | 10 + algorithm/yescrypt_core.h | 376 ++++++++++ algorithm/yescryptcommon.c | 360 +++++++++ driver-opencl.c | 13 +- findnonce.c | 6 +- kernel/bmw256.cl | 162 ++++ kernel/credits.cl | 232 ++++++ kernel/cubehash256.cl | 132 ++++ kernel/lyra2rev2.cl | 525 +++++++++++++ kernel/lyra2v2.cl | 184 +++++ kernel/yescrypt-multi.cl | 314 ++++++++ kernel/yescrypt.cl | 253 +++++++ kernel/yescrypt_essential.cl | 760 +++++++++++++++++++ miner.h | 27 +- ocl.c | 242 +++++- ocl.h | 5 +- sgminer.c | 26 +- sph/Makefile.am | 2 +- sph/sha256_Y.c | 418 +++++++++++ sph/sha256_Y.h | 63 ++ 41 files changed, 7083 insertions(+), 61 deletions(-) create mode 100644 algorithm/credits.c create mode 100644 algorithm/credits.h create mode 100644 algorithm/lyra2_old.c create mode 100644 algorithm/lyra2_old.h create mode 100644 algorithm/lyra2re_old.c create mode 100644 algorithm/lyra2re_old.h create mode 100644 algorithm/sponge_old.c create mode 100644 algorithm/sponge_old.h create mode 100644 algorithm/sysendian.h create mode 100644 algorithm/yescrypt-opt.c create mode 100644 algorithm/yescrypt.c create mode 100644 algorithm/yescrypt.h create mode 100644 algorithm/yescrypt_core.h create mode 100644 algorithm/yescryptcommon.c create mode 100644 kernel/bmw256.cl create mode 100644 kernel/credits.cl create mode 100644 kernel/cubehash256.cl create mode 100644 kernel/lyra2rev2.cl create mode 100644 kernel/lyra2v2.cl create mode 100644 kernel/yescrypt-multi.cl create mode 100644 kernel/yescrypt.cl create mode 100644 kernel/yescrypt_essential.cl create mode 100644 sph/sha256_Y.c create mode 100644 sph/sha256_Y.h diff --git a/Makefile.am b/Makefile.am index af6915e82..5da38cc59 100644 --- a/Makefile.am +++ b/Makefile.am @@ -73,7 +73,10 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h sgminer_SOURCES += 
algorithm/neoscrypt.c algorithm/neoscrypt.h sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h +sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h algorithm/lyra2_old.c algorithm/lyra2_old.h algorithm/sponge_old.c algorithm/sponge_old.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h +sgminer_SOURCES += algorithm/credits.c algorithm/credits.h +sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index fc0ee4a97..6acab9244 100644 --- a/algorithm.c +++ b/algorithm.c @@ -33,7 +33,10 @@ #include "algorithm/neoscrypt.h" #include "algorithm/whirlpoolx.h" #include "algorithm/lyra2re.h" +#include "algorithm/lyra2re_old.h" #include "algorithm/pluck.h" +#include "algorithm/yescrypt.h" +#include "algorithm/credits.h" #include "compat.h" @@ -42,6 +45,7 @@ const char *algorithm_type_str[] = { "Unknown", + "Credits", "Scrypt", "NScrypt", "X11", @@ -58,7 +62,10 @@ const char *algorithm_type_str[] = { "Neoscrypt", "WhirlpoolX", "Lyra2RE", + "Lyra2REv2", - "Pluck" + "Pluck", + "Yescrypt", + "Yescrypt-multi" }; void sha256(const unsigned char *message, unsigned int len, unsigned char *digest) @@ -184,6 +191,125 @@ static cl_int queue_neoscrypt_kernel(_clState *clState, dev_blk_ctx *blk, __mayb return status; } +static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_ulong le_target; + cl_int status = 0; + + + // le_target = (*(cl_uint *)(blk->work->device_target + 24)); + le_target = (cl_ulong)le64toh(((uint64_t *)blk->work->/*device_*/target)[3]); + // le_target = (cl_uint)((uint32_t *)blk->work->target)[6]; + + +
memcpy(clState->cldata, blk->work->data, 168); +// flip168(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 168, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + CL_SET_ARG(blk->work->midstate); + + return status; +} + +static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_uint le_target; + cl_int status = 0; + + +// le_target = (*(cl_uint *)(blk->work->device_target + 28)); + le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]); +// le_target = (cl_uint)((uint32_t *)blk->work->target)[7]; + + +// memcpy(clState->cldata, blk->work->data, 80); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->buffer1); + CL_SET_ARG(clState->buffer2); + CL_SET_ARG(le_target); + + return status; +} + +static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ +// cl_kernel *kernel = &clState->kernel; + cl_kernel *kernel; + unsigned int num = 0; + cl_uint le_target; + cl_int status = 0; + + + // le_target = (*(cl_uint *)(blk->work->device_target + 28)); + le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]); + memcpy(clState->cldata, blk->work->data, 80); +// flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); +//pbkdf and initial sha + kernel = &clState->kernel; + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->outputBuffer); + 
CL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->buffer1); + CL_SET_ARG(clState->buffer2); + CL_SET_ARG(clState->buffer3); + CL_SET_ARG(le_target); + +//inactive kernel + num = 0; + kernel = clState->extra_kernels; + CL_SET_ARG_N(0,clState->buffer1); + CL_SET_ARG_N(1,clState->buffer2); +// CL_SET_ARG_N(3, clState->buffer3); + +//mix2_2 + num = 0; + CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8); + CL_SET_ARG_N(1,clState->buffer1); + CL_SET_ARG_N(2,clState->buffer2); + //mix2_2 +//inactive kernel + num = 0; + CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1); + CL_SET_ARG_N(1, clState->buffer2); + //mix2_2 + + num = 0; + CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8); + CL_SET_ARG_N(1, clState->buffer1); + CL_SET_ARG_N(2, clState->buffer2); + + //inactive kernel + num = 0; + CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1); + CL_SET_ARG_N(1, clState->buffer2); + //mix2_2 + + +//pbkdf and finalization + num=0; + CL_NEXTKERNEL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(clState->buffer2); + CL_SET_ARG(clState->buffer3); + CL_SET_ARG(le_target); + + return status; +} + static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel = &clState->kernel; @@ -716,6 +842,60 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct return status; } +static cl_int queue_lyra2REv2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_int status = 0; + cl_ulong le_target; + + // le_target = *(cl_uint *)(blk->work->device_target + 28); + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + // CL_SET_ARG(clState->CLbuffer0); + 
CL_SET_ARG(clState->buffer1); + CL_SET_ARG(blk->work->blk.ctx_a); + CL_SET_ARG(blk->work->blk.ctx_b); + CL_SET_ARG(blk->work->blk.ctx_c); + CL_SET_ARG(blk->work->blk.ctx_d); + CL_SET_ARG(blk->work->blk.ctx_e); + CL_SET_ARG(blk->work->blk.ctx_f); + CL_SET_ARG(blk->work->blk.ctx_g); + CL_SET_ARG(blk->work->blk.ctx_h); + CL_SET_ARG(blk->work->blk.cty_a); + CL_SET_ARG(blk->work->blk.cty_b); + CL_SET_ARG(blk->work->blk.cty_c); + + // keccak - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->buffer1); + // cubehash - search2 + num = 0; + CL_NEXTKERNEL_SET_ARG_0(clState->buffer1); + // lyra - search3 + num = 0; + CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1); + CL_SET_ARG_N(1, clState->padbuffer8); + // skein -search4 + num = 0; + CL_NEXTKERNEL_SET_ARG_0(clState->buffer1); + // cubehash - search5 + num = 0; + CL_NEXTKERNEL_SET_ARG_0(clState->buffer1); + // bmw - search6 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->buffer1); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel = &clState->kernel; @@ -757,6 +937,25 @@ static algorithm_settings_t algos[] = { { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options } A_PLUCK("pluck"), #undef A_PLUCK + +#define A_CREDITS(a) \ + { a, ALGO_CRE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, credits_regenhash, queue_credits_kernel, gen_hash, NULL} + A_CREDITS("credits"), +#undef A_CREDITS + + + +#define A_YESCRYPT(a) \ + { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} + 
A_YESCRYPT("yescrypt"), +#undef A_YESCRYPT + +#define A_YESCRYPT_MULTI(a) \ + { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options} + A_YESCRYPT_MULTI("yescrypt-multi"), +#undef A_YESCRYPT_MULTI + + // kernels starting from this will have difficulty calculated by using quarkcoin algorithm #define A_QUARK(a, b) \ { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options } @@ -793,7 +992,10 @@ static algorithm_settings_t algos[] = { { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL }, - { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL }, + { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2reold_regenhash, queue_lyra2RE_kernel, gen_hash, NULL }, + + { "lyra2rev2", ALGO_LYRA2REv2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2re_regenhash, queue_lyra2REv2_kernel, gen_hash, append_neoscrypt_compiler_options }, + // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ @@ -877,8 +1079,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa ALGO_ALIAS("nist5", "talkcoin-mod"); ALGO_ALIAS("keccak", "maxcoin"); ALGO_ALIAS("whirlpool", "whirlcoin"); - ALGO_ALIAS("Lyra2RE", "lyra2re"); ALGO_ALIAS("lyra2", "lyra2re"); + ALGO_ALIAS("lyra2v2", "lyra2rev2"); #undef ALGO_ALIAS #undef ALGO_ALIAS_NF diff --git a/algorithm.h b/algorithm.h index b2527d17d..8b7185a44 
100644 --- a/algorithm.h +++ b/algorithm.h @@ -13,6 +13,7 @@ typedef enum { ALGO_UNK, + ALGO_CRE, ALGO_SCRYPT, ALGO_NSCRYPT, ALGO_X11, @@ -29,7 +30,10 @@ typedef enum { ALGO_NEOSCRYPT, ALGO_WHIRLPOOLX, ALGO_LYRA2RE, - ALGO_PLUCK + ALGO_LYRA2REv2, + ALGO_PLUCK, + ALGO_YESCRYPT, + ALGO_YESCRYPT_MULTI, } algorithm_type_t; extern const char *algorithm_type_str[]; diff --git a/algorithm/credits.c b/algorithm/credits.c new file mode 100644 index 000000000..b69514bca --- /dev/null +++ b/algorithm/credits.c @@ -0,0 +1,148 @@ +/*- + * Copyright 2015 djm34 + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "sph/sph_sha2.h" + +static const uint32_t diff1targ = 0x0000ffff; + + + +inline void credits_hash(void *state, const void *input) +{ + sph_sha256_context sha1, sha2; + uint32_t hash[8], hash2[8]; + + sph_sha256_init(&sha1); + sph_sha256(&sha1, input, 168); + sph_sha256_close(&sha1, hash); + + + sph_sha256_init(&sha2); + sph_sha256(&sha2, hash, 32); + sph_sha256_close(&sha2, hash2); + + memcpy(state, hash2, 32); + +} +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + +/* Used externally as confirmation of correct OCL code */ +int credits_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[42], ohash[8]; + printf("coming here credits test\n"); + + be32enc_vect(data, (const uint32_t *)pdata, 42); + data[35] = htobe32(nonce); + credits_hash((unsigned char*)ohash, (unsigned char*)data); /* (state, input): hash the 168-byte header in data into ohash */ + + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + + if (tmp_hash7 > diff1targ) + return -1; + + if (tmp_hash7 > Htarg) + return 0; + + return 1; +} + +void credits_regenhash(struct work *work) +{ + uint32_t data[42]; + uint32_t *nonce = (uint32_t *)(work->data + 140); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 42); + data[35] = htobe32(*nonce); + + credits_hash((unsigned char*)ohash, (unsigned char*)data); + +} + + +bool scanhash_credits(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce,
uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 140); + uint32_t data[42]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 35); + + + while (1) + { + uint32_t ostate[8]; + + *nonce = ++n; + data[35] = (n); + credits_hash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", (long unsigned int)ostate[7]); + + if (unlikely(tmp_hash7 <= Htarg)) + { + ((uint32_t *)pdata)[35] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) + { + *last_nonce = n; + break; + } + } + + return ret; +} \ No newline at end of file diff --git a/algorithm/credits.h b/algorithm/credits.h new file mode 100644 index 000000000..9d74ad20d --- /dev/null +++ b/algorithm/credits.h @@ -0,0 +1,10 @@ +#ifndef CREDITS_H +#define CREDITS_H + +#include "miner.h" + + +extern int credits_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void credits_regenhash(struct work *work); + +#endif /* CREDITS_H */ diff --git a/algorithm/lyra2.c b/algorithm/lyra2.c index 6944b22f3..42640e760 100644 --- a/algorithm/lyra2.c +++ b/algorithm/lyra2.c @@ -58,15 +58,19 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * //========== Initializing the Memory Matrix and pointers to it =============// //Tries to allocate enough space for the whole memory matrix + + const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols; + const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8; + i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); - uint64_t *wholeMatrix = (uint64_t*)malloc(i); + uint64_t *wholeMatrix = malloc(i); if (wholeMatrix == NULL) { return -1; } memset(wholeMatrix, 0, i); //Allocates pointers to each row of the matrix - uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*)); + uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*)); if (memMatrix 
== NULL) { return -1; } @@ -118,7 +122,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * //======================= Initializing the Sponge State ====================// //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) - uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t)); + uint64_t *state = malloc(16 * sizeof (uint64_t)); if (state == NULL) { return -1; } @@ -130,16 +134,16 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * ptrWord = wholeMatrix; for (i = 0; i < nBlocksInput; i++) { absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) - ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil) + ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil) } //Initializes M[0] and M[1] - reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here - reducedDuplexRow1(state, memMatrix[0], memMatrix[1]); + reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here + reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols); do { //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) - reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols); //updates the value of row* (deterministically picked during Setup)) @@ -172,7 +176,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * //------------------------------------------------------------------------------------------ //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] - reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + reducedDuplexRow(state, memMatrix[prev], 
memMatrix[rowa], memMatrix[row], nCols); //update prev: it now points to the last row ever computed prev = row; @@ -192,7 +196,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * absorbBlock(state, memMatrix[rowa]); //Squeezes the key - squeeze(state, (unsigned char*)K, kLen); + squeeze(state, K, kLen); //==========================================================================/ //========================= Freeing the memory =============================// diff --git a/algorithm/lyra2.h b/algorithm/lyra2.h index 13c7dbd3b..798e6af11 100644 --- a/algorithm/lyra2.h +++ b/algorithm/lyra2.h @@ -37,14 +37,6 @@ typedef unsigned char byte; #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes #endif -#ifndef N_COLS - #define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default -#endif - -#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks -#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row - - int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); #endif /* LYRA2_H_ */ diff --git a/algorithm/lyra2_old.c b/algorithm/lyra2_old.c new file mode 100644 index 000000000..3b3819878 --- /dev/null +++ b/algorithm/lyra2_old.c @@ -0,0 +1,208 @@ +/** + * Implementation of the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include "lyra2_old.h" +#include "sponge_old.h" + +/** + * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords + * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, + * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all + * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value + * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols). 
+ * + * @param K The derived key to be output by the algorithm + * @param kLen Desired key length + * @param pwd User password + * @param pwdlen Password length + * @param salt Salt + * @param saltlen Salt length + * @param timeCost Parameter to determine the processing time (T) + * @param nRows Number or rows of the memory matrix (R) + * @param nCols Number of columns of the memory matrix (C) + * + * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) + */ +int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) { + + //============================= Basic variables ============================// + int64_t row = 2; //index of row to be processed + int64_t prev = 1; //index of prev (last row ever computed/modified) + int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering) + int64_t tau; //Time Loop iterator + int64_t step = 1; //Visitation step (used during Setup and Wandering phases) + int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) + int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 + int64_t i; //auxiliary iteration counter + //==========================================================================/ + + //========== Initializing the Memory Matrix and pointers to it =============// + //Tries to allocate enough space for the whole memory matrix + i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); + uint64_t *wholeMatrix = malloc(i); + if (wholeMatrix == NULL) { + return -1; + } + memset(wholeMatrix, 0, i); + + //Allocates pointers to each row of the matrix + uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*)); + if (memMatrix == NULL) { + return -1; + } + //Places the pointers in the correct positions + uint64_t *ptrWord = wholeMatrix; + for (i = 0; i 
< nRows; i++) { + memMatrix[i] = ptrWord; + ptrWord += ROW_LEN_INT64; + } + //==========================================================================/ + + //============= Getting the password + salt + basil padded with 10*1 ===============// + //OBS.:The memory matrix will temporarily hold the password: not for saving memory, + //but this ensures that the password copied locally will be overwritten as soon as possible + + //First, we clean enough blocks for the password, salt, basil and padding + uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; + byte *ptrByte = (byte*) wholeMatrix; + memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); + + //Prepends the password + memcpy(ptrByte, pwd, pwdlen); + ptrByte += pwdlen; + + //Concatenates the salt + memcpy(ptrByte, salt, saltlen); + ptrByte += saltlen; + + //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface + memcpy(ptrByte, &kLen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &pwdlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &saltlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &timeCost, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nRows, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nCols, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + + //Now comes the padding + *ptrByte = 0x80; //first byte of padding: right after the password + ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix + ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block + *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block + //==========================================================================/ + + //======================= Initializing the Sponge 
State ====================// + //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) + uint64_t *state = malloc(16 * sizeof (uint64_t)); + if (state == NULL) { + return -1; + } + initStateO(state); + //==========================================================================/ + + //================================ Setup Phase =============================// + //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits + ptrWord = wholeMatrix; + for (i = 0; i < nBlocksInput; i++) { + absorbBlockBlake2SafeO(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) + ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil) + } + + //Initializes M[0] and M[1] + reducedSqueezeRow0O(state, memMatrix[0]); //The locally copied password is most likely overwritten here + reducedDuplexRow1O(state, memMatrix[0], memMatrix[1]); + + do { + //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) + reducedDuplexRowSetupO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + + + //updates the value of row* (deterministically picked during Setup)) + rowa = (rowa + step) & (window - 1); + //update prev: it now points to the last row ever computed + prev = row; + //updates row: goes to the next row to be computed + row++; + + //Checks if all rows in the window where visited. 
+ if (rowa == 0) { + step = window + gap; //changes the step: approximately doubles its value + window *= 2; //doubles the size of the re-visitation window + gap = -gap; //inverts the modifier to the step + } + + } while (row < nRows); + //==========================================================================/ + + //============================ Wandering Phase =============================// + row = 0; //Resets the visitation to the first row of the memory matrix + for (tau = 1; tau <= timeCost; tau++) { + //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1 + step = (tau % 2 == 0) ? -1 : nRows / 2 - 1; + do { + //Selects a pseudorandom index row* + //------------------------------------------------------------------------------------------ + //rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] + reducedDuplexRowO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + + //update prev: it now points to the last row ever computed + prev = row; + + //updates row: goes to the next row to be computed + //------------------------------------------------------------------------------------------ + //row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + } while (row != 0); + } + //==========================================================================/ + + //============================ Wrap-up Phase ===============================// + //Absorbs the last block of the memory matrix + absorbBlockO(state, memMatrix[rowa]); 
+ + //Squeezes the key + squeezeO(state, K, kLen); + //==========================================================================/ + + //========================= Freeing the memory =============================// + free(memMatrix); + free(wholeMatrix); + + //Wiping out the sponge's internal state before freeing it + memset(state, 0, 16 * sizeof (uint64_t)); + free(state); + //==========================================================================/ + + return 0; +} diff --git a/algorithm/lyra2_old.h b/algorithm/lyra2_old.h new file mode 100644 index 000000000..9dbe56682 --- /dev/null +++ b/algorithm/lyra2_old.h @@ -0,0 +1,50 @@ +/** + * Header file for the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef LYRA2OLD_H_ +#define LYRA2OLD_H_ + +#include + +typedef unsigned char byte; + +//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED) +#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t) +#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes + + +#ifdef BLOCK_LEN_BITS + #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t) + #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes +#else //default block length: 768 bits + #define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t) + #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes +#endif + +#ifndef N_COLS + #define N_COLS 8 //Number of columns in the memory matrix: fixed to 8 by default +#endif + +#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks +#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row + + +int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); + +#endif /* LYRA2OLD_H_ */ diff --git a/algorithm/lyra2re.c b/algorithm/lyra2re.c index ba37094ab..9d89853fe 100644 --- a/algorithm/lyra2re.c +++ b/algorithm/lyra2re.c @@ -36,6 +36,8 @@ #include "sph/sph_groestl.h" #include "sph/sph_skein.h" #include "sph/sph_keccak.h" +#include "sph/sph_bmw.h" +#include "sph/sph_cubehash.h" #include "lyra2.h" /* @@ -55,9 +57,10 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) inline void lyra2rehash(void *state, const void *input) { sph_blake256_context ctx_blake; - sph_groestl256_context ctx_groestl; + sph_bmw256_context ctx_bmw; sph_keccak256_context ctx_keccak; sph_skein256_context ctx_skein; + sph_cubehash256_context ctx_cube; uint32_t hashA[8], hashB[8]; @@ -72,17 +75,23 @@ inline void lyra2rehash(void *state, const void *input)
sph_keccak256 (&ctx_keccak,hashA, 32); sph_keccak256_close(&ctx_keccak, hashB); - LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); + sph_cubehash256_init(&ctx_cube); + sph_cubehash256(&ctx_cube, hashB, 32); + sph_cubehash256_close(&ctx_cube, hashA); + LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4); - sph_skein256_init(&ctx_skein); - sph_skein256 (&ctx_skein, hashA, 32); - sph_skein256_close(&ctx_skein, hashB); + sph_skein256_init(&ctx_skein); + sph_skein256 (&ctx_skein, hashB, 32); + sph_skein256_close(&ctx_skein, hashA); + sph_cubehash256_init(&ctx_cube); + sph_cubehash256(&ctx_cube, hashA, 32); + sph_cubehash256_close(&ctx_cube, hashB); - sph_groestl256_init(&ctx_groestl); - sph_groestl256 (&ctx_groestl, hashB, 32); - sph_groestl256_close(&ctx_groestl, hashA); + sph_bmw256_init(&ctx_bmw); + sph_bmw256 (&ctx_bmw, hashB, 32); + sph_bmw256_close(&ctx_bmw, hashA); //printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]); diff --git a/algorithm/lyra2re.h b/algorithm/lyra2re.h index 8a58e7471..8bc52ac4f 100644 --- a/algorithm/lyra2re.h +++ b/algorithm/lyra2re.h @@ -2,6 +2,8 @@ #define LYRA2RE_H #include "miner.h" +#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent +#define LYRA_SECBUF_SIZE (4) // (not used) extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); diff --git a/algorithm/lyra2re_old.c b/algorithm/lyra2re_old.c new file mode 100644 index 000000000..3aa4be9a7 --- /dev/null +++ b/algorithm/lyra2re_old.c @@ -0,0 +1,169 @@ +/*- + * Copyright 2014 James Lovejoy + * Copyright 2014 phm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "sph/sph_blake.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_keccak.h" +#include "lyra2_old.h" + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
+ */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + + +inline void lyra2rehash_old(void *state, const void *input) +{ + sph_blake256_context ctx_blake; + sph_groestl256_context ctx_groestl; + sph_keccak256_context ctx_keccak; + sph_skein256_context ctx_skein; + + uint32_t hashA[8], hashB[8]; + + sph_blake256_init(&ctx_blake); + sph_blake256 (&ctx_blake, input, 80); + sph_blake256_close (&ctx_blake, hashA); + + + + + sph_keccak256_init(&ctx_keccak); + sph_keccak256 (&ctx_keccak,hashA, 32); + sph_keccak256_close(&ctx_keccak, hashB); + + LYRA2O(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); + + + sph_skein256_init(&ctx_skein); + sph_skein256 (&ctx_skein, hashA, 32); + sph_skein256_close(&ctx_skein, hashB); + + + sph_groestl256_init(&ctx_groestl); + sph_groestl256 (&ctx_groestl, hashB, 32); + sph_groestl256_close(&ctx_groestl, hashA); + +//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]); + + memcpy(state, hashA, 32); +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code */ +int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + lyra2rehash_old(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void lyra2reold_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 
19); + data[19] = htobe32(*nonce); + lyra2rehash_old(ohash, data); +} + +bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); /* byte offset 76 of pdata, i.e. 32-bit word 19 */ + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; /* pre-increment: the first nonce tried is n + 1 */ + data[19] = (n); /* NOTE(review): no htobe32 here, unlike lyra2reold_regenhash above - confirm intended */ + lyra2rehash_old(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", /* NOTE(review): logged at INFO on every loop iteration */ + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { /* hit: store the nonce big-endian in word 19 of pdata and stop */ + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { /* range exhausted or restart requested: stop without a hit */ + *last_nonce = n; + break; + } + } + + return ret; +} + + + diff --git a/algorithm/lyra2re_old.h b/algorithm/lyra2re_old.h new file mode 100644 index 000000000..0788dfb35 --- /dev/null +++ b/algorithm/lyra2re_old.h @@ -0,0 +1,10 @@ +#ifndef LYRA2REOLD_H +#define LYRA2REOLD_H + +#include "miner.h" + +extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void lyra2reold_regenhash(struct work *work); + +#endif /* LYRA2REOLD_H */ diff --git a/algorithm/pluck.h b/algorithm/pluck.h index 7582554e3..619eb0137 100644 --- a/algorithm/pluck.h +++ b/algorithm/pluck.h @@ -3,6 +3,8 @@ #include "miner.h" #define PLUCK_SCRATCHBUF_SIZE (128 * 1024) +#define PLUCK_SECBUF_SIZE (64 * 1024) + extern int pluck_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); extern void pluck_regenhash(struct work *work); diff --git a/algorithm/sponge.c b/algorithm/sponge.c index e717a508d..c788952a5 100644 --- a/algorithm/sponge.c +++ b/algorithm/sponge.c @@ -158,11 +158,11 @@ void 
absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) { * @param state The current state of the sponge * @param rowOut Row to receive the data squeezed */ -void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) { - uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] +void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) { + uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] int i; //M[row][C-1-col] = H.reduced_squeeze() - for (i = 0; i < N_COLS; i++) { + for (i = 0; i < nCols; i++) { ptrWord[0] = state[0]; ptrWord[1] = state[1]; ptrWord[2] = state[2]; @@ -193,12 +193,12 @@ void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) { * @param rowIn Row to feed the sponge * @param rowOut Row to receive the sponge's output */ -void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { +void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) { uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev - uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row int i; - for (i = 0; i < N_COLS; i++) { + for (i = 0; i < nCols; i++) { //Absorbing "M[prev][col]" state[0] ^= (ptrWordIn[0]); @@ -253,13 +253,13 @@ void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { * @param rowOut Row receiving the output * */ -void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { +void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) { uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* - uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + uint64_t* ptrWordOut = rowOut + 
(nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row int i; - for (i = 0; i < N_COLS; i++) { + for (i = 0; i < nCols; i++) { //Absorbing "M[prev] [+] M[row*]" state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); @@ -327,13 +327,13 @@ void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, * @param rowOut Row receiving the output * */ -void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { +void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) { uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row int i; - for (i = 0; i < N_COLS; i++) { + for (i = 0; i < nCols; i++) { //Absorbing "M[prev] [+] M[row*]" state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); diff --git a/algorithm/sponge.h b/algorithm/sponge.h index 3fcff0d7e..198229791 100644 --- a/algorithm/sponge.h +++ b/algorithm/sponge.h @@ -78,16 +78,16 @@ void initState(uint64_t state[/*16*/]); //---- Squeezes void squeeze(uint64_t *state, unsigned char *out, unsigned int len); -void reducedSqueezeRow0(uint64_t* state, uint64_t* row); +void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols); //---- Absorbs void absorbBlock(uint64_t *state, const uint64_t *in); void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in); //---- Duplexes -void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut); -void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); -void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols); +void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols); +void 
reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols); //---- Misc void printArray(unsigned char *array, unsigned int size, char *name); diff --git a/algorithm/sponge_old.c b/algorithm/sponge_old.c new file mode 100644 index 000000000..7152687ff --- /dev/null +++ b/algorithm/sponge_old.c @@ -0,0 +1,405 @@ +/** + * A simple implementation of Blake2b's internal permutation + * in the form of a sponge. + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include "sponge_old.h" +#include "lyra2_old.h" + + + +/** + * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder + * receive Blake2b's IV as per Blake2b's specification. Note: Even though sponges + * typically have their internal state initialized with zeros, Blake2b's G function + * has a fixed point: if the internal state and message are both filled with zeros. 
the + * resulting permutation will always be a block filled with zeros; this happens because + * Blake2b does not use the constants originally employed in Blake2 inside its G function, + * relying on the IV for avoiding possible fixed points. + * + * @param state The 1024-bit array to be initialized + */ +void initStateO(uint64_t state[/*16*/]) { + //First 512 bits (64 bytes, state[0..7]) are zeros + memset(state, 0, 64); + //Remaining eight words, state[8..15], receive the Blake2b IV + state[8] = blake2b_IV[0]; + state[9] = blake2b_IV[1]; + state[10] = blake2b_IV[2]; + state[11] = blake2b_IV[3]; + state[12] = blake2b_IV[4]; + state[13] = blake2b_IV[5]; + state[14] = blake2b_IV[6]; + state[15] = blake2b_IV[7]; +} + +/** + * Execute Blake2b's G function, with all 12 rounds (twelve ROUND_LYRA applications, in place). + * + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +static void blake2bLyra(uint64_t *v) { + ROUND_LYRA(0); + ROUND_LYRA(1); + ROUND_LYRA(2); + ROUND_LYRA(3); + ROUND_LYRA(4); + ROUND_LYRA(5); + ROUND_LYRA(6); + ROUND_LYRA(7); + ROUND_LYRA(8); + ROUND_LYRA(9); + ROUND_LYRA(10); + ROUND_LYRA(11); +} + +/** + * Executes a reduced version of Blake2b's G function with only one round (a single ROUND_LYRA, in place) + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +static void reducedBlake2bLyra(uint64_t *v) { + ROUND_LYRA(0); +} + +/** + * Performs a squeeze operation, using Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param out Array that will receive the data squeezed + * @param len The number of bytes to be squeezed into the "out" array + */ +void squeezeO(uint64_t *state, byte *out, unsigned int len) { + int fullBlocks = len / BLOCK_LEN_BYTES; + byte *ptr = out; + int i; + //Squeezes full blocks: copy BLOCK_LEN_BYTES out, then permute the state + for (i = 0; i < fullBlocks; i++) { + memcpy(ptr, state, BLOCK_LEN_BYTES); + blake2bLyra(state); + ptr += BLOCK_LEN_BYTES; + } + + //Squeezes remaining bytes (len % BLOCK_LEN_BYTES); the state is not permuted again afterwards + memcpy(ptr, state, (len % BLOCK_LEN_BYTES)); +} + +/** 
+ * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words + * of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_INT64 words) + */ +void absorbBlockO(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_INT64 words of "in" with the current state + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + state[8] ^= in[8]; + state[9] ^= in[9]; + state[10] ^= in[10]; + state[11] ^= in[11]; + + //Applies the transformation f to the sponge's state + blake2bLyra(state); +} + +/** + * Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64 + * words of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words) + */ +void absorbBlockBlake2SafeO(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + + //Applies the transformation f to the sponge's state + blake2bLyra(state); +} + +/** + * Performs a reduced squeeze operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowOut Row to receive the data squeezed + */ +void reducedSqueezeRow0O(uint64_t* state, uint64_t* rowOut) { + uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] + int i; + //M[row][C-1-col] = H.reduced_squeeze() + for (i = 0; i < N_COLS; i++) { + ptrWord[0] = 
state[0]; + ptrWord[1] = state[1]; + ptrWord[2] = state[2]; + ptrWord[3] = state[3]; + ptrWord[4] = state[4]; + ptrWord[5] = state[5]; + ptrWord[6] = state[6]; + ptrWord[7] = state[7]; + ptrWord[8] = state[8]; + ptrWord[9] = state[9]; + ptrWord[10] = state[10]; + ptrWord[11] = state[11]; + + //Goes to next block (column) that will receive the squeezed data + ptrWord -= BLOCK_LEN_INT64; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + } +} + +/** + * Performs a reduced duplex operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowIn Row to feed the sponge + * @param rowOut Row to receive the sponge's output + */ +void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[prev][col]" + state[0] ^= (ptrWordIn[0]); + state[1] ^= (ptrWordIn[1]); + state[2] ^= (ptrWordIn[2]); + state[3] ^= (ptrWordIn[3]); + state[4] ^= (ptrWordIn[4]); + state[5] ^= (ptrWordIn[5]); + state[6] ^= (ptrWordIn[6]); + state[7] ^= (ptrWordIn[7]); + state[8] ^= (ptrWordIn[8]); + state[9] ^= (ptrWordIn[9]); + state[10] ^= (ptrWordIn[10]); + state[11] ^= (ptrWordIn[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][C-1-col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + 
ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + + //Input: next column (i.e., next block in sequence) + ptrWordIn += BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left and N_COLS is a system parameter. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the 
reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Inputs: next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][col] = M[rowOut][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. 
+ * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[rowOut][col] = M[rowOut][col] XOR rand + ptrWordOut[0] ^= state[0]; + ptrWordOut[1] ^= state[1]; + ptrWordOut[2] ^= state[2]; + ptrWordOut[3] ^= state[3]; + ptrWordOut[4] ^= state[4]; + ptrWordOut[5] ^= state[5]; + ptrWordOut[6] ^= state[6]; + ptrWordOut[7] ^= state[7]; + ptrWordOut[8] ^= state[8]; + ptrWordOut[9] ^= state[9]; + ptrWordOut[10] ^= state[10]; + ptrWordOut[11] ^= state[11]; + + //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= 
state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Goes to next block + ptrWordOut += BLOCK_LEN_INT64; + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + } +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + Prints an array of unsigned chars + */ +void printArrayO(unsigned char *array, unsigned int size, char *name) { + int i; + printf("%s: ", name); + for (i = 0; i < size; i++) { + printf("%2x|", array[i]); + } + printf("\n"); +} + +//////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/algorithm/sponge_old.h b/algorithm/sponge_old.h new file mode 100644 index 000000000..c23781d37 --- /dev/null +++ b/algorithm/sponge_old.h @@ -0,0 +1,98 @@ +/** + * Header file for Blake2b's internal permutation in the form of a sponge. + * This code is based on the original Blake2b's implementation provided by + * Samuel Neves (https://blake2.net/) + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef SPONGEOLD_H_ +#define SPONGEOLD_H_ + +#include + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + + +/*Blake2b IV Array*/ +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/*Blake2b's rotation*/ +static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/*Blake2b's G function*/ +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + + +/*One Round of the Blake2b's compression function*/ +#define ROUND_LYRA(r) \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); + + +//---- Housekeeping +void initStateO(uint64_t state[/*16*/]); + +//---- Squeezes +void squeezeO(uint64_t *state, unsigned char *out, unsigned int len); +void reducedSqueezeRow0O(uint64_t* state, uint64_t* row); + +//---- Absorbs +void absorbBlockO(uint64_t *state, const uint64_t *in); +void absorbBlockBlake2SafeO(uint64_t *state, const uint64_t *in); + +//---- Duplexes +void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut); +void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); + +//---- Misc +void printArrayO(unsigned char *array, unsigned int size, char *name); + 
+//////////////////////////////////////////////////////////////////////////////////////////////// + + +#endif /* SPONGEOLD_H_ */ diff --git a/algorithm/sysendian.h b/algorithm/sysendian.h new file mode 100644 index 000000000..31ac985fb --- /dev/null +++ b/algorithm/sysendian.h @@ -0,0 +1,140 @@ +/*- + * Copyright 2007-2009 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ +#ifndef _SYSENDIAN_H_ +#define _SYSENDIAN_H_ + +/* If we don't have be64enc, the <sys/endian.h> we have isn't usable.
*/ +#if !HAVE_DECL_BE64ENC +#undef HAVE_SYS_ENDIAN_H +#endif + +#ifdef HAVE_SYS_ENDIAN_H + +#include + +#else + +#include + +#if !HAVE_DECL_LE32DEC +static uint32_t le32dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + + ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24)); +} +#endif + +#if !HAVE_DECL_BE32ENC +static void be32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[3] = x & 0xff; + p[2] = (x >> 8) & 0xff; + p[1] = (x >> 16) & 0xff; + p[0] = (x >> 24) & 0xff; +} +#endif + +#if !HAVE_DECL_BE32DEC +static uint32_t be32dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + + ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); +} +#endif + +#if !HAVE_DECL_LE32ENC +static void le32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; + p[2] = (x >> 16) & 0xff; + p[3] = (x >> 24) & 0xff; +} +#endif + +static uint64_t +be64dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + + return ((uint64_t)(p[7]) + ((uint64_t)(p[6]) << 8) + + ((uint64_t)(p[5]) << 16) + ((uint64_t)(p[4]) << 24) + + ((uint64_t)(p[3]) << 32) + ((uint64_t)(p[2]) << 40) + + ((uint64_t)(p[1]) << 48) + ((uint64_t)(p[0]) << 56)); +} + +static void +be64enc(void *pp, uint64_t x) +{ + uint8_t * p = (uint8_t *)pp; + + p[7] = x & 0xff; + p[6] = (x >> 8) & 0xff; + p[5] = (x >> 16) & 0xff; + p[4] = (x >> 24) & 0xff; + p[3] = (x >> 32) & 0xff; + p[2] = (x >> 40) & 0xff; + p[1] = (x >> 48) & 0xff; + p[0] = (x >> 56) & 0xff; +} + + + +static uint64_t +le64dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + + return ((uint64_t)(p[0]) + ((uint64_t)(p[1]) << 8) + + ((uint64_t)(p[2]) << 16) + ((uint64_t)(p[3]) << 24) + + ((uint64_t)(p[4]) << 32) + ((uint64_t)(p[5]) << 40) + + ((uint64_t)(p[6]) << 48) + ((uint64_t)(p[7]) << 56)); +} + +static void +le64enc(void *pp, uint64_t x) +{ + 
uint8_t * p = (uint8_t *)pp; + + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; + p[2] = (x >> 16) & 0xff; + p[3] = (x >> 24) & 0xff; + p[4] = (x >> 32) & 0xff; + p[5] = (x >> 40) & 0xff; + p[6] = (x >> 48) & 0xff; + p[7] = (x >> 56) & 0xff; +} +#endif /* !HAVE_SYS_ENDIAN_H */ + +#endif /* !_SYSENDIAN_H_ */ diff --git a/algorithm/yescrypt-opt.c b/algorithm/yescrypt-opt.c new file mode 100644 index 000000000..b54be469d --- /dev/null +++ b/algorithm/yescrypt-opt.c @@ -0,0 +1,1364 @@ +/*- + * Copyright 2009 Colin Percival + * Copyright 2013,2014 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. 
+ */ + +#ifdef __i386__ +#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Enable at least SSE2 in the C compiler and use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (portability to older CPUs or testing)." +#elif defined(__x86_64__) +#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (for testing only)." +#endif + +#include +#include +#include +#include "algorithm/yescrypt_core.h" +#include "sph/sha256_Y.h" +#include "algorithm/sysendian.h" + +// #include "sph/yescrypt-platform.c" +#define HUGEPAGE_THRESHOLD (12 * 1024 * 1024) + +#ifdef __x86_64__ +#define HUGEPAGE_SIZE (2 * 1024 * 1024) +#else +#undef HUGEPAGE_SIZE +#endif + + +static void * +alloc_region(yescrypt_region_t * region, size_t size) +{ + size_t base_size = size; + uint8_t * base, *aligned; +#ifdef MAP_ANON + int flags = +#ifdef MAP_NOCORE + MAP_NOCORE | +#endif + MAP_ANON | MAP_PRIVATE; +#if defined(MAP_HUGETLB) && defined(HUGEPAGE_SIZE) + size_t new_size = size; + const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE - 1; + if (size >= HUGEPAGE_THRESHOLD && size + hugepage_mask >= size) { + flags |= MAP_HUGETLB; + /* + * Linux's munmap() fails on MAP_HUGETLB mappings if size is not a multiple of + * huge page size, so let's round up to huge page size here. 
+ */ + new_size = size + hugepage_mask; + new_size &= ~hugepage_mask; + } + base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (base != MAP_FAILED) { + base_size = new_size; + } + else + if (flags & MAP_HUGETLB) { + flags &= ~MAP_HUGETLB; + base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + } + +#else + base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); +#endif + if (base == MAP_FAILED) + base = NULL; + aligned = base; +#elif defined(HAVE_POSIX_MEMALIGN) + if ((errno = posix_memalign((void **)&base, 64, size)) != 0) + base = NULL; + aligned = base; +#else + base = aligned = NULL; + if (size + 63 < size) { + errno = ENOMEM; + } + else if ((base = malloc(size + 63)) != NULL) { + aligned = base + 63; + aligned -= (uintptr_t)aligned & 63; + } +#endif + region->base = base; + region->aligned = aligned; + region->base_size = base ? base_size : 0; + region->aligned_size = base ? size : 0; + return aligned; +} + +static void init_region(yescrypt_region_t * region) +{ + region->base = region->aligned = NULL; + region->base_size = region->aligned_size = 0; +} + +static int +free_region(yescrypt_region_t * region) +{ + if (region->base) { +#ifdef MAP_ANON + if (munmap(region->base, region->base_size)) + return -1; +#else + free(region->base); +#endif + } + init_region(region); + return 0; +} + +int +yescrypt_init_shared(yescrypt_shared_t * shared, +const uint8_t * param, size_t paramlen, +uint64_t N, uint32_t r, uint32_t p, +yescrypt_init_shared_flags_t flags, uint32_t mask, +uint8_t * buf, size_t buflen) +{ + yescrypt_shared1_t * shared1 = &shared->shared1; + yescrypt_shared_t dummy, half1, half2; + // yescrypt_shared_t * half2; + uint8_t salt[32]; + + if (flags & YESCRYPT_SHARED_PREALLOCATED) { + if (!shared1->aligned || !shared1->aligned_size) + return -1; + } + else { + init_region(shared1); + } + shared->mask1 = 1; + if (!param && !paramlen && !N && !r && !p && !buf && !buflen) + return 0; + + init_region(&dummy.shared1); + 
dummy.mask1 = 1; + if (yescrypt_kdf(&dummy, shared1, + param, paramlen, NULL, 0, N, r, p, 0, + YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1, + salt, sizeof(salt))) + goto out; + + half1 = half2 = *shared; + half1.shared1.aligned_size /= 2; + half2.shared1.aligned_size = half1.shared1.aligned_size; + half2.shared1.aligned = (char*)half2.shared1.aligned + half1.shared1.aligned_size; + + N /= 2; + + if (p > 1 && yescrypt_kdf(&half1, &half2.shared1, + param, paramlen, salt, sizeof(salt), N, r, p, 0, + YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_2, + salt, sizeof(salt))) + goto out; + + if (yescrypt_kdf(&half2, &half1.shared1, + param, paramlen, salt, sizeof(salt), N, r, p, 0, + YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1, + salt, sizeof(salt))) + goto out; + + if (yescrypt_kdf(&half1, &half2.shared1, + param, paramlen, salt, sizeof(salt), N, r, p, 0, + YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1, + buf, buflen)) + goto out; + + shared->mask1 = mask; + + return 0; + +out: + if (!(flags & YESCRYPT_SHARED_PREALLOCATED)) + free_region(shared1); + return -1; +} + +int +yescrypt_free_shared(yescrypt_shared_t * shared) +{ + return free_region(&shared->shared1); +} + +int +yescrypt_init_local(yescrypt_local_t * local) +{ + init_region(local); + return 0; +} + +int +yescrypt_free_local(yescrypt_local_t * local) +{ + return free_region(local); +} + + +static void +blkcpy(uint64_t * dest, const uint64_t * src, size_t count) +{ + do { + *dest++ = *src++; *dest++ = *src++; + *dest++ = *src++; *dest++ = *src++; + } while (count -= 4); +}; + +static void +blkxor(uint64_t * dest, const uint64_t * src, size_t count) +{ + do { + *dest++ ^= *src++; *dest++ ^= *src++; + *dest++ ^= *src++; *dest++ ^= *src++; + } while (count -= 4); +}; + +typedef union { + uint32_t w[16]; + uint64_t d[8]; +} salsa20_blk_t; + +static void +salsa20_simd_shuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout) +{ +#define 
COMBINE(out, in1, in2) \ + Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); + COMBINE(0, 0, 2) + COMBINE(1, 5, 7) + COMBINE(2, 2, 4) + COMBINE(3, 7, 1) + COMBINE(4, 4, 6) + COMBINE(5, 1, 3) + COMBINE(6, 6, 0) + COMBINE(7, 3, 5) +#undef COMBINE +} + +static void +salsa20_simd_unshuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout) +{ +#define COMBINE(out, in1, in2) \ + Bout->w[out * 2] = Bin->d[in1]; \ + Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; + COMBINE(0, 0, 6) + COMBINE(1, 5, 3) + COMBINE(2, 2, 0) + COMBINE(3, 7, 5) + COMBINE(4, 4, 2) + COMBINE(5, 1, 7) + COMBINE(6, 6, 4) + COMBINE(7, 3, 1) +#undef COMBINE +} + +/** + * salsa20_8(B): + * Apply the salsa20/8 core to the provided block. + */ + +static void +salsa20_8(uint64_t B[8]) +{ + size_t i; + salsa20_blk_t X; + +#define x X.w + + salsa20_simd_unshuffle((const salsa20_blk_t *)B, &X); + + for (i = 0; i < 8; i += 2) { +#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) + /* Operate on columns */ + x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); + x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); + + x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); + x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); + + x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); + x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); + + x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); + x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); + + /* Operate on rows */ + x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); + x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); + + x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); + x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); + + x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); + x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); + + x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); + x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); +#undef R + } +#undef x + + { + 
salsa20_blk_t Y; + salsa20_simd_shuffle(&X, &Y); + for (i = 0; i < 16; i += 4) { + ((salsa20_blk_t *)B)->w[i] += Y.w[i]; + ((salsa20_blk_t *)B)->w[i + 1] += Y.w[i + 1]; + ((salsa20_blk_t *)B)->w[i + 2] += Y.w[i + 2]; + ((salsa20_blk_t *)B)->w[i + 3] += Y.w[i + 3]; + } + } +} + +/** + * blockmix_salsa8(Bin, Bout, X, r): + * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r + * bytes in length; the output Bout must also be the same size. The + * temporary space X must be 64 bytes. + */ +static void +blockmix_salsa8(const uint64_t * Bin, uint64_t * Bout, uint64_t * X, size_t r) +{ + size_t i; + + /* 1: X <-- B_{2r - 1} */ + blkcpy(X, &Bin[(2 * r - 1) * 8], 8); + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < 2 * r; i += 2) { + /* 3: X <-- H(X \xor B_i) */ + blkxor(X, &Bin[i * 8], 8); + salsa20_8(X); + + /* 4: Y_i <-- X */ + /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ + blkcpy(&Bout[i * 4], X, 8); + + /* 3: X <-- H(X \xor B_i) */ + blkxor(X, &Bin[i * 8 + 8], 8); + salsa20_8(X); + + /* 4: Y_i <-- X */ + /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ + blkcpy(&Bout[i * 4 + r * 8], X, 8); + } + +} + +/* These are tunable */ +#define S_BITS 8 +#define S_SIMD 2 +#define S_P 4 +#define S_ROUNDS 6 + +/* Number of S-boxes. Not tunable, hard-coded in a few places. */ +#define S_N 2 + +/* Derived values. Not tunable on their own. */ +#define S_SIZE1 (1 << S_BITS) +#define S_MASK ((S_SIZE1 - 1) * S_SIMD * 8) +#define S_MASK2 (((uint64_t)S_MASK << 32) | S_MASK) +#define S_SIZE_ALL (S_N * S_SIZE1 * S_SIMD) +#define S_P_SIZE (S_P * S_SIMD) +#define S_MIN_R ((S_P * S_SIMD + 15) / 16) + +/** + * pwxform(B): + * Transform the provided block using the provided S-boxes. 
+ */ + +static void +block_pwxform(uint64_t * B, const uint64_t * S) +{ + uint64_t(*X)[S_SIMD] = (uint64_t(*)[S_SIMD])B; + const uint8_t *S0 = (const uint8_t *)S; + const uint8_t *S1 = (const uint8_t *)(S + S_SIZE1 * S_SIMD); + size_t i, j; + + for (j = 0; j < S_P; j++) { + + uint64_t *Xj = X[j]; + uint64_t x0 = Xj[0]; + uint64_t x1 = Xj[1]; + + for (i = 0; i < S_ROUNDS; i++) { + uint64_t x = x0 & S_MASK2; + const uint64_t *p0, *p1; + + p0 = (const uint64_t *)(S0 + (uint32_t)x); + p1 = (const uint64_t *)(S1 + (x >> 32)); + + x0 = (uint64_t)(x0 >> 32) * (uint32_t)x0; + x0 += p0[0]; + x0 ^= p1[0]; + + x1 = (uint64_t)(x1 >> 32) * (uint32_t)x1; + x1 += p0[1]; + x1 ^= p1[1]; + } + Xj[0] = x0; + Xj[1] = x1; + } + + + +} + + +/** + * blockmix_pwxform(Bin, Bout, S, r): + * Compute Bout = BlockMix_pwxform{salsa20/8, S, r}(Bin). The input Bin must + * be 128r bytes in length; the output Bout must also be the same size. + * + * S lacks const qualifier to match blockmix_salsa8()'s prototype, which we + * need to refer to both functions via the same function pointers. 
+ */ +static void +blockmix_pwxform(const uint64_t * Bin, uint64_t * Bout, uint64_t * S, size_t r) +{ + size_t r1, r2, i; + // S_P_SIZE = 8; + /* Convert 128-byte blocks to (S_P_SIZE * 64-bit) blocks */ + + r1 = r * 128 / (S_P_SIZE * 8); + /* X <-- B_{r1 - 1} */ + blkcpy(Bout, &Bin[(r1 - 1) * S_P_SIZE], S_P_SIZE); + + /* X <-- X \xor B_i */ + blkxor(Bout, Bin, S_P_SIZE); + + /* X <-- H'(X) */ + /* B'_i <-- X */ + block_pwxform(Bout, S); + + /* for i = 0 to r1 - 1 do */ + for (i = 1; i < r1; i++) { + /* X <-- X \xor B_i */ + blkcpy(&Bout[i * S_P_SIZE], &Bout[(i - 1) * S_P_SIZE],S_P_SIZE); + blkxor(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE], S_P_SIZE); + + /* X <-- H'(X) */ + /* B'_i <-- X */ + block_pwxform(&Bout[i * S_P_SIZE], S); + } + + /* Handle partial blocks */ + if (i * S_P_SIZE < r * 16) { + blkcpy(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE],r * 16 - i * S_P_SIZE); +} + + i = (r1 - 1) * S_P_SIZE / 8; + /* Convert 128-byte blocks to 64-byte blocks */ + r2 = r * 2; + + /* B'_i <-- H(B'_i) */ + salsa20_8(&Bout[i * 8]); + + + i++; +/// not used yescrypt + + for (; i < r2; i++) { + /* B'_i <-- H(B'_i \xor B'_{i-1}) */ + blkxor(&Bout[i * 8], &Bout[(i - 1) * 8], 8); + salsa20_8(&Bout[i * 8]); + } +} + + + +/** + * integerify(B, r): + * Return the result of parsing B_{2r-1} as a little-endian integer. + */ +static uint64_t +integerify(const uint64_t * B, size_t r) +{ +/* + * Our 64-bit words are in host byte order, and word 6 holds the second 32-bit + * word of B_{2r-1} due to SIMD shuffling. The 64-bit value we return is also + * in host byte order, as it should be. + */ + const uint64_t * X = &B[(2 * r - 1) * 8]; + uint32_t lo = X[0]; + uint32_t hi = X[6] >> 32; + return ((uint64_t)hi << 32) + lo; +} + +/** + * smix1(B, r, N, flags, V, NROM, shared, XY, S): + * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 256r + 64 bytes in length. 
The value N must be even and + * no smaller than 2. + */ +static void +smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags, + uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, + uint64_t * XY, uint64_t * S) +{ + void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8); + const uint64_t * VROM = shared->shared1.aligned; + uint32_t VROM_mask = shared->mask1; + size_t s = 16 * r; + uint64_t * X = V; + uint64_t * Y = &XY[s]; + uint64_t * Z = S ? S : &XY[2 * s]; + uint64_t n, i, j; + size_t k; + + /* 1: X <-- B */ + /* 3: V_i <-- X */ + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8]; + salsa20_blk_t *tmp = (salsa20_blk_t *)Y; + salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8]; + for (k = 0; k < 16; k++) + tmp->w[k] = le32dec(&src->w[k]); + + salsa20_simd_shuffle(tmp, dst); + } + + /* 4: X <-- H(X) */ + /* 3: V_i <-- X */ + + blockmix(X, Y, Z, r); + blkcpy(&V[s], Y, s); + X = XY; + + if (NROM && (VROM_mask & 1)) { + if ((1 & VROM_mask) == 1) { + /* j <-- Integerify(X) mod NROM */ + j = integerify(Y, r) & (NROM - 1); + + /* X <-- H(X \xor VROM_j) */ + blkxor(Y, &VROM[j * s], s); + } + + blockmix(Y, X, Z, r); + + + /* 2: for i = 0 to N - 1 do */ + for (n = 1, i = 2; i < N; i += 2) { + /* 3: V_i <-- X */ + blkcpy(&V[i * s], X, s); + + if ((i & (i - 1)) == 0) + n <<= 1; + + /* j <-- Wrap(Integerify(X), i) */ + j = integerify(X, r) & (n - 1); + j += i - n; + + /* X <-- X \xor V_j */ + blkxor(X, &V[j * s], s); + + /* 4: X <-- H(X) */ + blockmix(X, Y, Z, r); + + /* 3: V_i <-- X */ + blkcpy(&V[(i + 1) * s], Y, s); + + j = integerify(Y, r); + if (((i + 1) & VROM_mask) == 1) { + /* j <-- Integerify(X) mod NROM */ + j &= NROM - 1; + + /* X <-- H(X \xor VROM_j) */ + blkxor(Y, &VROM[j * s], s); + } else { + /* j <-- Wrap(Integerify(X), i) */ + j &= n - 1; + j += i + 1 - n; + + /* X <-- H(X \xor V_j) */ + blkxor(Y, &V[j * s], s); + } + + blockmix(Y, X, Z, r); + } 
+ } else { + yescrypt_flags_t rw = flags & YESCRYPT_RW; + /* 4: X <-- H(X) */ + blockmix(Y, X, Z, r); + + /* 2: for i = 0 to N - 1 do */ + for (n = 1, i = 2; i < N; i += 2) { + /* 3: V_i <-- X */ + blkcpy(&V[i * s], X, s); + + if (rw) { + if ((i & (i - 1)) == 0) + n <<= 1; + + /* j <-- Wrap(Integerify(X), i) */ + j = integerify(X, r) & (n - 1); + j += i - n; + + /* X <-- X \xor V_j */ + blkxor(X, &V[j * s], s); + } + + /* 4: X <-- H(X) */ + blockmix(X, Y, Z, r); + + /* 3: V_i <-- X */ + blkcpy(&V[(i + 1) * s], Y, s); + + if (rw) { + /* j <-- Wrap(Integerify(X), i) */ + j = integerify(Y, r) & (n - 1); + j += (i + 1) - n; + + + /* X <-- X \xor V_j */ + blkxor(Y, &V[j * s], s); + } + + /* 4: X <-- H(X) */ + blockmix(Y, X, Z, r); + } + } + + /* B' <-- X */ + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8]; + salsa20_blk_t *tmp = (salsa20_blk_t *)Y; + salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8]; + for (k = 0; k < 16; k++) + le32enc(&tmp->w[k], src->w[k]); + salsa20_simd_unshuffle(tmp, dst); + } +} + + + +/** + * smix2(B, r, N, Nloop, flags, V, NROM, shared, XY, S): + * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 256r + 64 bytes in length. The value N must be a + * power of 2 greater than 1. The value Nloop must be even. + */ +static void +smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop, + yescrypt_flags_t flags, + uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, + uint64_t * XY, uint64_t * S) +{ + + void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = + (S ? blockmix_pwxform : blockmix_salsa8); + const uint64_t * VROM = shared->shared1.aligned; + uint32_t VROM_mask = shared->mask1 | 1; + size_t s = 16 * r; + yescrypt_flags_t rw = flags & YESCRYPT_RW; + uint64_t * X = XY; + uint64_t * Y = &XY[s]; + uint64_t * Z = S ? 
S : &XY[2 * s]; + uint64_t i, j; + size_t k; + + if (Nloop == 0) + return; + + /* X <-- B' */ + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8]; + salsa20_blk_t *tmp = (salsa20_blk_t *)Y; + salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8]; + for (k = 0; k < 16; k++) + tmp->w[k] = le32dec(&src->w[k]); + salsa20_simd_shuffle(tmp, dst); + } + if (NROM) { + + /* 6: for i = 0 to N - 1 do */ + for (i = 0; i < Nloop; i += 2) { + /* 7: j <-- Integerify(X) mod N */ + j = integerify(X, r) & (N - 1); + + /* 8: X <-- H(X \xor V_j) */ + blkxor(X, &V[j * s], s); + /* V_j <-- Xprev \xor V_j */ + if (rw) + blkcpy(&V[j * s], X, s); + blockmix(X, Y, Z, r); + + j = integerify(Y, r); + if (((i + 1) & VROM_mask) == 1) { + /* j <-- Integerify(X) mod NROM */ + j &= NROM - 1; + + /* X <-- H(X \xor VROM_j) */ + blkxor(Y, &VROM[j * s], s); + } else { + /* 7: j <-- Integerify(X) mod N */ + j &= N - 1; + + /* 8: X <-- H(X \xor V_j) */ + blkxor(Y, &V[j * s], s); + /* V_j <-- Xprev \xor V_j */ + if (rw) + blkcpy(&V[j * s], Y, s); + } + + blockmix(Y, X, Z, r); + } + } else { + + /* 6: for i = 0 to N - 1 do */ + i = Nloop / 2; + do { + /* 7: j <-- Integerify(X) mod N */ + j = integerify(X, r) & (N - 1); + + /* 8: X <-- H(X \xor V_j) */ + blkxor(X, &V[j * s], s); + /* V_j <-- Xprev \xor V_j */ + if (rw) + blkcpy(&V[j * s], X, s); + blockmix(X, Y, Z, r); + + /* 7: j <-- Integerify(X) mod N */ + j = integerify(Y, r) & (N - 1); + + /* 8: X <-- H(X \xor V_j) */ + blkxor(Y, &V[j * s], s); + /* V_j <-- Xprev \xor V_j */ + if (rw) + blkcpy(&V[j * s], Y, s); + blockmix(Y, X, Z, r); + } while (--i); + } + + /* 10: B' <-- X */ + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8]; + salsa20_blk_t *tmp = (salsa20_blk_t *)Y; + salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8]; + for (k = 0; k < 16; k++) + le32enc(&tmp->w[k], src->w[k]); + salsa20_simd_unshuffle(tmp, dst); + } +} + + + + +/** + * p2floor(x): + * Largest power of 2 
not greater than argument. + */ +static uint64_t +p2floor(uint64_t x) +{ + uint64_t y; + while ((y = x & (x - 1))) + x = y; + return x; +} + +/** + * smix(B, r, N, p, t, flags, V, NROM, shared, XY, S): + * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the + * temporary storage V must be 128rN bytes in length; the temporary storage + * XY must be 256r+64 or (256r+64)*p bytes in length (the larger size is + * required with OpenMP-enabled builds). The value N must be a power of 2 + * greater than 1. + */ +static void +smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t, + yescrypt_flags_t flags, + uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, + uint64_t * XY, uint64_t * S) +{ + size_t s = 16 * r; + uint64_t Nchunk = N / p, Nloop_all, Nloop_rw; + uint32_t i; + + Nloop_all = Nchunk; + if (flags & YESCRYPT_RW) { + if (t <= 1) { + if (t) + Nloop_all *= 2; /* 2/3 */ + Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */ + } else { + Nloop_all *= t - 1; + } + } else if (t) { + if (t == 1) + Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */ + Nloop_all *= t; + } + + Nloop_rw = 0; + if (flags & __YESCRYPT_INIT_SHARED) + Nloop_rw = Nloop_all; + else if (flags & YESCRYPT_RW) + Nloop_rw = Nloop_all / p; + + Nchunk &= ~(uint64_t)1; /* round down to even */ + Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */ + Nloop_rw &= ~(uint64_t)1; /* round down to even */ + + + for (i = 0; i < p; i++) { + uint64_t Vchunk = i * Nchunk; + uint64_t * Bp = &B[i * s]; + uint64_t * Vp = &V[Vchunk * s]; + uint64_t * XYp = XY; + + uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk); + uint64_t * Sp = S ? 
&S[i * S_SIZE_ALL] : S; + + if (Sp) + smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL); + + + + if (!(flags & __YESCRYPT_INIT_SHARED_2)) + smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp); + + + smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp); + + + + } + if (Nloop_all > Nloop_rw) { + + for (i = 0; i < p; i++) { + uint64_t * Bp = &B[i * s]; + + uint64_t * XYp = XY; + + uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; + smix2(Bp, r, N, Nloop_all - Nloop_rw,flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp); + + } + } + + + + +} + +static void +smix_old(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t, +yescrypt_flags_t flags, +uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, +uint64_t * XY, uint64_t * S) +{ + size_t s = 16 * r; + uint64_t Nchunk = N / p, Nloop_all, Nloop_rw; + uint32_t i; + + Nloop_all = Nchunk; + if (flags & YESCRYPT_RW) { + if (t <= 1) { + if (t) + Nloop_all *= 2; /* 2/3 */ + Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */ + } + else { + Nloop_all *= t - 1; + } + } + else if (t) { + if (t == 1) + Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */ + Nloop_all *= t; + } + + Nloop_rw = 0; + if (flags & __YESCRYPT_INIT_SHARED) + Nloop_rw = Nloop_all; + else if (flags & YESCRYPT_RW) + Nloop_rw = Nloop_all / p; + + Nchunk &= ~(uint64_t)1; /* round down to even */ + Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */ + Nloop_rw &= ~(uint64_t)1; /* round down to even */ + + + for (i = 0; i < p; i++) { + uint64_t Vchunk = i * Nchunk; + uint64_t * Bp = &B[i * s]; + uint64_t * Vp = &V[Vchunk * s]; + uint64_t * XYp = XY; + + uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk); + uint64_t * Sp = S ? 
&S[i * S_SIZE_ALL] : S; + + if (Sp) { + smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM, Sp, NROM, shared, XYp, NULL); + + + } + if (!(flags & __YESCRYPT_INIT_SHARED_2)) { + smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp); + } + + + smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp); + } + + if (Nloop_all > Nloop_rw) { + + for (i = 0; i < p; i++) { + uint64_t * Bp = &B[i * s]; + + uint64_t * XYp = XY; + + uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; + smix2(Bp, r, N, Nloop_all - Nloop_rw, flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp); + } + } +} + +/** + * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, + * N, r, p, t, flags, buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen), or a revision of scrypt as requested by flags and shared, and + * write the result into buf. The parameters r, p, and buflen must satisfy + * r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N must be a power + * of 2 greater than 1. + * + * t controls computation time while not affecting peak memory usage. shared + * and flags may request special modes as described in yescrypt.h. local is + * the thread-local data structure, allowing to preserve and reuse a memory + * allocation across calls, thereby reducing its overhead. + * + * Return 0 on success; or -1 on error. + */ +int +yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, + const uint8_t * passwd, size_t passwdlen, + const uint8_t * salt, size_t saltlen, + uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags, + uint8_t * buf, size_t buflen) +{ + yescrypt_region_t tmp; + uint64_t NROM; + size_t B_size, V_size, XY_size, need; + uint64_t * B, * V, * XY, * S; + uint64_t sha256[4]; + + /* + * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose, + * so don't let it have side-effects. 
Without this adjustment, it'd + * enable the SHA-256 password pre-hashing and output post-hashing, + * because any deviation from classic scrypt implies those. + */ + if (p == 1) + flags &= ~YESCRYPT_PARALLEL_SMIX; + + /* Sanity-check parameters */ + if (flags & ~YESCRYPT_KNOWN_FLAGS) { + errno = EINVAL; + return -1; + } +#if SIZE_MAX > UINT32_MAX + if (buflen > (((uint64_t)(1) << 32) - 1) * 32) { + errno = EFBIG; + return -1; + } +#endif + if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) { + errno = EFBIG; + return -1; + } + if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) { + errno = EINVAL; + return -1; + } + if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) { + errno = EINVAL; + return -1; + } +#if S_MIN_R > 1 + if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) { + errno = EINVAL; + return -1; + } +#endif + if ((p > SIZE_MAX / ((size_t)256 * r + 64)) || +#if SIZE_MAX / 256 <= UINT32_MAX + (r > SIZE_MAX / 256) || +#endif + (N > SIZE_MAX / 128 / r)) { + errno = ENOMEM; + return -1; + } + if (N > UINT64_MAX / ((uint64_t)t + 1)) { + errno = EFBIG; + return -1; + } + + if ((flags & YESCRYPT_PWXFORM) && + p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) { + errno = ENOMEM; + return -1; + } + + NROM = 0; + if (shared->shared1.aligned) { + NROM = shared->shared1.aligned_size / ((size_t)128 * r); + if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) || + !(flags & YESCRYPT_RW)) { + errno = EINVAL; + return -1; + } + } + + /* Allocate memory */ + V = NULL; + V_size = (size_t)128 * r * N; + + need = V_size; + if (flags & __YESCRYPT_INIT_SHARED) { + if (local->aligned_size < need) { + if (local->base || local->aligned || + local->base_size || local->aligned_size) { + errno = EINVAL; + return -1; + } + if (!alloc_region(local, need)) + return -1; + } + V = (uint64_t *)local->aligned; + need = 0; + } + B_size = (size_t)128 * r * p; + need += B_size; + if (need < B_size) { + errno = ENOMEM; + return -1; + } + XY_size = (size_t)256 * r + 64; + + need += XY_size; + if (need < 
XY_size) { + errno = ENOMEM; + return -1; + } + if (flags & YESCRYPT_PWXFORM) { + size_t S_size = S_SIZE_ALL * sizeof(*S); + + if (flags & YESCRYPT_PARALLEL_SMIX) + S_size *= p; + + need += S_size; + if (need < S_size) { + errno = ENOMEM; + return -1; + } + } + if (flags & __YESCRYPT_INIT_SHARED) { + if (!alloc_region(&tmp, need)) + return -1; + B = (uint64_t *)tmp.aligned; + XY = (uint64_t *)((uint8_t *)B + B_size); + } else { + init_region(&tmp); + if (local->aligned_size < need) { + if (free_region(local)) + return -1; + if (!alloc_region(local, need)) + return -1; + } + B = (uint64_t *)local->aligned; + V = (uint64_t *)((uint8_t *)B + B_size); + XY = (uint64_t *)((uint8_t *)V + V_size); + } + S = NULL; + if (flags & YESCRYPT_PWXFORM) + S = (uint64_t *)((uint8_t *)XY + XY_size); + + + if (t || flags) { + SHA256_CTX_Y ctx; + SHA256_Init_Y(&ctx); + SHA256_Update_Y(&ctx, passwd, passwdlen); + SHA256_Final_Y((uint8_t *)sha256, &ctx); + passwd = (uint8_t *)sha256; + passwdlen = sizeof(sha256); + } + /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ + PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1,(uint8_t *)B, B_size); + + if (t || flags) + { + blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0])); + } + if (p == 1 || (flags & YESCRYPT_PARALLEL_SMIX)) { + smix(B, r, N, p, t, flags, V, NROM, shared, XY, S); + } else { + uint32_t i; + /* 2: for i = 0 to p - 1 do */ + for (i = 0; i < p; i++) { + /* 3: B_i <-- MF(B_i, N) */ + smix(&B[(size_t)16 * r * i], r, N, 1, t, flags, V, NROM, shared, XY, S); + } + } + + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ + + PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen); + /* + * Except when computing classic scrypt, allow all computation so far + * to be performed on the client. 
The final steps below match those of + * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so + * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of + * SCRAM's use of SHA-1) would be usable with yescrypt hashes. + */ + if ((t || flags) && buflen == sizeof(sha256)) { + /* Compute ClientKey */ + + { + HMAC_SHA256_CTX_Y ctx; + HMAC_SHA256_Init_Y(&ctx, buf, buflen); + HMAC_SHA256_Update_Y(&ctx, salt, saltlen); + HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx); + } + /* Compute StoredKey */ + { + SHA256_CTX_Y ctx; + SHA256_Init_Y(&ctx); + SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256)); + SHA256_Final_Y(buf, &ctx); + } + } + + if (free_region(&tmp)) + return -1; + + /* Success! */ + return 0; +} + +int +yescrypt_kdf_old(const yescrypt_shared_t * shared, yescrypt_local_t * local, +const uint8_t * passwd, size_t passwdlen, +const uint8_t * salt, size_t saltlen, +uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags, +uint8_t * buf, size_t buflen) +{ + yescrypt_region_t tmp; + uint64_t NROM; + size_t B_size, V_size, XY_size, need; + uint64_t * B, *V, *XY, *S; + uint64_t sha256[4]; + + /* + * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose, + * so don't let it have side-effects. Without this adjustment, it'd + * enable the SHA-256 password pre-hashing and output post-hashing, + * because any deviation from classic scrypt implies those. 
+ */ + if (p == 1) + flags &= ~YESCRYPT_PARALLEL_SMIX; + + /* Sanity-check parameters */ + if (flags & ~YESCRYPT_KNOWN_FLAGS) { + errno = EINVAL; + return -1; + } +#if SIZE_MAX > UINT32_MAX + if (buflen > (((uint64_t)(1) << 32) - 1) * 32) { + errno = EFBIG; + return -1; + } +#endif + if ((uint64_t)(r)* (uint64_t)(p) >= (1 << 30)) { + errno = EFBIG; + return -1; + } + if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) { + errno = EINVAL; + return -1; + } + if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) { + errno = EINVAL; + return -1; + } +#if S_MIN_R > 1 + if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) { + errno = EINVAL; + return -1; + } +#endif + if ((p > SIZE_MAX / ((size_t)256 * r + 64)) || +#if SIZE_MAX / 256 <= UINT32_MAX + (r > SIZE_MAX / 256) || +#endif + (N > SIZE_MAX / 128 / r)) { + errno = ENOMEM; + return -1; + } + if (N > UINT64_MAX / ((uint64_t)t + 1)) { + errno = EFBIG; + return -1; + } + + if ((flags & YESCRYPT_PWXFORM) && + p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) { + errno = ENOMEM; + return -1; + } + + NROM = 0; + if (shared->shared1.aligned) { + NROM = shared->shared1.aligned_size / ((size_t)128 * r); + if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) || + !(flags & YESCRYPT_RW)) { + errno = EINVAL; + return -1; + } + } + + /* Allocate memory */ + V = NULL; + V_size = (size_t)128 * r * N; + + need = V_size; + if (flags & __YESCRYPT_INIT_SHARED) { + if (local->aligned_size < need) { + if (local->base || local->aligned || + local->base_size || local->aligned_size) { + errno = EINVAL; + return -1; + } + if (!alloc_region(local, need)) + return -1; + } + V = (uint64_t *)local->aligned; + need = 0; + } + B_size = (size_t)128 * r * p; + need += B_size; + if (need < B_size) { + errno = ENOMEM; + return -1; + } + XY_size = (size_t)256 * r + 64; + + need += XY_size; + if (need < XY_size) { + errno = ENOMEM; + return -1; + } + if (flags & YESCRYPT_PWXFORM) { + size_t S_size = S_SIZE_ALL * sizeof(*S); + + if (flags & YESCRYPT_PARALLEL_SMIX) 
+ S_size *= p; + + need += S_size; + if (need < S_size) { + errno = ENOMEM; + return -1; + } + } + if (flags & __YESCRYPT_INIT_SHARED) { + if (!alloc_region(&tmp, need)) + return -1; + B = (uint64_t *)tmp.aligned; + XY = (uint64_t *)((uint8_t *)B + B_size); + } + else { + init_region(&tmp); + if (local->aligned_size < need) { + if (free_region(local)) + return -1; + if (!alloc_region(local, need)) + return -1; + } + B = (uint64_t *)local->aligned; + V = (uint64_t *)((uint8_t *)B + B_size); + XY = (uint64_t *)((uint8_t *)V + V_size); + } + S = NULL; + if (flags & YESCRYPT_PWXFORM) + S = (uint64_t *)((uint8_t *)XY + XY_size); + + + if (t || flags) { + SHA256_CTX_Y ctx; + SHA256_Init_Y(&ctx); + SHA256_Update_Y(&ctx, passwd, passwdlen); + SHA256_Final_Y((uint8_t *)sha256, &ctx); + passwd = (uint8_t *)sha256; + passwdlen = sizeof(sha256); + } + + /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ + PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, (uint8_t *)B, B_size); + + + if (t || flags) + { + blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0])); + } + smix(B, r, N, p, t, flags, V, NROM, shared, XY, S); + + + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ + PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen); + + /* + * Except when computing classic scrypt, allow all computation so far + * to be performed on the client. The final steps below match those of + * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so + * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of + * SCRAM's use of SHA-1) would be usable with yescrypt hashes. 
+ */ + if ((t || flags) && buflen == sizeof(sha256)) { + /* Compute ClientKey */ + + { + HMAC_SHA256_CTX_Y ctx; + HMAC_SHA256_Init_Y(&ctx, buf, buflen); + HMAC_SHA256_Update_Y(&ctx, salt, saltlen); + HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx); + } + /* Compute StoredKey */ + { + SHA256_CTX_Y ctx; + SHA256_Init_Y(&ctx); + SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256)); + SHA256_Final_Y(buf, &ctx); + } + } + + if (free_region(&tmp)) + return -1; + + /* Success! */ + return 0; +} + diff --git a/algorithm/yescrypt.c b/algorithm/yescrypt.c new file mode 100644 index 000000000..de00d0f33 --- /dev/null +++ b/algorithm/yescrypt.c @@ -0,0 +1,128 @@ +/*- + * Copyright 2015 djm34 + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "algorithm/yescrypt_core.h" + +static const uint32_t diff1targ = 0x0000ffff; + +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + +/* Used externally as confirmation of correct OCL code */ +int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + yescrypt_hash((unsigned char*)data,(unsigned char*)ohash); + + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + + if (tmp_hash7 > diff1targ) + return -1; + + if (tmp_hash7 > Htarg) + return 0; + + return 1; +} + +void yescrypt_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + + yescrypt_hash((unsigned char*)data, (unsigned char*)ohash); + +} + + +bool scanhash_yescrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, 
unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while (1) + { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + + yescrypt_hash((unsigned char*)data, (unsigned char*)ostate); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) + { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) + { + *last_nonce = n; + break; + } + } + + return ret; +} \ No newline at end of file diff --git a/algorithm/yescrypt.h b/algorithm/yescrypt.h new file mode 100644 index 000000000..b51cb4959 --- /dev/null +++ b/algorithm/yescrypt.h @@ -0,0 +1,10 @@ +#ifndef YESCRYPT_H +#define YESCRYPT_H + +#include "miner.h" +#define YESCRYPT_SCRATCHBUF_SIZE (128 * 2048 * 8 ) //uchar +#define YESCRYP_SECBUF_SIZE (128*64*8) +extern int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void yescrypt_regenhash(struct work *work); + +#endif /* YESCRYPT_H */ diff --git a/algorithm/yescrypt_core.h b/algorithm/yescrypt_core.h new file mode 100644 index 000000000..64b9a11f6 --- /dev/null +++ b/algorithm/yescrypt_core.h @@ -0,0 +1,376 @@ +/*- + * Copyright 2009 Colin Percival + * Copyright 2013,2014 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ +#ifndef _YESCRYPT_H_ +#define _YESCRYPT_H_ + +#include +#include /* for size_t */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +//extern void yescrypt_hash_sp(const unsigned char *input, unsigned char *output); +extern void yescrypt_hash(const unsigned char *input, unsigned char *output); + + + +/** + * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen) and write the result into buf. The parameters r, p, and buflen + * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N + * must be a power of 2 greater than 1. + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as buf is local to the thread. 
+ */ +extern int crypto_scrypt(const uint8_t * __passwd, size_t __passwdlen, + const uint8_t * __salt, size_t __saltlen, + uint64_t __N, uint32_t __r, uint32_t __p, + uint8_t * __buf, size_t __buflen); + +/** + * Internal type used by the memory allocator. Please do not use it directly. + * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since + * they might differ from each other in a future version. + */ +typedef struct { + void * base, * aligned; + size_t base_size, aligned_size; +} yescrypt_region_t; + +/** + * Types for shared (ROM) and thread-local (RAM) data structures. + */ +typedef yescrypt_region_t yescrypt_shared1_t; +typedef struct { + yescrypt_shared1_t shared1; + uint32_t mask1; +} yescrypt_shared_t; +typedef yescrypt_region_t yescrypt_local_t; + +/** + * Possible values for yescrypt_init_shared()'s flags argument. + */ +typedef enum { + YESCRYPT_SHARED_DEFAULTS = 0, + YESCRYPT_SHARED_PREALLOCATED = 0x100 +} yescrypt_init_shared_flags_t; + +/** + * Possible values for the flags argument of yescrypt_kdf(), + * yescrypt_gensalt_r(), yescrypt_gensalt(). These may be OR'ed together, + * except that YESCRYPT_WORM and YESCRYPT_RW are mutually exclusive. + * Please refer to the description of yescrypt_kdf() below for the meaning of + * these flags. + */ +typedef enum { +/* public */ + YESCRYPT_WORM = 0, + YESCRYPT_RW = 1, + YESCRYPT_PARALLEL_SMIX = 2, + YESCRYPT_PWXFORM = 4, +/* private */ + __YESCRYPT_INIT_SHARED_1 = 0x10000, + __YESCRYPT_INIT_SHARED_2 = 0x20000, + __YESCRYPT_INIT_SHARED = 0x30000 +} yescrypt_flags_t; + +#define YESCRYPT_KNOWN_FLAGS \ + (YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | YESCRYPT_PWXFORM | \ + __YESCRYPT_INIT_SHARED) + +/** + * yescrypt_init_shared(shared, param, paramlen, N, r, p, flags, mask, + * buf, buflen): + * Optionally allocate memory for and initialize the shared (ROM) data + * structure. The parameters N, r, and p must satisfy the same conditions as + * with crypto_scrypt(). 
param and paramlen specify a local parameter with + * which the ROM is seeded. If buf is not NULL, then it is used to return + * buflen bytes of message digest for the initialized ROM (the caller may use + * this to verify that the ROM has been computed in the same way that it was on + * a previous run). + * + * Return 0 on success; or -1 on error. + * + * If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the + * ROM is assumed to have been preallocated by the caller, with + * shared->shared1.aligned being the start address of the ROM and + * shared->shared1.aligned_size being its size (which must be consistent with + * N, r, and p). This may be used e.g. when the ROM is to be placed in a SysV + * shared memory segment allocated by the caller. + * + * mask controls the frequency of ROM accesses by yescrypt_kdf(). Normally it + * should be set to 1, to interleave RAM and ROM accesses, which works well + * when both regions reside in the machine's RAM anyway. Other values may be + * used e.g. when the ROM is memory-mapped from a disk file. Recommended mask + * values are powers of 2 minus 1 or minus 2. Here's the effect of some mask + * values: + * mask value ROM accesses in SMix 1st loop ROM accesses in SMix 2nd loop + * 0 0 1/2 + * 1 1/2 1/2 + * 2 0 1/4 + * 3 1/4 1/4 + * 6 0 1/8 + * 7 1/8 1/8 + * 14 0 1/16 + * 15 1/16 1/16 + * 1022 0 1/1024 + * 1023 1/1024 1/1024 + * + * Actual computation of the ROM contents may be avoided, if you don't intend + * to use a ROM but need a dummy shared structure, by calling this function + * with NULL, 0, 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0 for the + * arguments starting with param and on. + * + * MT-safe as long as shared is local to the thread. 
+ */ +extern int yescrypt_init_shared(yescrypt_shared_t * __shared, + const uint8_t * __param, size_t __paramlen, + uint64_t __N, uint32_t __r, uint32_t __p, + yescrypt_init_shared_flags_t __flags, uint32_t __mask, + uint8_t * __buf, size_t __buflen); + +/** + * yescrypt_free_shared(shared): + * Free memory that had been allocated with yescrypt_init_shared(). + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as shared is local to the thread. + */ +extern int yescrypt_free_shared(yescrypt_shared_t * __shared); + +/** + * yescrypt_init_local(local): + * Initialize the thread-local (RAM) data structure. Actual memory allocation + * is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r(). + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as local is local to the thread. + */ +extern int yescrypt_init_local(yescrypt_local_t * __local); + +/** + * yescrypt_free_local(local): + * Free memory that may have been allocated for an initialized thread-local + * (RAM) data structure. + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as local is local to the thread. + */ +extern int yescrypt_free_local(yescrypt_local_t * __local); + +/** + * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, + * N, r, p, t, flags, buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen), or a revision of scrypt as requested by flags and shared, and + * write the result into buf. The parameters N, r, p, and buflen must satisfy + * the same conditions as with crypto_scrypt(). t controls computation time + * while not affecting peak memory usage. shared and flags may request + * special modes as described below. local is the thread-local data + * structure; it lets a memory allocation be preserved and reused across calls, + * thereby reducing its overhead. + * + * Return 0 on success; or -1 on error. + * + * t controls computation time.
t = 0 is optimal in terms of achieving the + * highest area-time for ASIC attackers. Thus, higher computation time, if + * affordable, is best achieved by increasing N rather than by increasing t. + * However, if the higher memory usage (which goes along with higher N) is not + * affordable, or if fine-tuning of the time is needed (recall that N must be a + * power of 2), then t = 1 or above may be used to increase time while staying + * at the same peak memory usage. t = 1 increases the time by 25% and + * decreases the normalized area-time to 96% of optimal. (Of course, in + * absolute terms the area-time increases with higher t. It's just that it + * would increase slightly more with higher N*r rather than with higher t.) + * t = 2 increases the time by another 20% and decreases the normalized + * area-time to 89% of optimal. Thus, these two values are reasonable to use + * for fine-tuning. Values of t higher than 2 result in further increase in + * time while reducing the efficiency much further (e.g., down to around 50% of + * optimal for t = 5, which runs 3 to 4 times slower than t = 0, with exact + * numbers varying by the flags settings). + * + * Classic scrypt is available by setting t = 0 and flags to YESCRYPT_WORM and + * passing a dummy shared structure (see the description of + * yescrypt_init_shared() above for how to produce one). In this mode, the + * thread-local memory region (RAM) is first sequentially written to and then + * randomly read from. This algorithm is friendly towards time-memory + * tradeoffs (TMTO), available both to defenders (albeit not in this + * implementation) and to attackers. + * + * Setting YESCRYPT_RW adds extra random reads and writes to the thread-local + * memory region (RAM), which makes TMTO a lot less efficient. This may be + * used to slow down the kinds of attackers who would otherwise benefit from + * classic scrypt's efficient TMTO. 
Since classic scrypt's TMTO allows not + * only for the tradeoff, but also for a decrease of attacker's area-time (by + * up to a constant factor), setting YESCRYPT_RW substantially increases the + * cost of attacks in area-time terms as well. Yet another benefit of it is + * that optimal area-time is reached at an earlier time than with classic + * scrypt, and t = 0 actually corresponds to this earlier completion time, + * resulting in quicker hash computations (and thus in higher request rate + * capacity). Due to these properties, YESCRYPT_RW should almost always be + * set, except when compatibility with classic scrypt or TMTO-friendliness are + * desired. + * + * YESCRYPT_PARALLEL_SMIX moves parallelism that is present with p > 1 to a + * lower level as compared to where it is in classic scrypt. This reduces + * flexibility for efficient computation (for both attackers and defenders) by + * requiring that, short of resorting to TMTO, the full amount of memory be + * allocated as needed for the specified p, regardless of whether that + * parallelism is actually being fully made use of or not. (For comparison, a + * single instance of classic scrypt may be computed in less memory without any + * CPU time overhead, but in more real time, by not making full use of the + * parallelism.) This may be desirable when the defender has enough memory + * with sufficiently low latency and high bandwidth for efficient full parallel + * execution, yet the required memory size is high enough that some likely + * attackers might end up being forced to choose between using higher latency + * memory than they could use otherwise (waiting for data longer) or using TMTO + * (waiting for data more times per one hash computation). The area-time cost + * for other kinds of attackers (who would use the same memory type and TMTO + * factor or no TMTO either way) remains roughly the same, given the same + * running time for the defender. 
In the TMTO-friendly YESCRYPT_WORM mode, as + * long as the defender has enough memory that is just as fast as the smaller + * per-thread regions would be, doesn't expect to ever need greater + * flexibility (except possibly via TMTO), and doesn't need backwards + * compatibility with classic scrypt, there are no other serious drawbacks to + * this setting. In the YESCRYPT_RW mode, which is meant to discourage TMTO, + * this new approach to parallelization makes TMTO less inefficient. (This is + * an unfortunate side-effect of avoiding some random writes, as we have to in + * order to allow for parallel threads to access a common memory region without + * synchronization overhead.) Thus, in this mode this setting poses an extra + * tradeoff of its own (higher area-time cost for a subset of attackers vs. + * better TMTO resistance). Setting YESCRYPT_PARALLEL_SMIX also changes the + * way the running time is to be controlled from N*r*p (for classic scrypt) to + * N*r (in this modification). All of this applies only when p > 1. For + * p = 1, this setting is a no-op. + * + * Passing a real shared structure, with ROM contents previously computed by + * yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW for + * the thread-local RAM region. In order to allow for initialization of the + * ROM to be split into a separate program, the shared->shared1.aligned and + * shared->shared1.aligned_size fields may be set by the caller of + * yescrypt_kdf() manually rather than with yescrypt_init_shared(). + * + * local must be initialized with yescrypt_init_local(). + * + * MT-safe as long as local and buf are local to the thread. 
+ */ +extern int yescrypt_kdf(const yescrypt_shared_t * __shared, + yescrypt_local_t * __local, + const uint8_t * __passwd, size_t __passwdlen, + const uint8_t * __salt, size_t __saltlen, + uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t, + yescrypt_flags_t __flags, + uint8_t * __buf, size_t __buflen); + +/** + * yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen): + * Compute and encode an scrypt or enhanced scrypt hash of passwd given the + * parameters and salt value encoded in setting. If the shared structure is + * not dummy, a ROM is used and YESCRYPT_RW is required. Otherwise, whether to + * use the YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff + * discouraging modification) is determined by the setting string. shared and + * local must be initialized as described above for yescrypt_kdf(). buf must + * be large enough (as indicated by buflen) to hold the encoded hash string. + * + * Return the encoded hash string on success; or NULL on error. + * + * MT-safe as long as local and buf are local to the thread. + */ +extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared, + yescrypt_local_t * __local, + const uint8_t * __passwd, size_t __passwdlen, + const uint8_t * __setting, + uint8_t * __buf, size_t __buflen); + +/** + * yescrypt(passwd, setting): + * Compute and encode an scrypt or enhanced scrypt hash of passwd given the + * parameters and salt value encoded in setting. Whether to use the + * YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff + * discouraging modification) is determined by the setting string. + * + * Return the encoded hash string on success; or NULL on error. + * + * This is a crypt(3)-like interface, which is simpler to use than + * yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM, + * and it is slower than yescrypt_r() for repeated calls because it allocates + * and frees memory on each call. + * + * MT-unsafe. 
+ */ +extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting); + +/** + * yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen): + * Generate a setting string for use with yescrypt_r() and yescrypt() by + * encoding into it the parameters N_log2 (which is to be set to base 2 + * logarithm of the desired value for N), r, p, flags, and a salt given by src + * (of srclen bytes). buf must be large enough (as indicated by buflen) to + * hold the setting string. + * + * Return the setting string on success; or NULL on error. + * + * MT-safe as long as buf is local to the thread. + */ +extern uint8_t * yescrypt_gensalt_r( + uint32_t __N_log2, uint32_t __r, uint32_t __p, + yescrypt_flags_t __flags, + const uint8_t * __src, size_t __srclen, + uint8_t * __buf, size_t __buflen); + +/** + * yescrypt_gensalt(N_log2, r, p, flags, src, srclen): + * Generate a setting string for use with yescrypt_r() and yescrypt(). This + * function is the same as yescrypt_gensalt_r() except that it uses a static + * buffer and thus is not MT-safe. + * + * Return the setting string on success; or NULL on error. + * + * MT-unsafe. + */ +extern uint8_t * yescrypt_gensalt( + uint32_t __N_log2, uint32_t __r, uint32_t __p, + yescrypt_flags_t __flags, + const uint8_t * __src, size_t __srclen); + +#ifdef __cplusplus +} +#endif + +#endif /* !_YESCRYPT_H_ */ diff --git a/algorithm/yescryptcommon.c b/algorithm/yescryptcommon.c new file mode 100644 index 000000000..cf7067d02 --- /dev/null +++ b/algorithm/yescryptcommon.c @@ -0,0 +1,360 @@ +/*- + * Copyright 2013,2014 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include "algorithm/yescrypt_core.h" + +#define BYTES2CHARS(bytes) \ + ((((bytes) * 8) + 5) / 6) + +#define HASH_SIZE 32 /* bytes */ +#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* base-64 chars */ +#define YESCRYPT_FLAGS (YESCRYPT_RW | YESCRYPT_PWXFORM) +static const char * const itoa64 = + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +static uint8_t * encode64_uint32(uint8_t * dst, size_t dstlen, + uint32_t src, uint32_t srcbits) +{ + uint32_t bit; + + for (bit = 0; bit < srcbits; bit += 6) { + if (dstlen < 1) + return NULL; + *dst++ = itoa64[src & 0x3f]; + dstlen--; + src >>= 6; + } + + return dst; +} + +static uint8_t * encode64(uint8_t * dst, size_t dstlen, + const uint8_t * src, size_t srclen) +{ + size_t i; + + for (i = 0; i < srclen; ) { + uint8_t * dnext; + uint32_t value = 0, bits = 0; + do { + value |= (uint32_t)src[i++] << bits; + bits += 8; + } while (bits < 24 && i < srclen); + dnext = encode64_uint32(dst, dstlen, value, bits); + if (!dnext) + return NULL; + dstlen -= dnext - dst; + dst = dnext; + } + + return dst; +} + +static int decode64_one(uint32_t * dst, uint8_t src) +{ + const char * ptr = strchr(itoa64, src); + if (ptr) { + *dst = ptr - itoa64; + return 0; + } + *dst = 0; + return -1; +} + +static const uint8_t * decode64_uint32(uint32_t * dst, uint32_t dstbits, + const uint8_t * src) +{ + uint32_t bit; + uint32_t value; + + value = 0; + for (bit = 0; bit < 
dstbits; bit += 6) { + uint32_t one; + if (decode64_one(&one, *src)) { + *dst = 0; + return NULL; + } + src++; + value |= one << bit; + } + + *dst = value; + return src; +} + +uint8_t * +yescrypt_r(const yescrypt_shared_t * shared, yescrypt_local_t * local, + const uint8_t * passwd, size_t passwdlen, + const uint8_t * setting, + uint8_t * buf, size_t buflen) +{ + uint8_t hash[HASH_SIZE]; + const uint8_t * src, * salt; + uint8_t * dst; + size_t prefixlen, saltlen, need; + uint8_t version; + uint64_t N; + uint32_t r, p; + yescrypt_flags_t flags = YESCRYPT_WORM; + fflush(stdout); + if (setting[0] != '$' || setting[1] != '7') + { + fflush(stdout); + return NULL; + } + fflush(stdout); + src = setting + 2; + fflush(stdout); + switch ((version = *src)) { + case '$': + fflush(stdout); + break; + case 'X': + src++; + flags = YESCRYPT_RW; + fflush(stdout); + break; + default: + { + fflush(stdout); + return NULL; + } + } + + fflush(stdout); + if (*src != '$') { + uint32_t decoded_flags; + if (decode64_one(&decoded_flags, *src)) + + { + fflush(stdout); + return NULL; + } + flags = decoded_flags; + if (*++src != '$') + { + fflush(stdout); + return NULL; + } + } + src++; + + { + uint32_t N_log2; + if (decode64_one(&N_log2, *src)) + { + return NULL; + } + src++; + N = (uint64_t)1 << N_log2; + } + + src = decode64_uint32(&r, 30, src); + if (!src) + { + return NULL; + } + + src = decode64_uint32(&p, 30, src); + if (!src) + { + return NULL; + } + + prefixlen = src - setting; + + salt = src; + src = (uint8_t *)strrchr((char *)salt, '$'); + if (src) + saltlen = src - salt; + else + saltlen = strlen((char *)salt); + + need = prefixlen + saltlen + 1 + HASH_LEN + 1; + if (need > buflen || need < saltlen) + + { + fflush(stdout); + return NULL; + } + +fflush(stdout); + if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, + N, r, p, 0, flags, hash, sizeof(hash))) + { + fflush(stdout); + return NULL; + } + + dst = buf; + memcpy(dst, setting, prefixlen + saltlen); + dst += 
prefixlen + saltlen; + *dst++ = '$'; + + dst = encode64(dst, buflen - (dst - buf), hash, sizeof(hash)); + /* Could zeroize hash[] here, but yescrypt_kdf() doesn't zeroize its + * memory allocations yet anyway. */ + if (!dst || dst >= buf + buflen) /* Can't happen */ + { + return NULL; + } + + *dst = 0; /* NUL termination */ + fflush(stdout); + return buf; +} + +uint8_t * +yescrypt(const uint8_t * passwd, const uint8_t * setting) +{ + static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1]; + yescrypt_shared_t shared; + yescrypt_local_t local; + uint8_t * retval; + if (yescrypt_init_shared(&shared, NULL, 0, + 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0)) + return NULL; + if (yescrypt_init_local(&local)) { + yescrypt_free_shared(&shared); + return NULL; + } + retval = yescrypt_r(&shared, &local, + passwd, 80, setting, buf, sizeof(buf)); + // printf("hashse='%s'\n", (char *)retval); + if (yescrypt_free_local(&local)) { + yescrypt_free_shared(&shared); + return NULL; + } + if (yescrypt_free_shared(&shared)) + return NULL; + return retval; + +} + +uint8_t * +yescrypt_gensalt_r(uint32_t N_log2, uint32_t r, uint32_t p, + yescrypt_flags_t flags, + const uint8_t * src, size_t srclen, + uint8_t * buf, size_t buflen) +{ + uint8_t * dst; + size_t prefixlen = 3 + 1 + 5 + 5; + size_t saltlen = BYTES2CHARS(srclen); + size_t need; + + if (p == 1) + flags &= ~YESCRYPT_PARALLEL_SMIX; + + if (flags) { + if (flags & ~0x3f) + return NULL; + + prefixlen++; + if (flags != YESCRYPT_RW) + prefixlen++; + } + + need = prefixlen + saltlen + 1; + if (need > buflen || need < saltlen || saltlen < srclen) + return NULL; + + if (N_log2 > 63 || ((uint64_t)r * (uint64_t)p >= (1U << 30))) + return NULL; + + dst = buf; + *dst++ = '$'; + *dst++ = '7'; + if (flags) { + *dst++ = 'X'; /* eXperimental, subject to change */ + if (flags != YESCRYPT_RW) + *dst++ = itoa64[flags]; + } + *dst++ = '$'; + + *dst++ = itoa64[N_log2]; + + dst = encode64_uint32(dst, buflen - (dst - buf), r, 30); + if 
(!dst) /* Can't happen */ + return NULL; + + dst = encode64_uint32(dst, buflen - (dst - buf), p, 30); + if (!dst) /* Can't happen */ + return NULL; + + dst = encode64(dst, buflen - (dst - buf), src, srclen); + if (!dst || dst >= buf + buflen) /* Can't happen */ + return NULL; + + *dst = 0; /* NUL termination */ + + return buf; +} + +uint8_t * +yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p, + yescrypt_flags_t flags, + const uint8_t * src, size_t srclen) +{ + static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1]; + return yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, + buf, sizeof(buf)); +} + +static int +yescrypt_bsty(const uint8_t * passwd, size_t passwdlen, + const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, + uint8_t * buf, size_t buflen) +{ + static __thread int initialized = 0; + static __thread yescrypt_shared_t shared; + static __thread yescrypt_local_t local; + +// static __declspec(thread) int initialized = 0; +// static __declspec(thread) yescrypt_shared_t shared; +// static __declspec(thread) yescrypt_local_t local; + + int retval; + if (!initialized) { +/* "shared" could in fact be shared, but it's simpler to keep it private + * along with "local". It's dummy and tiny anyway. 
*/ + if (yescrypt_init_shared(&shared, NULL, 0, + 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0)) + return -1; + if (yescrypt_init_local(&local)) { + yescrypt_free_shared(&shared); + return -1; + } + initialized = 1; + } + retval = yescrypt_kdf(&shared, &local, + passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS, + buf, buflen); + + return retval; +} + +void yescrypt_hash(const unsigned char *input, unsigned char *output) +{ + + yescrypt_bsty((const uint8_t *)input, 80, (const uint8_t *) input, 80, 2048, 8, 1, (uint8_t *)output, 32); +} diff --git a/driver-opencl.c b/driver-opencl.c index 72ee95596..f0a88a6f6 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -257,14 +257,14 @@ char *set_gpu_threads(const char *_arg) if (nextptr == NULL) return "Invalid parameters for set_gpu_threads"; val = atoi(nextptr); - if (val < 1 || val > 10) + if (val < 1 || val > 20) // gpu_threads increase max value to 20 return "Invalid value passed to set_gpu_threads"; gpus[device++].threads = val; while ((nextptr = strtok(NULL, ",")) != NULL) { val = atoi(nextptr); - if (val < 1 || val > 10) + if (val < 1 || val > 20) // gpu_threads increase max value to 20 return "Invalid value passed to set_gpu_threads"; gpus[device++].threads = val; @@ -1472,6 +1472,9 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, } applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id); postcalc_hash_async(thr, work, thrdata->res); +// postcalc_hash(thr); +// submit_tested_work(thr, work); +// submit_work_async(work); memset(thrdata->res, 0, buffersize); /* This finish flushes the writebuffer set with CL_FALSE in clEnqueueWriteBuffer */ clFinish(clState->commandQueue); @@ -1493,6 +1496,12 @@ static void opencl_thread_shutdown(struct thr_info *thr) clFinish(clState->commandQueue); clReleaseMemObject(clState->outputBuffer); clReleaseMemObject(clState->CLbuffer0); + if (clState->buffer1) + clReleaseMemObject(clState->buffer1); + if (clState->buffer2) + 
clReleaseMemObject(clState->buffer2); + if (clState->buffer3) + clReleaseMemObject(clState->buffer3); if (clState->padbuffer8) clReleaseMemObject(clState->padbuffer8); clReleaseKernel(clState->kernel); diff --git a/findnonce.c b/findnonce.c index c24029972..8858cfa6c 100644 --- a/findnonce.c +++ b/findnonce.c @@ -214,6 +214,7 @@ static void *postcalc_hash(void *userdata) void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) { + struct pc_data *pcd = (struct pc_data *)malloc(sizeof(struct pc_data)); int buffersize; @@ -225,8 +226,7 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) pcd->thr = thr; pcd->work = copy_work(work); buffersize = BUFFERSIZE; - - memcpy(&pcd->res, res, buffersize); + memcpy(&pcd->res, res, buffersize); if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) { applog(LOG_ERR, "Failed to create postcalc_hash thread"); @@ -366,4 +366,4 @@ void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) blk->cty_a = data[16]; blk->cty_b = data[17]; blk->cty_c = data[18]; -} \ No newline at end of file +} diff --git a/kernel/bmw256.cl b/kernel/bmw256.cl new file mode 100644 index 000000000..19c85cbc9 --- /dev/null +++ b/kernel/bmw256.cl @@ -0,0 +1,162 @@ +/* +* bmw256 kernel implementation. 
+* +* ==========================(LICENSE BEGIN)============================ +* Copyright (c) 2015 djm34 +* +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/*
 * Message/chaining term shared by both expansion functions.
 *
 * With j = i - 16 and all indices taken modulo 16 this evaluates
 *   (i * 0x05555555 + rotl(M32[j], j + 1) + rotl(M32[j + 3], j + 4)
 *      - rotl(M32[j + 10], j + 11)) ^ H[j + 7]
 * entirely in 32-bit arithmetic.  The previous in-line form used the literal
 * 0x05555555ul, which is 64 bits wide in OpenCL C and dragged the whole term
 * through 64-bit operations before it was truncated again; the low 32 bits,
 * and therefore the result, are unchanged.
 *
 * i   : index of the Q word being produced; callers pass 16..31
 * M32 : 16-word message block
 * H   : 16-word chaining value
 */
uint bmw32_add_element(int i, uint *M32, uint *H)
{
	const uint j0 = (uint)(i - 16) & 15u;
	const uint j3 = (j0 + 3u) & 15u;
	const uint j7 = (j0 + 7u) & 15u;
	const uint j10 = (j0 + 10u) & 15u;
	const uint k = (uint)i * 0x05555555u;

	return (k + SPH_ROTL32(M32[j0], j0 + 1u) + SPH_ROTL32(M32[j3], j3 + 1u)
		- SPH_ROTL32(M32[j10], j10 + 1u)) ^ H[j7];
}

/*
 * Message expansion function 1.
 *
 * Returns the new word Q[i] computed from the sixteen preceding words
 * Q[i-16..i-1], each passed through one of ss0..ss3, plus the
 * message/chaining term.  Requires i >= 16 (Q[i - 16] is read).
 */
uint expand32_1(int i, uint *M32, uint *H, uint *Q)
{
	return (ss1(Q[i - 16]) + ss2(Q[i - 15]) + ss3(Q[i - 14]) + ss0(Q[i - 13])
		+ ss1(Q[i - 12]) + ss2(Q[i - 11]) + ss3(Q[i - 10]) + ss0(Q[i - 9])
		+ ss1(Q[i - 8]) + ss2(Q[i - 7]) + ss3(Q[i - 6]) + ss0(Q[i - 5])
		+ ss1(Q[i - 4]) + ss2(Q[i - 3]) + ss3(Q[i - 2]) + ss0(Q[i - 1])
		+ bmw32_add_element(i, M32, H));
}

/*
 * Message expansion function 2.
 *
 * Cheaper variant of expand32_1: even-offset words are added unchanged,
 * odd-offset words are rotated (rs1..rs7) and only the last two go through
 * ss4/ss5.  Same message/chaining term, same requirement i >= 16.
 */
uint expand32_2(int i, uint *M32, uint *H, uint *Q)
{
	return (Q[i - 16] + rs1(Q[i - 15]) + Q[i - 14] + rs2(Q[i - 13])
		+ Q[i - 12] + rs3(Q[i - 11]) + Q[i - 10] + rs4(Q[i - 9])
		+ Q[i - 8] + rs5(Q[i - 7]) + Q[i - 6] + rs6(Q[i - 5])
		+ Q[i - 4] + rs7(Q[i - 3]) + ss4(Q[i - 2]) + ss5(Q[i - 1])
		+ bmw32_add_element(i, M32, H));
}
H[2]; + Q[2] = ss2(Q[2]) + H[3]; + Q[3] = ss3(Q[3]) + H[4]; + Q[4] = ss4(Q[4]) + H[5]; + Q[5] = ss0(Q[5]) + H[6]; + Q[6] = ss1(Q[6]) + H[7]; + Q[7] = ss2(Q[7]) + H[8]; + Q[8] = ss3(Q[8]) + H[9]; + Q[9] = ss4(Q[9]) + H[10]; + Q[10] = ss0(Q[10]) + H[11]; + Q[11] = ss1(Q[11]) + H[12]; + Q[12] = ss2(Q[12]) + H[13]; + Q[13] = ss3(Q[13]) + H[14]; + Q[14] = ss4(Q[14]) + H[15]; + Q[15] = ss0(Q[15]) + H[0]; + + /* This is the Message expansion or f_1 in the documentation. */ + /* It has 16 rounds. */ + /* Blue Midnight Wish has two tunable security parameters. */ + /* The parameters are named EXPAND_1_ROUNDS and EXPAND_2_ROUNDS. */ + /* The following relation for these parameters should is satisfied: */ + /* EXPAND_1_ROUNDS + EXPAND_2_ROUNDS = 16 */ +#pragma unroll + for (i = 0; i<2; i++) + Q[i + 16] = expand32_1(i + 16, M32, H, Q); + +#pragma unroll + for (i = 2; i<16; i++) + Q[i + 16] = expand32_2(i + 16, M32, H, Q); + + /* Blue Midnight Wish has two temporary cummulative variables that accumulate via XORing */ + /* 16 new variables that are prooduced in the Message Expansion part. */ + XL32 = Q[16] ^ Q[17] ^ Q[18] ^ Q[19] ^ Q[20] ^ Q[21] ^ Q[22] ^ Q[23]; + XH32 = XL32^Q[24] ^ Q[25] ^ Q[26] ^ Q[27] ^ Q[28] ^ Q[29] ^ Q[30] ^ Q[31]; + + + /* This part is the function f_2 - in the documentation */ + + /* Compute the double chaining pipe for the next message block. 
*/ + H[0] = (shl(XH32, 5) ^ shr(Q[16], 5) ^ M32[0]) + (XL32 ^ Q[24] ^ Q[0]); + H[1] = (shr(XH32, 7) ^ shl(Q[17], 8) ^ M32[1]) + (XL32 ^ Q[25] ^ Q[1]); + H[2] = (shr(XH32, 5) ^ shl(Q[18], 5) ^ M32[2]) + (XL32 ^ Q[26] ^ Q[2]); + H[3] = (shr(XH32, 1) ^ shl(Q[19], 5) ^ M32[3]) + (XL32 ^ Q[27] ^ Q[3]); + H[4] = (shr(XH32, 3) ^ Q[20] ^ M32[4]) + (XL32 ^ Q[28] ^ Q[4]); + H[5] = (shl(XH32, 6) ^ shr(Q[21], 6) ^ M32[5]) + (XL32 ^ Q[29] ^ Q[5]); + H[6] = (shr(XH32, 4) ^ shl(Q[22], 6) ^ M32[6]) + (XL32 ^ Q[30] ^ Q[6]); + H[7] = (shr(XH32, 11) ^ shl(Q[23], 2) ^ M32[7]) + (XL32 ^ Q[31] ^ Q[7]); + + H[8] = SPH_ROTL32(H[4], 9) + (XH32 ^ Q[24] ^ M32[8]) + (shl(XL32, 8) ^ Q[23] ^ Q[8]); + H[9] = SPH_ROTL32(H[5], 10) + (XH32 ^ Q[25] ^ M32[9]) + (shr(XL32, 6) ^ Q[16] ^ Q[9]); + H[10] = SPH_ROTL32(H[6], 11) + (XH32 ^ Q[26] ^ M32[10]) + (shl(XL32, 6) ^ Q[17] ^ Q[10]); + H[11] = SPH_ROTL32(H[7], 12) + (XH32 ^ Q[27] ^ M32[11]) + (shl(XL32, 4) ^ Q[18] ^ Q[11]); + H[12] = SPH_ROTL32(H[0], 13) + (XH32 ^ Q[28] ^ M32[12]) + (shr(XL32, 3) ^ Q[19] ^ Q[12]); + H[13] = SPH_ROTL32(H[1], 14) + (XH32 ^ Q[29] ^ M32[13]) + (shr(XL32, 4) ^ Q[20] ^ Q[13]); + H[14] = SPH_ROTL32(H[2], 15) + (XH32 ^ Q[30] ^ M32[14]) + (shr(XL32, 7) ^ Q[21] ^ Q[14]); + H[15] = SPH_ROTL32(H[3], 16) + (XH32 ^ Q[31] ^ M32[15]) + (shr(XL32, 2) ^ Q[22] ^ Q[15]); + +} diff --git a/kernel/credits.cl b/kernel/credits.cl new file mode 100644 index 000000000..19cbea67f --- /dev/null +++ b/kernel/credits.cl @@ -0,0 +1,232 @@ +/* +* "credits" kernel implementation. 
+* +* ==========================(LICENSE BEGIN)============================ +* +* Copyright (c) 2015 djm34 +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ +#if !defined(cl_khr_byte_addressable_store) +#error "Device does not support unaligned stores" +#endif + + +#define ROL32(x, n) rotate(x, (uint) n) +#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) +#define SWAP64(x) as_ulong(as_uchar8(x).s32107654) /// hmm... 
+ +#define SHR(x, n) ((x) >> n) + +#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) +#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) + +#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) +#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) + +#define P(a,b,c,d,e,f,g,h,x,K) \ +{ \ + temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ + d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ +} + +#define F0(y, x, z) bitselect(z, y, z ^ x) +#define F1(x, y, z) bitselect(z, y, x) + +#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) +#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) +#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) +#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) +#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) +#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) +#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) +#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) +#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) +#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) +#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) +#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) +#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) +#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) +#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) +#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) + +#define RD14 (S1(W12) + W7 + S0(W15) + W14) +#define RD15 (S1(W13) + W8 + S0(W0) + W15) + +/// generic sha transform +inline uint8 sha256_Transform(uint16 data, uint8 state) +{ + uint temp1; + uint8 res = state; + uint W0 = data.s0; + uint W1 = data.s1; + uint W2 = data.s2; + uint W3 = data.s3; + uint W4 = data.s4; + uint W5 = data.s5; + uint W6 = data.s6; + uint W7 = data.s7; + uint W8 = data.s8; + uint W9 = data.s9; + uint W10 = data.sA; + uint W11 = data.sB; + uint W12 = data.sC; + uint W13 = data.sD; + uint W14 = data.sE; + uint W15 = data.sF; + +#define v0 res.s0 +#define v1 res.s1 +#define v2 res.s2 +#define v3 res.s3 +#define v4 res.s4 +#define v5 res.s5 +#define v6 res.s6 +#define v7 res.s7 + + P(v0, v1, v2, v3, v4, v5, v6, v7, 
W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); + 
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); +#undef v0 +#undef v1 +#undef v2 +#undef v3 +#undef v4 +#undef v5 +#undef v6 +#undef v7 + return (res + state); +} + + + +static __constant uint8 H256 = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, + 0xA54FF53A, 0x510E527F, 0x9B05688C, + 0x1F83D9AB, 0x5BE0CD19 +}; + + +static __constant uint8 pad_data = +{ + 0x00000000, 0x00000000, 0x80000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000540 +}; + +static __constant uint8 pad_state = +{ + 0x80000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000100 +}; + + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 
/*
 * Apply CUBEHASH_ROUNDS rounds of the CubeHash permutation to the 32-word
 * state x, in place.  The ten steps of each round are the ones quoted in the
 * step comments below.
 */
inline void rrounds(uint x[2][2][2][2][2])
{
/* Word t (0..15) of half h; the four low bits of t are the j,k,l,m indices. */
#define CUBE_AT(h, t) x[h][((t) >> 3) & 1][((t) >> 2) & 1][((t) >> 1) & 1][(t) & 1]

	for (int round = 0; round < CUBEHASH_ROUNDS; ++round) {
		int t;

		/* "add x_0jklm into x_1jklm modulo 2^32", then
		   "rotate x_0jklm upwards by 7 bits" */
		for (t = 0; t < 16; ++t) {
			CUBE_AT(1, t) += CUBE_AT(0, t);
			CUBE_AT(0, t) = ROTATEUPWARDS7(CUBE_AT(0, t));
		}

		/* "swap x_00klm with x_01klm" */
		for (t = 0; t < 8; ++t) {
			SWAP(CUBE_AT(0, t), CUBE_AT(0, t | 8))
		}

		/* "xor x_1jklm into x_0jklm" */
		for (t = 0; t < 16; ++t) {
			CUBE_AT(0, t) ^= CUBE_AT(1, t);
		}

		/* "swap x_1jk0m with x_1jk1m" */
		for (t = 0; t < 16; ++t) {
			if (t & 2)
				continue;
			SWAP(CUBE_AT(1, t), CUBE_AT(1, t | 2))
		}

		/* "add x_0jklm into x_1jklm modulo 2^32", then
		   "rotate x_0jklm upwards by 11 bits" */
		for (t = 0; t < 16; ++t) {
			CUBE_AT(1, t) += CUBE_AT(0, t);
			CUBE_AT(0, t) = ROTATEUPWARDS11(CUBE_AT(0, t));
		}

		/* "swap x_0j0lm with x_0j1lm" */
		for (t = 0; t < 16; ++t) {
			if (t & 4)
				continue;
			SWAP(CUBE_AT(0, t), CUBE_AT(0, t | 4))
		}

		/* "xor x_1jklm into x_0jklm" */
		for (t = 0; t < 16; ++t) {
			CUBE_AT(0, t) ^= CUBE_AT(1, t);
		}

		/* "swap x_1jkl0 with x_1jkl1" */
		for (t = 0; t < 16; t += 2) {
			SWAP(CUBE_AT(1, t), CUBE_AT(1, t | 1))
		}
	}

#undef CUBE_AT
}
implementation. + * + * ==========================(LICENSE BEGIN)============================ + * Copyright (c) 2014 djm34 + * Copyright (c) 2014 James Lovejoy + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/*
 * Rotate the 64-bit value vw right by a bits using only 32-bit operations.
 *
 * The value is split into two 32-bit halves and rotated LEFT by 64 - a,
 * which is the same rotation.  The count is reduced modulo 64 first: the
 * earlier version fed a == 0, a == 64 or a > 64 straight into the shift
 * branches, where a 32-bit shift by 0/32 OR-ed both halves together instead
 * of rotating.  Results for a in 1..63 are unchanged.
 *
 * NOTE(review): like the original, this assumes component .x of
 * as_uint2(vw) is the low 32-bit half (little-endian device) - confirm if
 * big-endian targets matter.
 */
static inline sph_u64 ror64(sph_u64 vw, unsigned a) {
	/* equivalent left-rotate count, always in 0..63 */
	const unsigned n = (64u - (a & 63u)) & 63u;
	const uint2 v = as_uint2(vw);
	uint2 result;

	if (n == 0) { return vw; }                               /* nothing to rotate */
	if (n == 32) { return as_ulong((uint2)(v.y, v.x)); }     /* plain half swap */
	if (n < 32) {
		/* both shift counts are in 1..31 here */
		result.y = ((v.y << (n)) | (v.x >> (32 - n)));
		result.x = ((v.x << (n)) | (v.y >> (32 - n)));
	}
	else {
		/* n in 33..63: halves trade places, then shift by n - 32 (1..31) */
		result.y = ((v.x << (n - 32)) | (v.y >> (64 - n)));
		result.x = ((v.y << (n - 32)) | (v.x >> (64 - n)));
	}
	return as_ulong(result);
}
as_ulong(as_uchar8(x).s76543210) +//#define SWAP8(x) as_ulong(as_uchar8(x).s32107654) +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); + #define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC32LE(x) (*(const __global sph_u32 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC64LE(x) (*(const __global sph_u64 *) (x)); +#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x)); +#endif + +typedef union { + unsigned char h1[32]; + uint h4[8]; + ulong h8[4]; +} hash_t; + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search( + __global uchar* hashes, + // precalc hash from fisrt part of message + const uint h0, + const uint h1, + const uint h2, + const uint h3, + const uint h4, + const uint h5, + const uint h6, + const uint h7, + // last 12 bytes of original message + const uint in16, + const uint in17, + const uint in18 +) +{ + uint gid = get_global_id(0); + __global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + + +// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + unsigned int h[8]; + unsigned int m[16]; + unsigned int v[16]; + + +h[0]=h0; +h[1]=h1; +h[2]=h2; +h[3]=h3; +h[4]=h4; +h[5]=h5; +h[6]=h6; +h[7]=h7; +// compress 2nd round + m[0] = in16; + m[1] = in17; + m[2] = in18; + m[3] = SWAP4(gid); + + for (int i = 4; i < 16; i++) {m[i] = c_Padding[i];} + + for (int i = 0; i < 8; i++) {v[i] = h[i];} + + v[8] = c_u256[0]; + v[9] = c_u256[1]; + v[10] = c_u256[2]; + v[11] = c_u256[3]; + v[12] = c_u256[4] ^ 640; + v[13] = c_u256[5] ^ 640; + v[14] = c_u256[6]; + v[15] = c_u256[7]; + + for (int r = 0; r < 14; r++) { + GS(0, 4, 0x8, 0xC, 0x0); + GS(1, 5, 0x9, 0xD, 0x2); + GS(2, 6, 0xA, 0xE, 0x4); + GS(3, 7, 0xB, 0xF, 0x6); + GS(0, 5, 0xA, 0xF, 0x8); + GS(1, 6, 0xB, 0xC, 0xA); + GS(2, 7, 0x8, 0xD, 0xC); + GS(3, 4, 0x9, 0xE, 0xE); + } + + for 
// keccak256
//
// Reads this work-item's 32-byte hash slot, runs one keccak_block() over a
// 25-lane state built from it and writes lanes 0..3 back to the same slot.
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global uchar* hashes)
{
	__global hash_t *hash = (__global hash_t *)(hashes + (get_global_id(0) % MAX_GLOBAL_THREADS) * (4 * sizeof(ulong)));

	sph_u64 sponge[25];
	int lane;

	/* lanes 0..3: the incoming 256-bit hash */
	for (lane = 0; lane < 4; lane++)
		sponge[lane] = hash->h8[lane];

	/* lanes 4..24: zero except for the two marker words below
	   (presumably the Keccak padding for a 32-byte message - TODO confirm) */
	sponge[4] = 0x0000000000000001;
	for (lane = 5; lane < 25; lane++)
		sponge[lane] = 0;
	sponge[16] = 0x8000000000000000;

	keccak_block(sponge);

	for (lane = 0; lane < 4; lane++)
		hash->h8[lane] = sponge[lane];

	barrier(CLK_LOCAL_MEM_FENCE);
}
/// lyra2 algo
///
/// Runs the Lyra2 stage on this work-item's 32-byte hash slot, using a
/// private slice of `matrix` as scratch memory, and writes the first four
/// 64-bit state words back to the slot.
///
/// hashes : per-work-item 32-byte hash slots (read, then overwritten)
/// matrix : scratch buffer; each work-item uses 4 * memshift * 4 * 4 * 8 bytes


__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search3(__global uchar* hashes,__global uchar* matrix )
{
 /* gid is not used below; the slot index is recomputed from get_global_id(0) */
 uint gid = get_global_id(0);
 // __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  __global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
  /* this work-item's slice of the scratch matrix, addressed as ulong4 cells */
  __global ulong4 *DMatrix = (__global ulong4 *)(matrix + (4 * memshift * 4 * 4 * 8 * (get_global_id(0) % MAX_GLOBAL_THREADS)));

//  uint offset = (4 * memshift * 4 * 4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))/32;
  ulong4 state[4];

  /* state[0] and state[1] both start as the incoming hash; state[2..3] are
     fixed constants (they match the Blake2b initialisation vector) */
  state[0].x = hash->h8[0]; //password
  state[0].y = hash->h8[1]; //password
  state[0].z = hash->h8[2]; //password
  state[0].w = hash->h8[3]; //password
  state[1] = state[0];
  state[2] = (ulong4)(0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL, 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL);
  state[3] = (ulong4)(0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL, 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL);
  for (int i = 0; i<12; i++) { round_lyra(state); }

  /* NOTE(review): these constants look like the Lyra2 parameter/padding
     block (lengths, cost, matrix size) - confirm against the reference */
  state[0] ^= (ulong4)(0x20,0x20,0x20,0x01);
  state[1] ^= (ulong4)(0x04,0x04,0x80,0x0100000000000000);

  for (int i = 0; i<12; i++) { round_lyra(state); }


  /* fill the first 4 * memshift cells of the matrix, highest offset first,
     with state[0..2], running one round between writes */
  uint ps1 = (memshift * 3);
//#pragma unroll 4
  for (int i = 0; i < 4; i++)
  {
	  uint s1 = ps1 - memshift * i;
	  for (int j = 0; j < 3; j++)
		  (DMatrix)[j+s1] = state[j];

	  round_lyra(state);
  }

  reduceDuplexf(state,DMatrix);

  reduceDuplexRowSetupf(1, 0, 2,state, DMatrix);
  reduceDuplexRowSetupf(2, 1, 3, state,DMatrix);


  /* four passes over the matrix; the second row argument is chosen by the
     two low bits of the current state, the first is the previous pass's row */
  uint rowa;
  uint prev = 3;
  for (uint i = 0; i<4; i++) {
	  rowa = state[0].x & 3;
	  reduceDuplexRowf(prev, rowa, i, state, DMatrix);
	  prev = i;
  }



  /* fold the first memshift cells of the last selected row into the state */
  uint shift = (memshift * 4 * rowa);

  for (int j = 0; j < 3; j++)
	  state[j] ^= (DMatrix)[j+shift];

  for (int i = 0; i < 12; i++)
	  round_lyra(state);
//////////////////////////////////////


  /* output: the first four 64-bit words of the state (i.e. state[0]) */
  for (int i = 0; i<4; i++) {hash->h8[i] = ((ulong*)state)[i];}
barrier(CLK_LOCAL_MEM_FENCE);



}
/*
 * Final stage: two Compression256() calls over the 32-byte hash slot,
 * followed by the target comparison.  When the last 64-bit word of the result
 * is <= target, the byte-swapped global id is appended to `output`, using
 * output[0xFF] as the atomic write counter.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search6(__global uchar* hashes, __global uint* output, const ulong target)
{
	const uint work_id = get_global_id(0);
	__global hash_t *hash = (__global hash_t *)(hashes + (get_global_id(0) % MAX_GLOBAL_THREADS) * (4 * sizeof(ulong)));

	uint dh[16];
	uint final_s[16];
	uint message[16];

	/* dh      = 0x40414243, 0x44454647, ... 0x7C7D7E7F (step 0x04040404)
	   final_s = 0xaaaaaaa0 ... 0xaaaaaaaf               (step 1)          */
	for (uint w = 0; w < 16; w++) {
		dh[w] = 0x40414243 + w * 0x04040404;
		final_s[w] = 0xaaaaaaa0 + w;
	}

	/* message block: 8 hash words, 0x80 marker, zeros, 0x100 in word 14 */
	for (int w = 0; w < 16; w++) {
		if (w < 8)
			message[w] = hash->h4[w];
		else if (w == 8)
			message[w] = 0x80;
		else if (w == 14)
			message[w] = 0x100;
		else
			message[w] = 0;
	}

	Compression256(message, dh);      /* updates dh in place */
	Compression256(dh, final_s);      /* updates final_s in place */
	barrier(CLK_LOCAL_MEM_FENCE);

	if (((ulong*)final_s)[7] <= target) {
		output[atomic_inc(output + 0xFF)] = SWAP4(work_id);
	}
}
/*
 * Walks four memshift-sized cells starting at DMatrix[0] in ascending order.
 * Each cell is XOR-ed into state[0..2], one round_lyra() is run, and
 * (cell ^ new state) is stored into the cells starting at offset
 * memshift * 3 + memshift * 4, written in descending order.
 *
 * state   : 4 x ulong4 sponge state, updated in place
 * DMatrix : this work-item's scratch matrix
 */
void reduceDuplexf(ulong4* state ,__global ulong4* DMatrix)
{
	__global ulong4 *src = DMatrix;
	__global ulong4 *dst = DMatrix + (memshift * 3 + memshift * 4);

	for (int col = 0; col < 4; col++) {
		/* snapshot the input cell before the state is touched */
		const ulong4 in0 = src[0];
		const ulong4 in1 = src[1];
		const ulong4 in2 = src[2];

		state[0] ^= in0;
		state[1] ^= in1;
		state[2] ^= in2;

		round_lyra(state);

		dst[0] = in0 ^ state[0];
		dst[1] = in1 ^ state[1];
		dst[2] = in2 ^ state[2];

		src += memshift;
		dst -= memshift;
	}
}
reduceDuplexRowf(uint rowIn,uint rowInOut,uint rowOut,ulong4 * state, __global ulong4 * DMatrix) +{ + +ulong4 state1[3], state2[3]; +uint ps1 = (memshift * 4 * rowIn); +uint ps2 = (memshift * 4 * rowInOut); +uint ps3 = (memshift * 4 * rowOut); + + + for (int i = 0; i < 4; i++) + { + uint s1 = ps1 + i*memshift; + uint s2 = ps2 + i*memshift; + uint s3 = ps3 + i*memshift; + + + for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1]; + + for (int j = 0; j < 3; j++) state2[j] = (DMatrix)[j + s2]; + + for (int j = 0; j < 3; j++) state1[j] += state2[j]; + + for (int j = 0; j < 3; j++) state[j] ^= state1[j]; + + + round_lyra(state); + + ((ulong*)state2)[0] ^= ((ulong*)state)[11]; + for (int j = 0; j < 11; j++) + ((ulong*)state2)[j + 1] ^= ((ulong*)state)[j]; + + if (rowInOut != rowOut) { + for (int j = 0; j < 3; j++) + (DMatrix)[j + s2] = state2[j]; + for (int j = 0; j < 3; j++) + (DMatrix)[j + s3] ^= state[j]; + } + else { + for (int j = 0; j < 3; j++) + state2[j] ^= state[j]; + for (int j = 0; j < 3; j++) + (DMatrix)[j + s2] = state2[j]; + } + + } + } + + + + +void reduceDuplexRowSetupf(uint rowIn, uint rowInOut, uint rowOut, ulong4 *state, __global ulong4* DMatrix) { + + ulong4 state2[3], state1[3]; + uint ps1 = (memshift * 4 * rowIn); + uint ps2 = (memshift * 4 * rowInOut); + uint ps3 = (memshift * 3 + memshift * 4 * rowOut); + + for (int i = 0; i < 4; i++) + { + uint s1 = ps1 + i*memshift; + uint s2 = ps2 + i*memshift; + uint s3 = ps3 - i*memshift; + + for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1]; + + for (int j = 0; j < 3; j++) state2[j] = (DMatrix)[j + s2]; + for (int j = 0; j < 3; j++) { + ulong4 tmp = state1[j] + state2[j]; + state[j] ^= tmp; + } + round_lyra(state); + + for (int j = 0; j < 3; j++) { + state1[j] ^= state[j]; + (DMatrix)[j + s3] = state1[j]; + } + + ((ulong*)state2)[0] ^= ((ulong*)state)[11]; + for (int j = 0; j < 11; j++) + ((ulong*)state2)[j + 1] ^= ((ulong*)state)[j]; + for (int j = 0; j < 3; j++) + (DMatrix)[j + s2] = state2[j]; 
+ } + } + diff --git a/kernel/yescrypt-multi.cl b/kernel/yescrypt-multi.cl new file mode 100644 index 000000000..3af7b28ac --- /dev/null +++ b/kernel/yescrypt-multi.cl @@ -0,0 +1,314 @@ +/* +* "yescrypt" kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* +* Copyright (c) 2015 djm34 +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ +#if !defined(cl_khr_byte_addressable_store) +#error "Device does not support unaligned stores" +#endif + +#include "yescrypt_essential.cl" + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, __global uchar* buff3, const uint target) +{ + + __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS))); + __global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + + + + uint nonce = (get_global_id(0)); + uint data[20]; + uint16 in; + uint8 state1, state2; +// uint8 sha256tokeep; + +// ulong16 Bdev[8]; // will require an additional buffer + ((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; + ((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; +// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } + // if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); } + uint8 passwd = sha256_80(data, nonce); + //pbkdf + in.lo = pad1.lo ^ passwd; + in.hi = pad1.hi; + state1 = sha256_Transform(in, H256); + + in.lo = pad2.lo ^ passwd; + in.hi = pad2.hi; + state2 = sha256_Transform(in, H256); + + in = ((uint16*)data)[0]; + state1 = sha256_Transform(in, state1); +#pragma unroll 1 + for (int i = 0; i<8; i++) + { + uint16 result; + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 1; + in.lo = sha256_Transform(in, state1); + in.hi = pad4; + result.lo = 
swapvec(sha256_Transform(in, state2)); + if (i == 0) sha256tokeep[0] = result.lo; + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 2; + in.lo = sha256_Transform(in, state1); + in.hi = pad4; + result.hi = swapvec(sha256_Transform(in, state2)); + Bdev[i].lo = as_ulong8(shuffle(result)); +// Bdev[i].lo = as_ulong8(result); + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 3; + in.lo = sha256_Transform(in, state1); + in.hi = pad4; + result.lo = swapvec(sha256_Transform(in, state2)); + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 4; + in.lo = sha256_Transform(in, state1); + in.hi = pad4; + result.hi = swapvec(sha256_Transform(in, state2)); + + + Bdev[i].hi = as_ulong8(shuffle(result)); +// Bdev[i].hi = as_ulong8(result); + } + + //mixing1 + + prevstate[0] = Bdev[0]; + Bdev[0] = blockmix_salsa8_small2(Bdev[0]); + prevstate[1] = Bdev[0]; + Bdev[0] = blockmix_salsa8_small2(Bdev[0]); + + uint n = 1; +#pragma unroll 1 + for (uint i = 2; i < 64; i++) + { + + prevstate[i] = Bdev[0]; + + if ((i&(i - 1)) == 0) n = n << 1; + + uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1); + + j += i - n; + Bdev[0] ^= prevstate[j]; + + Bdev[0] = blockmix_salsa8_small2(Bdev[0]); + } + + +} + + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global uchar *buffer1, __global uchar *buffer2) +{ +} + + + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) +{ + + __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * 
sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + + + for (int i = 0; i<8; i++) + hashbuffer[i] = Bdev[i]; + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); + + + for (int i = 0; i<8; i++) + hashbuffer[i + 8] = Bdev[i]; + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); + int n = 1; +#pragma unroll 1 + for (int i = 2; i < 2048; i ++) + { + + for (int k = 0; k<8; k++) + (hashbuffer + 8 * i)[k] = Bdev[k]; + + + if ((i&(i - 1)) == 0) n = n << 1; + + uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1); + j += i - n; + + for (int k = 0; k < 8; k++) + Bdev[k] ^= (hashbuffer + 8 * j)[k]; + + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); + } +} + +/* +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global uchar *buffer1, __global uchar *buffer2) +{ +} +*/ + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) +{ + + __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + + +#pragma unroll 1 + for (int z = 0; z < 684; z++) + { + + uint j = as_uint2(Bdev[7].hi.s0).x & 2047; + + + for (int k = 0; k < 8; k++) + Bdev[k] ^= (hashbuffer + 8 * j)[k]; + + if (z<682) + for (int k = 0; k<8; k++) + (hashbuffer + 8 * j)[k] = Bdev[k]; + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); +//// + } + +} + +/* +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search5(__global uchar *buffer1, __global uchar *buffer2) +{ +} +*/ + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global const uchar* restrict input, __global uint* restrict output, __global 
uchar *buff2,__global uchar* buff3, const uint target) +{ + + __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); + + uint nonce = (get_global_id(0)); + + + uint data[20]; + ((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; + ((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; +// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } + uint8 swpass = swapvec(sha256tokeep[0]); + uint16 in; + uint8 state1,state2; + in.lo = pad1.lo ^ swpass; + in.hi = pad1.hi; + + + state1 = sha256_Transform(in, H256); + + in.lo = pad2.lo ^ swpass; + in.hi = pad2.hi; + state2 = sha256_Transform(in, H256); + +#pragma unroll 1 + for (int i = 0; i<8; i++) { + in = unshuffle(Bdev[i].lo); + in = swapvec16(in); + state1 = sha256_Transform(in, state1); + in = unshuffle(Bdev[i].hi); + in = swapvec16(in); + state1 = sha256_Transform(in, state1); + } + in = pad5; + state1 = sha256_Transform(in, state1); + in.lo = state1; + in.hi = pad4; + uint8 res = sha256_Transform(in, state2); + + //hmac and final sha + + in.lo = pad1.lo ^ res; + in.hi = pad1.hi; + state1 = sha256_Transform(in, H256); + in.lo = pad2.lo ^ res; + in.hi = pad2.hi; + state2 = sha256_Transform(in, H256); + in = ((uint16*)data)[0]; + state1 = sha256_Transform(in, state1); + in = padsha80; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = get_global_id(0); + in.sf = 0x480; + state1 = sha256_Transform(in, state1); + in.lo = state1; + in.hi = pad4; + state1 = sha256_Transform(in, state2); + // state2 = H256; + in.lo = state1; + in.hi = pad4; + in.sf = 0x100; + res = sha256_Transform(in, H256); + + + if (SWAP32(res.s7) <= (target)) + output[atomic_inc(output + 0xFF)] = (nonce); + +} diff --git a/kernel/yescrypt.cl b/kernel/yescrypt.cl new file mode 100644 index 000000000..0a94ebcab --- /dev/null +++ 
b/kernel/yescrypt.cl @@ -0,0 +1,253 @@ +/* +* "yescrypt" kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* +* Copyright (c) 2015 djm34 +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ +#if !defined(cl_khr_byte_addressable_store) +#error "Device does not support unaligned stores" +#endif + +#include "yescrypt_essential.cl" + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, const uint target) +{ + + __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); + + + + uint nonce = (get_global_id(0)); + uint data[20]; + uint16 in; + uint8 state1, state2; + uint8 sha256tokeep; + + ((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; + ((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; + for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } + // if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); } + uint8 passwd = sha256_80(data, nonce); + //pbkdf + in.lo = pad1.lo ^ passwd; + in.hi = pad1.hi; + state1 = sha256_Transform(in, H256); + + in.lo = pad2.lo ^ passwd; + in.hi = pad2.hi; + state2 = sha256_Transform(in, H256); + + in = ((uint16*)data)[0]; + state1 = sha256_Transform(in, state1); +#pragma unroll 1 + for (int i = 0; i<8; i++) + { + uint16 result; + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 1; + in.lo = sha256_Transform(in, state1); + in.hi = pad4; + result.lo = swapvec(sha256_Transform(in, state2)); + if (i == 0) sha256tokeep = result.lo; + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 2; + in.lo = 
sha256_Transform(in, state1); + in.hi = pad4; + result.hi = swapvec(sha256_Transform(in, state2)); + Bdev[i].lo = as_ulong8(shuffle(result)); + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 3; + in.lo = sha256_Transform(in, state1); + in.hi = pad4; + result.lo = swapvec(sha256_Transform(in, state2)); + in = pad3; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = nonce; + in.s4 = 4 * i + 4; + in.lo = sha256_Transform(in, state1); + in.hi = pad4; + result.hi = swapvec(sha256_Transform(in, state2)); + + + Bdev[i].hi = as_ulong8(shuffle(result)); + } + + //mixing1 + + prevstate[0] = Bdev[0]; + Bdev[0] = blockmix_salsa8_small2(Bdev[0]); + prevstate[1] = Bdev[0]; + Bdev[0] = blockmix_salsa8_small2(Bdev[0]); + + uint n = 1; +#pragma unroll 1 + for (uint i = 2; i < 64; i++) + { + + prevstate[i] = Bdev[0]; + + if ((i&(i - 1)) == 0) n = n << 1; + + uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1); + + j += i - n; + Bdev[0] ^= prevstate[j]; + + Bdev[0] = blockmix_salsa8_small2(Bdev[0]); + } + + + for (int i = 0; i<8; i++) + hashbuffer[i] = Bdev[i]; + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); + + + for (int i = 0; i<8; i++) + hashbuffer[i + 8] = Bdev[i]; + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); + n = 1; +#pragma unroll 1 + for (int i = 2; i < 2048; i++) + { + + for (int k = 0; k<8; k++) + (hashbuffer + 8 * i)[k] = Bdev[k]; + + + if ((i&(i - 1)) == 0) n = n << 1; + + uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1); + j += i - n; + + for (int k = 0; k < 8; k++) + Bdev[k] ^= (hashbuffer + 8 * j)[k]; + + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); + } + + +#pragma unroll 1 + for (int z = 0; z < 684; z++) + { + + uint j = as_uint2(Bdev[7].hi.s0).x & 2047; + + + for (int k = 0; k < 8; k++) + Bdev[k] ^= (hashbuffer + 8 * j)[k]; + + if (z<682) + for (int k = 0; k<8; k++) + (hashbuffer + 8 * j)[k] = Bdev[k]; + + blockmix_pwxform((__global ulong8*)Bdev, prevstate); + //// + } 
+ + + + uint8 swpass = swapvec(sha256tokeep); +// uint16 in; +// uint8 state1, state2; + in.lo = pad1.lo ^ swpass; + in.hi = pad1.hi; + + + state1 = sha256_Transform(in, H256); + + in.lo = pad2.lo ^ swpass; + in.hi = pad2.hi; + state2 = sha256_Transform(in, H256); + +#pragma unroll 1 + for (int i = 0; i<8; i++) { + in = unshuffle(Bdev[i].lo); + in = swapvec16(in); + state1 = sha256_Transform(in, state1); + in = unshuffle(Bdev[i].hi); + in = swapvec16(in); + state1 = sha256_Transform(in, state1); + } + in = pad5; + state1 = sha256_Transform(in, state1); + in.lo = state1; + in.hi = pad4; + uint8 res = sha256_Transform(in, state2); + + //hmac and final sha + + in.lo = pad1.lo ^ res; + in.hi = pad1.hi; + state1 = sha256_Transform(in, H256); + in.lo = pad2.lo ^ res; + in.hi = pad2.hi; + state2 = sha256_Transform(in, H256); + in = ((uint16*)data)[0]; + state1 = sha256_Transform(in, state1); + in = padsha80; + in.s0 = data[16]; + in.s1 = data[17]; + in.s2 = data[18]; + in.s3 = get_global_id(0); + in.sf = 0x480; + state1 = sha256_Transform(in, state1); + in.lo = state1; + in.hi = pad4; + state1 = sha256_Transform(in, state2); + // state2 = H256; + in.lo = state1; + in.hi = pad4; + in.sf = 0x100; + res = sha256_Transform(in, H256); + + + if (SWAP32(res.s7) <= (target)) + output[atomic_inc(output + 0xFF)] = (nonce); + +} + diff --git a/kernel/yescrypt_essential.cl b/kernel/yescrypt_essential.cl new file mode 100644 index 000000000..ba1816a8e --- /dev/null +++ b/kernel/yescrypt_essential.cl @@ -0,0 +1,760 @@ +/* +* "yescrypt" kernel implementation. 
+* +* ==========================(LICENSE BEGIN)============================ +* +* Copyright (c) 2015 djm34 +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ + +#define ROL32(x, n) rotate(x, (uint) n) +#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) +//#define ROL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define HASH_MEMORY 4096 + + +#define SALSA(a,b,c,d) do { \ + t =a+d; b^=ROL32(t, 7U); \ + t =b+a; c^=ROL32(t, 9U); \ + t =c+b; d^=ROL32(t, 13U); \ + t =d+c; a^=ROL32(t, 18U); \ +} while(0) + + +#define SALSA_CORE(state) do { \ +\ +SALSA(state.s0,state.s4,state.s8,state.sc); \ +SALSA(state.s5,state.s9,state.sd,state.s1); \ +SALSA(state.sa,state.se,state.s2,state.s6); \ +SALSA(state.sf,state.s3,state.s7,state.sb); \ +SALSA(state.s0,state.s1,state.s2,state.s3); \ +SALSA(state.s5,state.s6,state.s7,state.s4); \ +SALSA(state.sa,state.sb,state.s8,state.s9); \ +SALSA(state.sf,state.sc,state.sd,state.se); \ + } while(0) + +#define uSALSA_CORE(state) do { \ +\ +SALSA(state.s0,state.s4,state.s8,state.sc); \ +SALSA(state.s1,state.s5,state.s9,state.sd); \ +SALSA(state.s2,state.s6,state.sa,state.se); \ +SALSA(state.s3,state.s7,state.sb,state.sf); \ +SALSA(state.s0,state.sd,state.sa,state.s7); \ +SALSA(state.s1,state.se,state.sb,state.s4); \ +SALSA(state.s2,state.sf,state.s8,state.s5); \ +SALSA(state.s3,state.sc,state.s9,state.s6); \ +} while(0) + + +#define unshuffle(state) (as_uint16(state).s0da741eb852fc963) + +#define shuffle(state) (as_uint16(state).s05af49e38d27c16b) + +static __constant uint16 pad1 = +{ + 0x36363636, 0x36363636, 0x36363636, 0x36363636, + 0x36363636, 0x36363636, 0x36363636, 0x36363636, + 0x36363636, 0x36363636, 0x36363636, 0x36363636, + 0x36363636, 0x36363636, 0x36363636, 0x36363636 +}; + +static __constant uint16 pad2 = +{ + 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, + 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, + 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, + 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c +}; + +static __constant uint16 pad5 = +{ + 0x00000001, 0x80000000, 0x00000000, 0x00000000, 
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00002220 +}; + +static __constant uint16 pad3 = +{ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x80000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x000004a0 +}; + +static __constant uint16 padsha80 = +{ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x80000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000280 +}; + +static __constant uint8 pad4 = +{ + 0x80000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000300 +}; + + + +static __constant uint8 H256 = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, + 0xA54FF53A, 0x510E527F, 0x9B05688C, + 0x1F83D9AB, 0x5BE0CD19 +}; + +inline uint8 swapvec(uint8 buf) +{ + uint8 vec; + vec.s0 = SWAP32(buf.s0); + vec.s1 = SWAP32(buf.s1); + vec.s2 = SWAP32(buf.s2); + vec.s3 = SWAP32(buf.s3); + vec.s4 = SWAP32(buf.s4); + vec.s5 = SWAP32(buf.s5); + vec.s6 = SWAP32(buf.s6); + vec.s7 = SWAP32(buf.s7); + return vec; +} + + + +inline uint16 swapvec16(uint16 buf) +{ + uint16 vec; + vec.s0 = SWAP32(buf.s0); + vec.s1 = SWAP32(buf.s1); + vec.s2 = SWAP32(buf.s2); + vec.s3 = SWAP32(buf.s3); + vec.s4 = SWAP32(buf.s4); + vec.s5 = SWAP32(buf.s5); + vec.s6 = SWAP32(buf.s6); + vec.s7 = SWAP32(buf.s7); + vec.s8 = SWAP32(buf.s8); + vec.s9 = SWAP32(buf.s9); + vec.sa = SWAP32(buf.sa); + vec.sb = SWAP32(buf.sb); + vec.sc = SWAP32(buf.sc); + vec.sd = SWAP32(buf.sd); + vec.se = SWAP32(buf.se); + vec.sf = SWAP32(buf.sf); + return vec; +} + + ulong8 salsa20_8(uint16 Bx) +{ +uint t; + uint16 st = Bx; + uSALSA_CORE(st); + uSALSA_CORE(st); + uSALSA_CORE(st); + uSALSA_CORE(st); + return(as_ulong8(st + Bx)); +} + + ulong8 salsa20_8n(uint16 Bx) + { + uint t; + uint16 st = Bx; + SALSA_CORE(st); + SALSA_CORE(st); + 
SALSA_CORE(st); + SALSA_CORE(st); + return(as_ulong8(st + Bx)); + } + + + ulong16 blockmix_salsa8_small2(ulong16 Bin) +{ + ulong8 X = Bin.hi; + X ^= Bin.lo; + X = salsa20_8(as_uint16(X)); + Bin.lo = X; + X ^= Bin.hi; + X = salsa20_8(as_uint16(X)); + Bin.hi = X; + return(Bin); +} +/* + uint16 salsa20_8_2(uint16 Bx) + { + uint t; + uint16 st = Bx; + uSALSA_CORE(st); + uSALSA_CORE(st); + uSALSA_CORE(st); + uSALSA_CORE(st); + return(st + Bx); + } + + ulong16 blockmix_salsa8_small2(ulong16 Bin) + { + uint16 X = as_uint16(Bin.hi); + X ^= as_uint16(Bin.lo); + X = salsa20_8_2(as_uint16(X)); + Bin.lo = as_ulong8(X); + X ^= as_uint16(Bin.hi); + X = salsa20_8_2(as_uint16(X)); + Bin.hi = as_ulong8(X); + return(Bin); + } +*/ + + +inline ulong2 madd4long2(uint4 a, uint4 b) +{ + uint4 result; + result.x = a.x*a.y + b.x; + result.y = b.y + mad_hi(a.x, a.y, b.x); + result.z = a.z*a.w + b.z; + result.w = b.w + mad_hi(a.z, a.w, b.z); + return as_ulong2(result); +} + +inline ulong2 madd4long3(uint4 a, ulong2 b) +{ + ulong2 result; + result.x = (ulong)a.x*(ulong)a.y + b.x; + result.y = (ulong)a.z*(ulong)a.w + b.y; + return result; +} + + +inline ulong8 block_pwxform_long_old(ulong8 Bout, __global ulong16 *prevstate) +{ + + ulong2 vec = Bout.lo.lo; + + for (int i = 0; i < 6; i++) + { + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = ((__global ulong2*)(prevstate ))[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = ((__global ulong2*)(prevstate + 32))[x.y]; + + vec ^= p1; + } + Bout.lo.lo = vec; + vec = Bout.lo.hi; + for (int i = 0; i < 6; i++) + { + + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = ((__global ulong2*)(prevstate))[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = ((__global ulong2*)(prevstate + 32))[x.y]; + + vec ^= p1; + } + Bout.lo.hi = vec; + + vec = Bout.hi.lo; + for (int i = 0; i < 6; i++) + { + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = ((__global 
ulong2*)(prevstate))[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = ((__global ulong2*)(prevstate + 32))[x.y]; + vec ^= p1; + } + Bout.hi.lo = vec; + vec = Bout.hi.hi; + for (int i = 0; i < 6; i++) + { + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = ((__global ulong2*)(prevstate))[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = ((__global ulong2*)(prevstate + 32))[x.y]; + + vec ^= p1; + } + Bout.hi.hi = vec; + + return(Bout); +} + +inline ulong8 block_pwxform_long(ulong8 Bout, __global ulong2 *prevstate) +{ + + ulong2 vec = Bout.lo.lo; + + for (int i = 0; i < 6; i++) + { + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = prevstate[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = (prevstate + 32*8)[x.y]; + + vec ^= p1; + } + Bout.lo.lo = vec; + vec = Bout.lo.hi; + for (int i = 0; i < 6; i++) + { + + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = prevstate[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = (prevstate + 32 * 8)[x.y]; + + vec ^= p1; + } + Bout.lo.hi = vec; + + vec = Bout.hi.lo; + for (int i = 0; i < 6; i++) + { + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = prevstate[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = (prevstate + 32 * 8)[x.y]; + vec ^= p1; + } + Bout.hi.lo = vec; + vec = Bout.hi.hi; + for (int i = 0; i < 6; i++) + { + ulong2 p0, p1; + uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); + p0 = prevstate[x.x]; + vec = madd4long3(as_uint4(vec), p0); + p1 = (prevstate + 32 * 8)[x.y]; + + vec ^= p1; + } + Bout.hi.hi = vec; + + return(Bout); +} + + + + +inline void blockmix_pwxform(__global ulong8 *Bin, __global ulong16 *prevstate) +{ + Bin[0] ^= Bin[15]; + Bin[0] = block_pwxform_long_old(Bin[0], prevstate); +#pragma unroll 1 + for (int i = 1; i < 16; i++) + { + Bin[i] ^= Bin[i - 1]; + Bin[i] = block_pwxform_long_old(Bin[i], prevstate); + } + Bin[15] = salsa20_8(as_uint16(Bin[15])); +} + +#define 
SHR(x, n) ((x) >> n) + + +#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) +#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) + +#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) +#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) + +#define P(a,b,c,d,e,f,g,h,x,K) \ +{ \ + temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ + d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ +} + +#define PLAST(a,b,c,d,e,f,g,h,x,K) \ +{ \ + d += h + S3(e) + F1(e,f,g) + (x + K); \ +} + +#define F0(y, x, z) bitselect(z, y, z ^ x) +#define F1(x, y, z) bitselect(z, y, x) + +#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) +#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) +#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) +#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) +#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) +#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) +#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) +#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) +#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) +#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) +#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) +#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) +#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) +#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) +#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) +#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) + +#define RD14 (S1(W12) + W7 + S0(W15) + W14) +#define RD15 (S1(W13) + W8 + S0(W0) + W15) + +/// generic sha transform +inline uint8 sha256_Transform(uint16 data, uint8 state) +{ +uint temp1; + uint8 res = state; + uint W0 = data.s0; + uint W1 = data.s1; + uint W2 = data.s2; + uint W3 = data.s3; + uint W4 = data.s4; + uint W5 = data.s5; + uint W6 = data.s6; + uint W7 = data.s7; + uint W8 = data.s8; + uint W9 = data.s9; + uint W10 = data.sA; + uint W11 = data.sB; + uint W12 = data.sC; + uint W13 = data.sD; + uint W14 = data.sE; + uint W15 = data.sF; + +#define v0 res.s0 +#define v1 res.s1 +#define v2 res.s2 +#define v3 res.s3 +#define v4 res.s4 +#define v5 
res.s5 +#define v6 res.s6 +#define v7 res.s7 + + P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, 
v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); +#undef v0 +#undef v1 +#undef v2 +#undef v3 +#undef v4 +#undef v5 +#undef v6 +#undef v7 + return (res+state); +} + + +static inline uint8 sha256_round1(uint16 data) +{ + uint temp1; + uint8 res; + uint W0 = data.s0; + uint W1 = data.s1; + uint W2 = data.s2; + uint W3 = data.s3; + uint W4 = data.s4; + uint W5 = data.s5; + uint W6 = data.s6; + uint W7 = data.s7; + uint W8 = data.s8; + uint W9 = data.s9; + uint W10 = data.sA; + uint W11 = data.sB; + uint W12 = data.sC; + uint W13 = data.sD; + uint W14 = data.sE; + uint W15 = 
data.sF; + + uint v0 = 0x6A09E667; + uint v1 = 0xBB67AE85; + uint v2 = 0x3C6EF372; + uint v3 = 0xA54FF53A; + uint v4 = 0x510E527F; + uint v5 = 0x9B05688C; + uint v6 = 0x1F83D9AB; + uint v7 = 0x5BE0CD19; + + P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, 
v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); + + res.s0 = v0 + 0x6A09E667; + res.s1 = v1 + 0xBB67AE85; + res.s2 = v2 + 0x3C6EF372; + res.s3 = v3 + 0xA54FF53A; + res.s4 = v4 + 0x510E527F; + res.s5 = v5 + 0x9B05688C; + res.s6 = v6 + 0x1F83D9AB; + res.s7 = v7 + 0x5BE0CD19; + return (res); +} + + +static inline uint8 sha256_round2(uint16 data,uint8 buf) +{ + uint temp1; + uint8 res; + uint W0 = data.s0; + uint W1 
= data.s1; + uint W2 = data.s2; + uint W3 = data.s3; + uint W4 = data.s4; + uint W5 = data.s5; + uint W6 = data.s6; + uint W7 = data.s7; + uint W8 = data.s8; + uint W9 = data.s9; + uint W10 = data.sA; + uint W11 = data.sB; + uint W12 = data.sC; + uint W13 = data.sD; + uint W14 = data.sE; + uint W15 = data.sF; + + uint v0 = buf.s0; + uint v1 = buf.s1; + uint v2 = buf.s2; + uint v3 = buf.s3; + uint v4 = buf.s4; + uint v5 = buf.s5; + uint v6 = buf.s6; + uint v7 = buf.s7; + + P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, 
v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); + + res.s0 = (v0 + buf.s0); + res.s1 = (v1 + buf.s1); + res.s2 = (v2 + buf.s2); + res.s3 = (v3 + 
buf.s3); + res.s4 = (v4 + buf.s4); + res.s5 = (v5 + buf.s5); + res.s6 = (v6 + buf.s6); + res.s7 = (v7 + buf.s7); + return (res); +} + +static inline uint8 sha256_80(uint* data,uint nonce) +{ + +uint8 buf = sha256_round1( ((uint16*)data)[0]); +uint16 in = padsha80; +in.s0 = data[16]; +in.s1 = data[17]; +in.s2 = data[18]; +in.s3 = nonce; + +return(sha256_round2(in,buf)); +} + diff --git a/miner.h b/miner.h index 459857792..ecaa6a134 100644 --- a/miner.h +++ b/miner.h @@ -733,6 +733,17 @@ static inline void flip128(void *dest_p, const void *src_p) dest[i] = swab32(src[i]); } +static inline void flip168(void *dest_p, const void *src_p) +{ + uint32_t *dest = (uint32_t *)dest_p; + const uint32_t *src = (uint32_t *)src_p; + int i; + + for (i = 0; i < 42; i++) + dest[i] = swab32(src[i]); +} + + /* For flipping to the correct endianness if necessary */ #if defined(__BIG_ENDIAN__) || defined(MIPSEB) static inline void endian_flip32(void *dest_p, const void *src_p) @@ -744,6 +755,11 @@ static inline void endian_flip128(void *dest_p, const void *src_p) { flip128(dest_p, src_p); } +static inline void endian_flip168(void *dest_p, const void *src_p) +{ + flip168(dest_p, src_p); +} + #else static inline void endian_flip32(void __maybe_unused *dest_p, const void __maybe_unused *src_p) @@ -754,8 +770,13 @@ static inline void endian_flip128(void __maybe_unused *dest_p, const void __maybe_unused *src_p) { } +static inline void +endian_flip168(void __maybe_unused *dest_p, const void __maybe_unused *src_p) +{ +} #endif + extern double cgpu_runtime(struct cgpu_info *cgpu); extern void _quit(int status); @@ -1146,8 +1167,8 @@ extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user #define MAX_GPUDEVICES 16 #define MAX_DEVICES 4096 -#define MIN_INTENSITY 8 -#define MIN_INTENSITY_STR "8" +#define MIN_INTENSITY 4 +#define MIN_INTENSITY_STR "4" #define MAX_INTENSITY 31 #define MAX_INTENSITY_STR "31" #define MIN_XINTENSITY 1 @@ -1416,7 +1437,7 @@ struct pool { #define 
GETWORK_MODE_GBT 'G' struct work { - unsigned char data[128]; + unsigned char data[168]; unsigned char midstate[32]; unsigned char target[32]; unsigned char hash[32]; diff --git a/ocl.c b/ocl.c index 65e34e750..bd8a1527c 100644 --- a/ocl.c +++ b/ocl.c @@ -36,6 +36,8 @@ #include "ocl/binary_kernel.h" #include "algorithm/neoscrypt.h" #include "algorithm/pluck.h" +#include "algorithm/yescrypt.h" +#include "algorithm/lyra2re.h" /* FIXME: only here for global config vars, replace with configuration.h * or similar as soon as config is in a struct instead of littered all @@ -414,8 +416,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } - /////////////////////////////////// pluck - // neoscrypt TC + // pluck TC else if (!safe_cmp(cgpu->algorithm.name, "pluck") && !cgpu->opt_tc) { size_t glob_thread_count; long max_int; @@ -497,7 +498,175 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg cgpu->thread_concurrency = glob_thread_count; applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); + } + + // Yescrypt TC + else if ((!safe_cmp(cgpu->algorithm.name, "yescrypt") || + !safe_cmp(algorithm->name, "yescrypt-multi")) && !cgpu->opt_tc) { + size_t glob_thread_count; + long max_int; + unsigned char type = 0; + + // determine which intensity type to use + if (cgpu->rawintensity > 0) { + glob_thread_count = cgpu->rawintensity; + max_int = glob_thread_count; + type = 2; + } + else if (cgpu->xintensity > 0) { + glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity); + max_int = cgpu->xintensity; + type = 1; + } + else { + glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity); + max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity); + } + glob_thread_count = ((glob_thread_count < cgpu->work_size) ? 
cgpu->work_size : glob_thread_count); + + // if TC * scratchbuf size is too big for memory... reduce to max + if ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) { + + /* Selected intensity will not run on this GPU. Not enough memory. + * Adapt the memory setting. */ + // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc + switch (type) { + //raw intensity + case 2: + while ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) { + --glob_thread_count; + } + + max_int = glob_thread_count; + cgpu->rawintensity = glob_thread_count; + break; + + //x intensity + case 1: + glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE; + max_int = glob_thread_count / clState->compute_shaders; + + while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) { + --max_int; + } + + /* Check if max_intensity is >0. */ + if (max_int < MIN_XINTENSITY) { + applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu); + max_int = MIN_XINTENSITY; + } + + cgpu->xintensity = max_int; + glob_thread_count = clState->compute_shaders * (1UL << max_int); + break; + + default: + glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE; + while (max_int && ((1UL << max_int) & glob_thread_count) == 0) { + --max_int; + } + + /* Check if max_intensity is >0. */ + if (max_int < MIN_INTENSITY) { + applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu); + max_int = MIN_INTENSITY; + } + + cgpu->intensity = max_int; + glob_thread_count = 1UL << max_int; + break; + } + } + + // TC is glob thread count + cgpu->thread_concurrency = glob_thread_count; + + applog(LOG_DEBUG, "GPU %d: computing max. 
global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); + } + + // Lyra2re v2 TC + else if ( !safe_cmp(cgpu->algorithm.name, "lyra2REv2") ) { + size_t glob_thread_count; + long max_int; + unsigned char type = 0; + + // determine which intensity type to use + if (cgpu->rawintensity > 0) { + glob_thread_count = cgpu->rawintensity; + max_int = glob_thread_count; + type = 2; + } + else if (cgpu->xintensity > 0) { + glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity); + max_int = cgpu->xintensity; + type = 1; + } + else { + glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity); + max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity); + } + + glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count); + + // if TC * scratchbuf size is too big for memory... reduce to max + if ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) { + + /* Selected intensity will not run on this GPU. Not enough memory. + * Adapt the memory setting. */ + // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc + switch (type) { + //raw intensity + case 2: + while ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) { + --glob_thread_count; + } + + max_int = glob_thread_count; + cgpu->rawintensity = glob_thread_count; + break; + + //x intensity + case 1: + glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE; + max_int = glob_thread_count / clState->compute_shaders; + + while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) { + --max_int; + } + + /* Check if max_intensity is >0. 
*/ + if (max_int < MIN_XINTENSITY) { + applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu); + max_int = MIN_XINTENSITY; + } + + cgpu->xintensity = max_int; + glob_thread_count = clState->compute_shaders * (1UL << max_int); + break; + + default: + glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE; + while (max_int && ((1UL << max_int) & glob_thread_count) == 0) { + --max_int; + } + + /* Check if max_intensity is >0. */ + if (max_int < MIN_INTENSITY) { + applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu); + max_int = MIN_INTENSITY; + } + + cgpu->intensity = max_int; + glob_thread_count = 1UL << max_int; + break; + } + } + + // TC is glob thread count + cgpu->thread_concurrency = glob_thread_count; + + applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); } else if (!cgpu->opt_tc) { unsigned int sixtyfours; @@ -586,7 +755,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } size_t bufsize; - size_t readbufsize = 128; + size_t buf1size; + size_t buf3size; + size_t buf2size; + size_t readbufsize = (!safe_cmp(algorithm->name, "credits")) ? 168 : 128; if (algorithm->rw_buffer_size < 0) { // calc buffer size for neoscrypt @@ -612,6 +784,31 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } + else if (!safe_cmp(algorithm->name, "yescrypt") || !safe_cmp(algorithm->name, "yescrypt-multi")) { + /* The scratch/pad-buffer needs 32kBytes memory per thread. */ + bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency; + buf1size = PLUCK_SECBUF_SIZE * cgpu->thread_concurrency; + buf2size = 128 * 8 * 8 * cgpu->thread_concurrency; + buf3size= 8 * 8 * 4 * cgpu->thread_concurrency; + /* This is the input buffer. For yescrypt this is guaranteed to be + * 80 bytes only. 
*/ + readbufsize = 80; + + applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); + // scrypt/n-scrypt + } + else if (!safe_cmp(algorithm->name, "lyra2REv2") ) { + /* The scratch/pad-buffer needs 32kBytes memory per thread. */ + bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency; + buf1size = 4* 8 * cgpu->thread_concurrency; //matrix + + /* This is the input buffer. For lyra2REv2 it is set to + * 80 bytes. */ + readbufsize = 80; + + applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size); + // scrypt/n-scrypt + } else { size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0)); bufsize = 128 * ipt * cgpu->thread_concurrency; @@ -624,6 +821,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } clState->padbuffer8 = NULL; + clState->buffer1 = NULL; + clState->buffer2 = NULL; + clState->buffer3 = NULL; if (bufsize > 0) { applog(LOG_DEBUG, "Creating read/write buffer sized %lu", (unsigned long)bufsize); @@ -635,6 +835,42 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize); } + if (!safe_cmp(algorithm->name, "yescrypt") || !safe_cmp(algorithm->name, "yescrypt-multi")) { + // need additional buffers + clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); + if (status != CL_SUCCESS && !clState->buffer1) { + applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status); + return NULL; + } + + clState->buffer2 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf2size, NULL, &status); + if (status != CL_SUCCESS && !clState->buffer2) { + applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer2), decrease TC or increase LG", status); + return NULL; + } + + clState->buffer3 = clCreateBuffer(clState->context,
CL_MEM_READ_WRITE, buf3size, NULL, &status); + if (status != CL_SUCCESS && !clState->buffer3) { + applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer3), decrease TC or increase LG", status); + return NULL; + } + } + else if (!safe_cmp(algorithm->name, "lyra2REv2") ) { + // need an additional buffer + clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); + if (status != CL_SUCCESS && !clState->buffer1) { + applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status); + return NULL; + } + } + else { + clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status); // bufsize is more than buffer1 needs; left oversized for now + if (status != CL_SUCCESS && !clState->buffer1) { + applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status); + return NULL; + } + } + /* This buffer is weird and might work to some degree even if * the create buffer call has apparently failed, so check if we * get anything back before we call it a failure.
*/ diff --git a/ocl.h b/ocl.h index 0950d2c03..8d6467f11 100644 --- a/ocl.h +++ b/ocl.h @@ -23,7 +23,10 @@ typedef struct __clState { cl_mem CLbuffer0; cl_mem MidstateBuf; cl_mem padbuffer8; - unsigned char cldata[80]; + cl_mem buffer1; + cl_mem buffer2; + cl_mem buffer3; + unsigned char cldata[168]; bool goffset; cl_uint vwidth; size_t max_work_size; diff --git a/sgminer.c b/sgminer.c index f9e3f096b..0c3fed0f9 100644 --- a/sgminer.c +++ b/sgminer.c @@ -1919,6 +1919,7 @@ static void calc_midstate(struct work *work) endian_flip32(work->midstate, work->midstate); } + static struct work *make_work(void) { struct work *w = (struct work *)calloc(1, sizeof(struct work)); @@ -2260,7 +2261,9 @@ static bool gbt_decode(struct pool *pool, json_t *res_val) static bool getwork_decode(json_t *res_val, struct work *work) { - if (unlikely(!jobj_binary(res_val, "data", work->data, sizeof(work->data), true))) { + size_t worklen = 128; + worklen = ((!safe_cmp(work->pool->algorithm.name, "credits")) ? sizeof(work->data) : worklen); + if (unlikely(!jobj_binary(res_val, "data", work->data, worklen, true))) { if (opt_morenotices) applog(LOG_ERR, "%s: JSON inval data", isnull(get_pool_name(work->pool), "")); return false; @@ -3018,10 +3021,17 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s cgpu = get_thr_cgpu(thr_id); - endian_flip128(work->data, work->data); + if (safe_cmp(work->pool->algorithm.name, "credits")) { + endian_flip128(work->data, work->data); + } else { + endian_flip168(work->data, work->data); + } /* build hex string - Make sure to restrict to 80 bytes for Neoscrypt */ - hexstr = bin2hex(work->data, ((!safe_cmp(work->pool->algorithm.name, "neoscrypt")) ? 
80 : sizeof(work->data))); + int datasize = 128; + if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) datasize = 80; + else if (!safe_cmp(work->pool->algorithm.name, "credits")) datasize = 168; + hexstr = bin2hex(work->data, datasize); /* build JSON-RPC request */ if (work->gbt) { @@ -7060,7 +7070,10 @@ void inc_hw_errors(struct thr_info *thr) /* Fills in the work nonce and builds the output data in work->hash */ static void rebuild_nonce(struct work *work, uint32_t nonce) { - uint32_t *work_nonce = (uint32_t *)(work->data + 76); + uint32_t nonce_pos = 76; + if (!safe_cmp(work->pool->algorithm.name, "credits")) nonce_pos = 140; + + uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos); *work_nonce = htole32(nonce); @@ -7076,7 +7089,10 @@ bool test_nonce(struct work *work, uint32_t nonce) rebuild_nonce(work, nonce); // for Neoscrypt, the diff1targ value is in work->target - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck")) { + if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck") + || !safe_cmp(work->pool->algorithm.name, "yescrypt") + || !safe_cmp(work->pool->algorithm.name, "yescrypt-multi") + ) { diff1targ = ((uint32_t *)work->target)[7]; } else { diff --git a/sph/Makefile.am b/sph/Makefile.am index d80e438ad..bc2f4b238 100644 --- a/sph/Makefile.am +++ b/sph/Makefile.am @@ -1,3 +1,3 @@ noinst_LIBRARIES = libsph.a -libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c +libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c diff --git a/sph/sha256_Y.c b/sph/sha256_Y.c new file mode 100644 index 000000000..a5d786d3f --- /dev/null +++ b/sph/sha256_Y.c @@ -0,0 +1,418 @@ +/*- + * Copyright 2005,2007,2009 Colin 
Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include + +#include "algorithm/sysendian.h" + +#include "sph/sha256_Y.h" + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static void +be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + be32enc(dst + i * 4, src[i]); +} + +/* + * Decode a big-endian length len vector of (unsigned char) into a length + * len/4 vector of (uint32_t). Assumes len is a multiple of 4. 
+ */ +static void +be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + dst[i] = be32dec(src + i * 4); +} + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + t0 = h + S1(e) + Ch(e, f, g) + k; \ + t1 = S0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, k) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i] + k) + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA256_Transform(uint32_t * state, const unsigned char block[64]) +{ + uint32_t W[64]; + uint32_t S[8]; + uint32_t t0, t1; + int i; + /* 1. Prepare message schedule W. */ + be32dec_vect(W, block, 64); + + for (i = 16; i < 64; i++) + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. 
*/ + RNDr(S, W, 0, 0x428a2f98); + RNDr(S, W, 1, 0x71374491); + RNDr(S, W, 2, 0xb5c0fbcf); + RNDr(S, W, 3, 0xe9b5dba5); + RNDr(S, W, 4, 0x3956c25b); + RNDr(S, W, 5, 0x59f111f1); + RNDr(S, W, 6, 0x923f82a4); + RNDr(S, W, 7, 0xab1c5ed5); + RNDr(S, W, 8, 0xd807aa98); + RNDr(S, W, 9, 0x12835b01); + RNDr(S, W, 10, 0x243185be); + RNDr(S, W, 11, 0x550c7dc3); + RNDr(S, W, 12, 0x72be5d74); + RNDr(S, W, 13, 0x80deb1fe); + RNDr(S, W, 14, 0x9bdc06a7); + RNDr(S, W, 15, 0xc19bf174); + RNDr(S, W, 16, 0xe49b69c1); + RNDr(S, W, 17, 0xefbe4786); + RNDr(S, W, 18, 0x0fc19dc6); + RNDr(S, W, 19, 0x240ca1cc); + RNDr(S, W, 20, 0x2de92c6f); + RNDr(S, W, 21, 0x4a7484aa); + RNDr(S, W, 22, 0x5cb0a9dc); + RNDr(S, W, 23, 0x76f988da); + RNDr(S, W, 24, 0x983e5152); + RNDr(S, W, 25, 0xa831c66d); + RNDr(S, W, 26, 0xb00327c8); + RNDr(S, W, 27, 0xbf597fc7); + RNDr(S, W, 28, 0xc6e00bf3); + RNDr(S, W, 29, 0xd5a79147); + RNDr(S, W, 30, 0x06ca6351); + RNDr(S, W, 31, 0x14292967); + RNDr(S, W, 32, 0x27b70a85); + RNDr(S, W, 33, 0x2e1b2138); + RNDr(S, W, 34, 0x4d2c6dfc); + RNDr(S, W, 35, 0x53380d13); + RNDr(S, W, 36, 0x650a7354); + RNDr(S, W, 37, 0x766a0abb); + RNDr(S, W, 38, 0x81c2c92e); + RNDr(S, W, 39, 0x92722c85); + RNDr(S, W, 40, 0xa2bfe8a1); + RNDr(S, W, 41, 0xa81a664b); + RNDr(S, W, 42, 0xc24b8b70); + RNDr(S, W, 43, 0xc76c51a3); + RNDr(S, W, 44, 0xd192e819); + RNDr(S, W, 45, 0xd6990624); + RNDr(S, W, 46, 0xf40e3585); + RNDr(S, W, 47, 0x106aa070); + RNDr(S, W, 48, 0x19a4c116); + RNDr(S, W, 49, 0x1e376c08); + RNDr(S, W, 50, 0x2748774c); + RNDr(S, W, 51, 0x34b0bcb5); + RNDr(S, W, 52, 0x391c0cb3); + RNDr(S, W, 53, 0x4ed8aa4a); + RNDr(S, W, 54, 0x5b9cca4f); + RNDr(S, W, 55, 0x682e6ff3); + RNDr(S, W, 56, 0x748f82ee); + RNDr(S, W, 57, 0x78a5636f); + RNDr(S, W, 58, 0x84c87814); + RNDr(S, W, 59, 0x8cc70208); + RNDr(S, W, 60, 0x90befffa); + RNDr(S, W, 61, 0xa4506ceb); + RNDr(S, W, 62, 0xbef9a3f7); + RNDr(S, W, 63, 0xc67178f2); + + /* 4. 
Mix local working variables into global state */ + for (i = 0; i < 8; i++) { + state[i] += S[i]; + +} + + /* Clean the stack. */ + memset(W, 0, 256); + memset(S, 0, 32); + t0 = t1 = 0; +} + +static unsigned char PAD[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA256_Pad(SHA256_CTX_Y * ctx) +{ + unsigned char len[8]; + uint32_t r, plen; + + /* + * Convert length to a vector of bytes -- we do this now rather + * than later because the length will change after we pad. + */ + be32enc_vect(len, ctx->count, 8); + + /* Add 1--64 bytes so that the resulting length is 56 mod 64 */ + r = (ctx->count[1] >> 3) & 0x3f; + plen = (r < 56) ? (56 - r) : (120 - r); + SHA256_Update_Y(ctx, PAD, (size_t)plen); + + /* Add the terminating bit-count */ + SHA256_Update_Y(ctx, len, 8); +} + +/* SHA-256 initialization. Begins a SHA-256 operation. 
*/ +void +SHA256_Init_Y(SHA256_CTX_Y * ctx) +{ + + /* Zero bits processed so far */ + ctx->count[0] = ctx->count[1] = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x6A09E667; + ctx->state[1] = 0xBB67AE85; + ctx->state[2] = 0x3C6EF372; + ctx->state[3] = 0xA54FF53A; + ctx->state[4] = 0x510E527F; + ctx->state[5] = 0x9B05688C; + ctx->state[6] = 0x1F83D9AB; + ctx->state[7] = 0x5BE0CD19; +} + +/* Add bytes into the hash */ +void +SHA256_Update_Y(SHA256_CTX_Y * ctx, const void *in, size_t len) +{ + uint32_t bitlen[2]; + uint32_t r; + const unsigned char *src = in; + + /* Number of bytes left in the buffer from previous updates */ + r = (ctx->count[1] >> 3) & 0x3f; + + /* Convert the length into a number of bits */ + bitlen[1] = ((uint32_t)len) << 3; + bitlen[0] = (uint32_t)(len >> 29); + + /* Update number of bits */ + if ((ctx->count[1] += bitlen[1]) < bitlen[1]) + ctx->count[0]++; + ctx->count[0] += bitlen[0]; + + /* Handle the case where we don't need to perform any transforms */ + if (len < 64 - r) { + + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block */ + memcpy(&ctx->buf[r], src, 64 - r); + + SHA256_Transform(ctx->state, ctx->buf); + src += 64 - r; + len -= 64 - r; + + /* Perform complete blocks */ + + while (len >= 64) { + SHA256_Transform(ctx->state, src); + src += 64; + len -= 64; + } + + /* Copy left over data into buffer */ + memcpy(ctx->buf, src, len); +} + +/* + * SHA-256 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx) +{ + /* Add padding */ + SHA256_Pad(ctx); + + /* Write the hash */ + be32enc_vect(digest, ctx->state, 32); + + /* Clear the context state */ + memset((void *)ctx, 0, sizeof(*ctx)); +} + +/* Initialize an HMAC-SHA256 operation with the given key. 
*/ +void +HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen) +{ + unsigned char pad[64]; + unsigned char khash[32]; + const unsigned char * K = _K; + size_t i; + + /* If Klen > 64, the key is really SHA256(K); ictx is borrowed as scratch and re-initialized below. */ + if (Klen > 64) { + SHA256_Init_Y(&ctx->ictx); + SHA256_Update_Y(&ctx->ictx, K, Klen); + SHA256_Final_Y(khash, &ctx->ictx); + K = khash; + Klen = 32; + } + + /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ + SHA256_Init_Y(&ctx->ictx); + memset(pad, 0x36, 64); + for (i = 0; i < Klen; i++) { + pad[i] ^= K[i]; + } + SHA256_Update_Y(&ctx->ictx, pad, 64); + + /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ + SHA256_Init_Y(&ctx->octx); + memset(pad, 0x5c, 64); + for (i = 0; i < Klen; i++) + { + pad[i] ^= K[i]; + } + SHA256_Update_Y(&ctx->octx, pad, 64); + + /* Clean the stack. NOTE(review): only khash is wiped; pad still holds K xor 0x5c on return. */ + memset(khash, 0, 32); +} + +/* Add bytes to the HMAC-SHA256 operation. */ +void +HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len) +{ + /* Feed data to the inner SHA256 operation. */ + SHA256_Update_Y(&ctx->ictx, in, len); +} + +/* Finish an HMAC-SHA256 operation; writes the 32-byte MAC to digest and leaves both inner and outer contexts finalized. */ +void +HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx) +{ + unsigned char ihash[32]; + + /* Finish the inner SHA256 operation. */ + SHA256_Final_Y(ihash, &ctx->ictx); + + /* Feed the inner hash to the outer SHA256 operation. */ + SHA256_Update_Y(&ctx->octx, ihash, 32); + + /* Finish the outer SHA256 operation. */ + SHA256_Final_Y(digest, &ctx->octx); + + /* Clean the stack. */ + memset(ihash, 0, 32); +} + +/** + * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): + * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and + * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). 
+ */ + +void +PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, +size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) +{ + HMAC_SHA256_CTX_Y PShctx, hctx; + size_t i; + uint8_t ivec[4]; + uint8_t U[32]; + uint8_t T[32]; + uint64_t j; + int k; + size_t clen; + + /* Compute HMAC state after processing P and S. */ + HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen); + HMAC_SHA256_Update_Y(&PShctx, salt, saltlen); + + /* Iterate through the blocks. */ + for (i = 0; i * 32 < dkLen; i++) { + /* Generate INT(i + 1). */ + be32enc(ivec, (uint32_t)(i + 1)); + + /* Compute U_1 = PRF(P, S || INT(i + 1)); i is zero-based here, reusing the saved P||S state. */ + memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y)); + HMAC_SHA256_Update_Y(&hctx, ivec, 4); + HMAC_SHA256_Final_Y(U, &hctx); + + /* T_i = U_1 ... */ + memcpy(T, U, 32); + + for (j = 2; j <= c; j++) { + /* Compute U_j = PRF(P, U_{j-1}). */ + HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen); + HMAC_SHA256_Update_Y(&hctx, U, 32); + HMAC_SHA256_Final_Y(U, &hctx); + + /* ... xor U_j ... */ + for (k = 0; k < 32; k++) + T[k] ^= U[k]; + } + + /* Copy as many bytes as necessary into buf. */ + clen = dkLen - i * 32; + if (clen > 32) + clen = 32; + memcpy(&buf[i * 32], T, clen); + } + + /* Clean PShctx, since we never called _Final on it. */ + memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y)); +} diff --git a/sph/sha256_Y.h b/sph/sha256_Y.h new file mode 100644 index 000000000..e97b81ba2 --- /dev/null +++ b/sph/sha256_Y.h @@ -0,0 +1,63 @@ +/*- + * Copyright 2005,2007,2009 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $ + */ + +#ifndef _SHA256_H_ +#define _SHA256_H_ + +#include <sys/types.h> + +#include <stdint.h> + +typedef struct SHA256Context { + uint32_t state[8]; /* running hash state */ + uint32_t count[2]; /* bits processed: [0] high word, [1] low word */ + unsigned char buf[64]; /* bytes of the current incomplete block */ +} SHA256_CTX_Y; + +typedef struct HMAC_SHA256Context { + SHA256_CTX_Y ictx; /* inner hash: SHA256(K xor 0x36.. || data) */ + SHA256_CTX_Y octx; /* outer hash: SHA256(K xor 0x5c.. || inner digest) */ +} HMAC_SHA256_CTX_Y; + +void SHA256_Init_Y(SHA256_CTX_Y *); +void SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t); +void SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *); +void HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t); +void HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t); +void HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *); + +/** + * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): + * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and + * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). 
+ */ +void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t, + uint64_t, uint8_t *, size_t); + + +#endif /* !_SHA256_H_ */ From fecc92be894c2ef7f4c84e132bb7ed5a4d2cc3cd Mon Sep 17 00:00:00 2001 From: elbandi Date: Tue, 10 Nov 2015 18:24:23 +0100 Subject: [PATCH 21/63] Use algorithm type in conditions --- driver-opencl.c | 2 +- ocl.c | 24 ++++++++++++------------ sgminer.c | 44 +++++++++++++++++++++----------------------- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/driver-opencl.c b/driver-opencl.c index f0a88a6f6..0e45e5555 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1366,7 +1366,7 @@ static bool opencl_thread_init(struct thr_info *thr) static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work) { - if (!safe_cmp(work->pool->algorithm.name, "Lyra2RE")) { + if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REv2) { work->blk.work = work; precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data)); } diff --git a/ocl.c b/ocl.c index bd8a1527c..cb00790ff 100644 --- a/ocl.c +++ b/ocl.c @@ -332,7 +332,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } // neoscrypt TC - if (!safe_cmp(cgpu->algorithm.name, "neoscrypt") && !cgpu->opt_tc) { + if (cgpu->algorithm.type == ALGO_NEOSCRYPT && !cgpu->opt_tc) { size_t glob_thread_count; long max_int; unsigned char type = 0; @@ -417,7 +417,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } // pluck TC - else if (!safe_cmp(cgpu->algorithm.name, "pluck") && !cgpu->opt_tc) { + else if (cgpu->algorithm.type == ALGO_PLUCK && !cgpu->opt_tc) { size_t glob_thread_count; long max_int; unsigned char type = 0; @@ -501,8 +501,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } // Yescrypt TC - else if ((!safe_cmp(cgpu->algorithm.name, "yescrypt") || - !safe_cmp(algorithm->name, "yescrypt-multi")) && !cgpu->opt_tc) { + else 
if ((cgpu->algorithm.type == ALGO_YESCRYPT || + algorithm->type == ALGO_YESCRYPT_MULTI) && !cgpu->opt_tc) { size_t glob_thread_count; long max_int; unsigned char type = 0; @@ -586,7 +586,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } // Lyra2re v2 TC - else if ( !safe_cmp(cgpu->algorithm.name, "lyra2REv2") ) { + else if (cgpu->algorithm.type == ALGO_LYRA2REv2 && !cgpu->opt_tc) { size_t glob_thread_count; long max_int; unsigned char type = 0; @@ -758,11 +758,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg size_t buf1size; size_t buf3size; size_t buf2size; - size_t readbufsize = (!safe_cmp(algorithm->name, "credits")) ? 168 : 128; + size_t readbufsize = (algorithm->type == ALGO_CRE) ? 168 : 128; if (algorithm->rw_buffer_size < 0) { // calc buffer size for neoscrypt - if (!safe_cmp(algorithm->name, "neoscrypt")) { + if (algorithm->type == ALGO_NEOSCRYPT) { /* The scratch/pad-buffer needs 32kBytes memory per thread. */ bufsize = NEOSCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency; @@ -773,7 +773,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } - else if (!safe_cmp(algorithm->name, "pluck")) { + else if (algorithm->type == ALGO_PLUCK) { /* The scratch/pad-buffer needs 32kBytes memory per thread. 
*/ bufsize = PLUCK_SCRATCHBUF_SIZE * cgpu->thread_concurrency; @@ -784,7 +784,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } - else if (!safe_cmp(algorithm->name, "yescrypt") || !safe_cmp(algorithm->name, "yescrypt-multi")) { + else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { /* The scratch/pad-buffer needs 32kBytes memory per thread. */ bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency; buf1size = PLUCK_SECBUF_SIZE * cgpu->thread_concurrency; @@ -797,7 +797,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } - else if (!safe_cmp(algorithm->name, "lyra2REv2") ) { + else if (algorithm->type == ALGO_LYRA2REv2) { /* The scratch/pad-buffer needs 32kBytes memory per thread. 
*/ bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency; buf1size = 4* 8 * cgpu->thread_concurrency; //matrix @@ -835,7 +835,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize); } - if (!safe_cmp(algorithm->name, "yescrypt") || !safe_cmp(algorithm->name, "yescrypt-multi")) { + if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { // need additionnal buffers clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); if (status != CL_SUCCESS && !clState->buffer1) { @@ -855,7 +855,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg return NULL; } } - else if (!safe_cmp(algorithm->name, "lyra2REv2") ) { + else if (algorithm->type == ALGO_LYRA2REv2) { // need additionnal buffers clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); if (status != CL_SUCCESS && !clState->buffer1) { diff --git a/sgminer.c b/sgminer.c index 0c3fed0f9..3c5042c32 100644 --- a/sgminer.c +++ b/sgminer.c @@ -2082,7 +2082,7 @@ static double get_work_blockdiff(const struct work *work) double numerator; // Neoscrypt has the data reversed - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) { + if (work->pool->algorithm.type == ALGO_NEOSCRYPT) { diff64 = bswap_64(((uint64_t)(be32toh(*((uint32_t *)(work->data + 72))) & 0xFFFFFF00)) << 8); numerator = (double)work->pool->algorithm.diff_numerator; } @@ -2148,7 +2148,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work) } // Neoscrypt doesn't calc_midstate() - if (safe_cmp(pool->algorithm.name, "neoscrypt")) { + if (pool->algorithm.type != ALGO_NEOSCRYPT) { calc_midstate(work); } local_work++; @@ -2262,7 +2262,7 @@ static bool gbt_decode(struct pool *pool, json_t *res_val) static bool getwork_decode(json_t *res_val, struct work *work) { size_t worklen = 128; - worklen = 
((!safe_cmp(work->pool->algorithm.name, "credits")) ? sizeof(work->data) : worklen); + worklen = ((work->pool->algorithm.type == ALGO_CRE) ? sizeof(work->data) : worklen); if (unlikely(!jobj_binary(res_val, "data", work->data, worklen, true))) { if (opt_morenotices) applog(LOG_ERR, "%s: JSON inval data", isnull(get_pool_name(work->pool), "")); @@ -2270,7 +2270,7 @@ static bool getwork_decode(json_t *res_val, struct work *work) } // Neoscrypt doesn't calc midstate - if (safe_cmp(work->pool->algorithm.name, "neoscrypt")) { + if (work->pool->algorithm.type != ALGO_NEOSCRYPT) { if (!jobj_binary(res_val, "midstate", work->midstate, sizeof(work->midstate), false)) { // Calculate it ourselves if (opt_morenotices) { @@ -3021,16 +3021,16 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s cgpu = get_thr_cgpu(thr_id); - if (safe_cmp(work->pool->algorithm.name, "credits")) { - endian_flip128(work->data, work->data); - } else { + if (work->pool->algorithm.type == ALGO_CRE) { endian_flip168(work->data, work->data); + } else { + endian_flip128(work->data, work->data); } /* build hex string - Make sure to restrict to 80 bytes for Neoscrypt */ int datasize = 128; - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) datasize = 80; - else if (!safe_cmp(work->pool->algorithm.name, "credits")) datasize = 168; + if (work->pool->algorithm.type == ALGO_NEOSCRYPT) datasize = 80; + else if (work->pool->algorithm.type == ALGO_CRE) datasize = 168; hexstr = bin2hex(work->data, datasize); /* build JSON-RPC request */ @@ -3400,7 +3400,7 @@ static void calc_diff(struct work *work, double known) applog(LOG_DEBUG, "calc_diff() algorithm = %s", work->pool->algorithm.name); // Neoscrypt - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) { + if (work->pool->algorithm.type == ALGO_NEOSCRYPT) { dcut64 = (double)*((uint64_t *)(work->target + 22)); } else { @@ -5574,7 +5574,7 @@ static void *stratum_sthread(void *userdata) applog(LOG_DEBUG, "stratum_sthread() 
algorithm = %s", pool->algorithm.name); // Neoscrypt is little endian - if (!safe_cmp(pool->algorithm.name, "neoscrypt")) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { nonce = htobe32(*((uint32_t *)(work->data + 76))); //*((uint32_t *)nonce2) = htole32(work->nonce2); } @@ -6078,7 +6078,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) applog(LOG_DEBUG, "[THR%d] gen_stratum_work() - algorithm = %s", work->thr_id, pool->algorithm.name); // Different for Neoscrypt because of Little Endian - if (!safe_cmp(pool->algorithm.name, "neoscrypt")) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { /* Incoming data is in little endian. */ memcpy(merkle_root, merkle_sha, 32); @@ -6140,7 +6140,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) } // For Neoscrypt use set_target_neoscrypt() function - if (!safe_cmp(pool->algorithm.name, "neoscrypt")) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { set_target_neoscrypt(work->target, work->sdiff, work->thr_id); } else { calc_midstate(work); @@ -6238,7 +6238,7 @@ static void apply_initial_gpu_settings(struct pool *pool) //thread-concurrency // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!safe_cmp(pool->algorithm.name, "neoscrypt")) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting @@ -6416,7 +6416,7 @@ static unsigned long compare_pool_settings(struct pool *oldpool, struct pool *ne //thread-concurrency // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!safe_cmp(newpool->algorithm.name, "neoscrypt")) { + if (newpool->algorithm.type == ALGO_NEOSCRYPT) { opt2 = ((empty_string(newpool->thread_concurrency))?"0":get_pool_setting(newpool->thread_concurrency, 
default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting @@ -6562,7 +6562,7 @@ static void apply_switcher_options(unsigned long options, struct pool *pool) if(opt_isset(options, SWITCHER_APPLY_TC)) { // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!safe_cmp(pool->algorithm.name, "neoscrypt")) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting @@ -6801,7 +6801,7 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work) if(opt_isset(pool_switch_options, SWITCHER_APPLY_TC)) { // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) { + if (work->pool->algorithm.type == ALGO_NEOSCRYPT) { opt = ((empty_string(work->pool->thread_concurrency))?"0":get_pool_setting(work->pool->thread_concurrency, default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting @@ -7071,7 +7071,7 @@ void inc_hw_errors(struct thr_info *thr) static void rebuild_nonce(struct work *work, uint32_t nonce) { uint32_t nonce_pos = 76; - if (!safe_cmp(work->pool->algorithm.name, "credits")) nonce_pos = 140; + if (work->pool->algorithm.type == ALGO_CRE) nonce_pos = 140; uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos); @@ -7089,10 +7089,8 @@ bool test_nonce(struct work *work, uint32_t nonce) rebuild_nonce(work, nonce); // for Neoscrypt, the diff1targ value is in work->target - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck") - || !safe_cmp(work->pool->algorithm.name, "yescrypt") - || !safe_cmp(work->pool->algorithm.name, "yescrypt-multi") - ) { + if 
(work->pool->algorithm.type == ALGO_NEOSCRYPT || work->pool->algorithm.type == ALGO_PLUCK + || work->pool->algorithm.type == ALGO_YESCRYPT || work->pool->algorithm.type == ALGO_YESCRYPT_MULTI) { diff1targ = ((uint32_t *)work->target)[7]; } else { @@ -7234,7 +7232,7 @@ static void hash_sole_work(struct thr_info *mythr) } else if (drv->working_diff > work->work_difficulty) drv->working_diff = work->work_difficulty; - if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) { + if (work->pool->algorithm.type == ALGO_NEOSCRYPT) { set_target_neoscrypt(work->device_target, work->device_diff, work->thr_id); } else { set_target(work->device_target, work->device_diff, work->pool->algorithm.diff_multiplier2, work->thr_id); From c855a8d2d26d607262eead55432f0a86187097bb Mon Sep 17 00:00:00 2001 From: elbandi Date: Tue, 10 Nov 2015 18:24:49 +0100 Subject: [PATCH 22/63] Simplify lyra2re algos --- Makefile.am | 2 +- algorithm/lyra2_old.c | 208 --------------------- algorithm/lyra2_old.h | 50 ----- algorithm/lyra2re.c | 7 +- algorithm/lyra2re_old.c | 14 +- algorithm/sponge_old.c | 405 ---------------------------------------- algorithm/sponge_old.h | 98 ---------- 7 files changed, 6 insertions(+), 778 deletions(-) delete mode 100644 algorithm/lyra2_old.c delete mode 100644 algorithm/lyra2_old.h delete mode 100644 algorithm/sponge_old.c delete mode 100644 algorithm/sponge_old.h diff --git a/Makefile.am b/Makefile.am index 5da38cc59..7047bf10f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -73,7 +73,7 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h -sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h algorithm/lyra2_old.c algorithm/lyra2_old.h algorithm/sponge_old.c algorithm/sponge_old.h 
+sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h sgminer_SOURCES += algorithm/credits.c algorithm/credits.h sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h diff --git a/algorithm/lyra2_old.c b/algorithm/lyra2_old.c deleted file mode 100644 index 3b3819878..000000000 --- a/algorithm/lyra2_old.c +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Implementation of the Lyra2 Password Hashing Scheme (PHS). - * - * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. - * - * This software is hereby placed in the public domain. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include -#include -#include -#include -#include "lyra2_old.h" -#include "sponge_old.h" - -/** - * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords - * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, - * where "b" is the underlying sponge's bitrate). 
In this implementation, the "basil" is composed by all - * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value - * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols). - * - * @param K The derived key to be output by the algorithm - * @param kLen Desired key length - * @param pwd User password - * @param pwdlen Password length - * @param salt Salt - * @param saltlen Salt length - * @param timeCost Parameter to determine the processing time (T) - * @param nRows Number or rows of the memory matrix (R) - * @param nCols Number of columns of the memory matrix (C) - * - * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) - */ -int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) { - - //============================= Basic variables ============================// - int64_t row = 2; //index of row to be processed - int64_t prev = 1; //index of prev (last row ever computed/modified) - int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering) - int64_t tau; //Time Loop iterator - int64_t step = 1; //Visitation step (used during Setup and Wandering phases) - int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) - int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 - int64_t i; //auxiliary iteration counter - //==========================================================================/ - - //========== Initializing the Memory Matrix and pointers to it =============// - //Tries to allocate enough space for the whole memory matrix - i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); - uint64_t *wholeMatrix = malloc(i); - if (wholeMatrix == NULL) { - return -1; - } - memset(wholeMatrix, 0, i); - - 
//Allocates pointers to each row of the matrix - uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*)); - if (memMatrix == NULL) { - return -1; - } - //Places the pointers in the correct positions - uint64_t *ptrWord = wholeMatrix; - for (i = 0; i < nRows; i++) { - memMatrix[i] = ptrWord; - ptrWord += ROW_LEN_INT64; - } - //==========================================================================/ - - //============= Getting the password + salt + basil padded with 10*1 ===============// - //OBS.:The memory matrix will temporarily hold the password: not for saving memory, - //but this ensures that the password copied locally will be overwritten as soon as possible - - //First, we clean enough blocks for the password, salt, basil and padding - uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; - byte *ptrByte = (byte*) wholeMatrix; - memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); - - //Prepends the password - memcpy(ptrByte, pwd, pwdlen); - ptrByte += pwdlen; - - //Concatenates the salt - memcpy(ptrByte, salt, saltlen); - ptrByte += saltlen; - - //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface - memcpy(ptrByte, &kLen, sizeof (uint64_t)); - ptrByte += sizeof (uint64_t); - memcpy(ptrByte, &pwdlen, sizeof (uint64_t)); - ptrByte += sizeof (uint64_t); - memcpy(ptrByte, &saltlen, sizeof (uint64_t)); - ptrByte += sizeof (uint64_t); - memcpy(ptrByte, &timeCost, sizeof (uint64_t)); - ptrByte += sizeof (uint64_t); - memcpy(ptrByte, &nRows, sizeof (uint64_t)); - ptrByte += sizeof (uint64_t); - memcpy(ptrByte, &nCols, sizeof (uint64_t)); - ptrByte += sizeof (uint64_t); - - //Now comes the padding - *ptrByte = 0x80; //first byte of padding: right after the password - ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix - ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct 
position: end of incomplete block - *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block - //==========================================================================/ - - //======================= Initializing the Sponge State ====================// - //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) - uint64_t *state = malloc(16 * sizeof (uint64_t)); - if (state == NULL) { - return -1; - } - initStateO(state); - //==========================================================================/ - - //================================ Setup Phase =============================// - //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits - ptrWord = wholeMatrix; - for (i = 0; i < nBlocksInput; i++) { - absorbBlockBlake2SafeO(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) - ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil) - } - - //Initializes M[0] and M[1] - reducedSqueezeRow0O(state, memMatrix[0]); //The locally copied password is most likely overwritten here - reducedDuplexRow1O(state, memMatrix[0], memMatrix[1]); - - do { - //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) - reducedDuplexRowSetupO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); - - - //updates the value of row* (deterministically picked during Setup)) - rowa = (rowa + step) & (window - 1); - //update prev: it now points to the last row ever computed - prev = row; - //updates row: goes to the next row to be computed - row++; - - //Checks if all rows in the window where visited. 
- if (rowa == 0) { - step = window + gap; //changes the step: approximately doubles its value - window *= 2; //doubles the size of the re-visitation window - gap = -gap; //inverts the modifier to the step - } - - } while (row < nRows); - //==========================================================================/ - - //============================ Wandering Phase =============================// - row = 0; //Resets the visitation to the first row of the memory matrix - for (tau = 1; tau <= timeCost; tau++) { - //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1 - step = (tau % 2 == 0) ? -1 : nRows / 2 - 1; - do { - //Selects a pseudorandom index row* - //------------------------------------------------------------------------------------------ - //rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) - rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE) - //------------------------------------------------------------------------------------------ - - //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] - reducedDuplexRowO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); - - //update prev: it now points to the last row ever computed - prev = row; - - //updates row: goes to the next row to be computed - //------------------------------------------------------------------------------------------ - //row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) - row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE) - //------------------------------------------------------------------------------------------ - - } while (row != 0); - } - //==========================================================================/ - - //============================ Wrap-up Phase ===============================// - //Absorbs the last block of the memory matrix - absorbBlockO(state, memMatrix[rowa]); 
- - //Squeezes the key - squeezeO(state, K, kLen); - //==========================================================================/ - - //========================= Freeing the memory =============================// - free(memMatrix); - free(wholeMatrix); - - //Wiping out the sponge's internal state before freeing it - memset(state, 0, 16 * sizeof (uint64_t)); - free(state); - //==========================================================================/ - - return 0; -} diff --git a/algorithm/lyra2_old.h b/algorithm/lyra2_old.h deleted file mode 100644 index 9dbe56682..000000000 --- a/algorithm/lyra2_old.h +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Header file for the Lyra2 Password Hashing Scheme (PHS). - * - * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. - * - * This software is hereby placed in the public domain. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef LYRA2OLD_H_ -#define LYRA2OLD_H_ - -#include - -typedef unsigned char byte; - -//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED) -#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t) -#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes - - -#ifdef BLOCK_LEN_BITS - #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t) - #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes -#else //default block lenght: 768 bits - #define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t) - #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes -#endif - -#ifndef N_COLS - #define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default -#endif - -#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks -#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row - - -int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); - -#endif /* LYRA2_H_ */ diff --git a/algorithm/lyra2re.c b/algorithm/lyra2re.c index 9d89853fe..cfc5adbb3 100644 --- a/algorithm/lyra2re.c +++ b/algorithm/lyra2re.c @@ -68,9 +68,6 @@ inline void lyra2rehash(void *state, const void *input) sph_blake256 (&ctx_blake, input, 80); sph_blake256_close (&ctx_blake, hashA); - - - sph_keccak256_init(&ctx_keccak); sph_keccak256 (&ctx_keccak,hashA, 32); sph_keccak256_close(&ctx_keccak, hashB); @@ -93,9 +90,7 @@ inline void lyra2rehash(void *state, const void *input) sph_bmw256 (&ctx_bmw, hashB, 32); sph_bmw256_close(&ctx_bmw, hashA); -//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]); - - memcpy(state, hashA, 32); + memcpy(state, hashA, 32); } static const uint32_t diff1targ = 0x0000ffff; diff --git 
a/algorithm/lyra2re_old.c b/algorithm/lyra2re_old.c index 3aa4be9a7..31a0a1abc 100644 --- a/algorithm/lyra2re_old.c +++ b/algorithm/lyra2re_old.c @@ -36,7 +36,7 @@ #include "sph/sph_groestl.h" #include "sph/sph_skein.h" #include "sph/sph_keccak.h" -#include "lyra2_old.h" +#include "lyra2.h" /* * Encode a length len/4 vector of (uint32_t) into a length len vector of @@ -65,17 +65,13 @@ inline void lyra2rehash_old(void *state, const void *input) sph_blake256 (&ctx_blake, input, 80); sph_blake256_close (&ctx_blake, hashA); - - - sph_keccak256_init(&ctx_keccak); sph_keccak256 (&ctx_keccak,hashA, 32); sph_keccak256_close(&ctx_keccak, hashB); - LYRA2O(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); + LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); - - sph_skein256_init(&ctx_skein); + sph_skein256_init(&ctx_skein); sph_skein256 (&ctx_skein, hashA, 32); sph_skein256_close(&ctx_skein, hashB); @@ -84,9 +80,7 @@ inline void lyra2rehash_old(void *state, const void *input) sph_groestl256 (&ctx_groestl, hashB, 32); sph_groestl256_close(&ctx_groestl, hashA); -//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]); - - memcpy(state, hashA, 32); + memcpy(state, hashA, 32); } static const uint32_t diff1targ = 0x0000ffff; diff --git a/algorithm/sponge_old.c b/algorithm/sponge_old.c deleted file mode 100644 index 7152687ff..000000000 --- a/algorithm/sponge_old.c +++ /dev/null @@ -1,405 +0,0 @@ -/** - * A simple implementation of Blake2b's internal permutation - * in the form of a sponge. - * - * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. - * - * This software is hereby placed in the public domain. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include -#include -#include -#include "sponge_old.h" -#include "lyra2_old.h" - - - -/** - * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder - * receive Blake2b's IV as per Blake2b's specification. Note: Even though sponges - * typically have their internal state initialized with zeros, Blake2b's G function - * has a fixed point: if the internal state and message are both filled with zeros. the - * resulting permutation will always be a block filled with zeros; this happens because - * Blake2b does not use the constants originally employed in Blake2 inside its G function, - * relying on the IV for avoiding possible fixed points. - * - * @param state The 1024-bit array to be initialized - */ -void initStateO(uint64_t state[/*16*/]) { - //First 512 bis are zeros - memset(state, 0, 64); - //Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV - state[8] = blake2b_IV[0]; - state[9] = blake2b_IV[1]; - state[10] = blake2b_IV[2]; - state[11] = blake2b_IV[3]; - state[12] = blake2b_IV[4]; - state[13] = blake2b_IV[5]; - state[14] = blake2b_IV[6]; - state[15] = blake2b_IV[7]; -} - -/** - * Execute Blake2b's G function, with all 12 rounds. 
- * - * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function - */ -static void blake2bLyra(uint64_t *v) { - ROUND_LYRA(0); - ROUND_LYRA(1); - ROUND_LYRA(2); - ROUND_LYRA(3); - ROUND_LYRA(4); - ROUND_LYRA(5); - ROUND_LYRA(6); - ROUND_LYRA(7); - ROUND_LYRA(8); - ROUND_LYRA(9); - ROUND_LYRA(10); - ROUND_LYRA(11); -} - -/** - * Executes a reduced version of Blake2b's G function with only one round - * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function - */ -static void reducedBlake2bLyra(uint64_t *v) { - ROUND_LYRA(0); -} - -/** - * Performs a squeeze operation, using Blake2b's G function as the - * internal permutation - * - * @param state The current state of the sponge - * @param out Array that will receive the data squeezed - * @param len The number of bytes to be squeezed into the "out" array - */ -void squeezeO(uint64_t *state, byte *out, unsigned int len) { - int fullBlocks = len / BLOCK_LEN_BYTES; - byte *ptr = out; - int i; - //Squeezes full blocks - for (i = 0; i < fullBlocks; i++) { - memcpy(ptr, state, BLOCK_LEN_BYTES); - blake2bLyra(state); - ptr += BLOCK_LEN_BYTES; - } - - //Squeezes remaining bytes - memcpy(ptr, state, (len % BLOCK_LEN_BYTES)); -} - -/** - * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words - * of type uint64_t), using Blake2b's G function as the internal permutation - * - * @param state The current state of the sponge - * @param in The block to be absorbed (BLOCK_LEN_INT64 words) - */ -void absorbBlockO(uint64_t *state, const uint64_t *in) { - //XORs the first BLOCK_LEN_INT64 words of "in" with the current state - state[0] ^= in[0]; - state[1] ^= in[1]; - state[2] ^= in[2]; - state[3] ^= in[3]; - state[4] ^= in[4]; - state[5] ^= in[5]; - state[6] ^= in[6]; - state[7] ^= in[7]; - state[8] ^= in[8]; - state[9] ^= in[9]; - state[10] ^= in[10]; - state[11] ^= in[11]; - - //Applies the transformation f to the sponge's state - blake2bLyra(state); -} - -/** - * 
Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64 - * words of type uint64_t), using Blake2b's G function as the internal permutation - * - * @param state The current state of the sponge - * @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words) - */ -void absorbBlockBlake2SafeO(uint64_t *state, const uint64_t *in) { - //XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state - state[0] ^= in[0]; - state[1] ^= in[1]; - state[2] ^= in[2]; - state[3] ^= in[3]; - state[4] ^= in[4]; - state[5] ^= in[5]; - state[6] ^= in[6]; - state[7] ^= in[7]; - - //Applies the transformation f to the sponge's state - blake2bLyra(state); -} - -/** - * Performs a reduced squeeze operation for a single row, from the highest to - * the lowest index, using the reduced-round Blake2b's G function as the - * internal permutation - * - * @param state The current state of the sponge - * @param rowOut Row to receive the data squeezed - */ -void reducedSqueezeRow0O(uint64_t* state, uint64_t* rowOut) { - uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] - int i; - //M[row][C-1-col] = H.reduced_squeeze() - for (i = 0; i < N_COLS; i++) { - ptrWord[0] = state[0]; - ptrWord[1] = state[1]; - ptrWord[2] = state[2]; - ptrWord[3] = state[3]; - ptrWord[4] = state[4]; - ptrWord[5] = state[5]; - ptrWord[6] = state[6]; - ptrWord[7] = state[7]; - ptrWord[8] = state[8]; - ptrWord[9] = state[9]; - ptrWord[10] = state[10]; - ptrWord[11] = state[11]; - - //Goes to next block (column) that will receive the squeezed data - ptrWord -= BLOCK_LEN_INT64; - - //Applies the reduced-round transformation f to the sponge's state - reducedBlake2bLyra(state); - } -} - -/** - * Performs a reduced duplex operation for a single row, from the highest to - * the lowest index, using the reduced-round Blake2b's G function as the - * internal permutation - * - * @param state The current state of the sponge - * @param rowIn Row 
to feed the sponge - * @param rowOut Row to receive the sponge's output - */ -void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { - uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev - uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row - int i; - - for (i = 0; i < N_COLS; i++) { - - //Absorbing "M[prev][col]" - state[0] ^= (ptrWordIn[0]); - state[1] ^= (ptrWordIn[1]); - state[2] ^= (ptrWordIn[2]); - state[3] ^= (ptrWordIn[3]); - state[4] ^= (ptrWordIn[4]); - state[5] ^= (ptrWordIn[5]); - state[6] ^= (ptrWordIn[6]); - state[7] ^= (ptrWordIn[7]); - state[8] ^= (ptrWordIn[8]); - state[9] ^= (ptrWordIn[9]); - state[10] ^= (ptrWordIn[10]); - state[11] ^= (ptrWordIn[11]); - - //Applies the reduced-round transformation f to the sponge's state - reducedBlake2bLyra(state); - - //M[row][C-1-col] = M[prev][col] XOR rand - ptrWordOut[0] = ptrWordIn[0] ^ state[0]; - ptrWordOut[1] = ptrWordIn[1] ^ state[1]; - ptrWordOut[2] = ptrWordIn[2] ^ state[2]; - ptrWordOut[3] = ptrWordIn[3] ^ state[3]; - ptrWordOut[4] = ptrWordIn[4] ^ state[4]; - ptrWordOut[5] = ptrWordIn[5] ^ state[5]; - ptrWordOut[6] = ptrWordIn[6] ^ state[6]; - ptrWordOut[7] = ptrWordIn[7] ^ state[7]; - ptrWordOut[8] = ptrWordIn[8] ^ state[8]; - ptrWordOut[9] = ptrWordIn[9] ^ state[9]; - ptrWordOut[10] = ptrWordIn[10] ^ state[10]; - ptrWordOut[11] = ptrWordIn[11] ^ state[11]; - - - //Input: next column (i.e., next block in sequence) - ptrWordIn += BLOCK_LEN_INT64; - //Output: goes to previous column - ptrWordOut -= BLOCK_LEN_INT64; - } -} - -/** - * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., - * the wordwise addition of two columns, ignoring carries between words). 
The - * output of this operation, "rand", is then used to make - * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and - * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit - * rotation to the left and N_COLS is a system parameter. - * - * @param state The current state of the sponge - * @param rowIn Row used only as input - * @param rowInOut Row used as input and to receive output after rotation - * @param rowOut Row receiving the output - * - */ -void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { - uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev - uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* - uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row - int i; - - for (i = 0; i < N_COLS; i++) { - //Absorbing "M[prev] [+] M[row*]" - state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); - state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); - state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); - state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); - state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); - state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); - state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); - state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); - state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); - state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); - state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); - state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); - - //Applies the reduced-round transformation f to the sponge's state - reducedBlake2bLyra(state); - - //M[row][col] = M[prev][col] XOR rand - ptrWordOut[0] = ptrWordIn[0] ^ state[0]; - ptrWordOut[1] = ptrWordIn[1] ^ state[1]; - ptrWordOut[2] = ptrWordIn[2] ^ state[2]; - ptrWordOut[3] = ptrWordIn[3] ^ state[3]; - ptrWordOut[4] = ptrWordIn[4] ^ state[4]; - ptrWordOut[5] = ptrWordIn[5] ^ state[5]; - ptrWordOut[6] = ptrWordIn[6] ^ state[6]; - ptrWordOut[7] = ptrWordIn[7] ^ state[7]; - ptrWordOut[8] = ptrWordIn[8] ^ state[8]; - 
ptrWordOut[9] = ptrWordIn[9] ^ state[9]; - ptrWordOut[10] = ptrWordIn[10] ^ state[10]; - ptrWordOut[11] = ptrWordIn[11] ^ state[11]; - - //M[row*][col] = M[row*][col] XOR rotW(rand) - ptrWordInOut[0] ^= state[11]; - ptrWordInOut[1] ^= state[0]; - ptrWordInOut[2] ^= state[1]; - ptrWordInOut[3] ^= state[2]; - ptrWordInOut[4] ^= state[3]; - ptrWordInOut[5] ^= state[4]; - ptrWordInOut[6] ^= state[5]; - ptrWordInOut[7] ^= state[6]; - ptrWordInOut[8] ^= state[7]; - ptrWordInOut[9] ^= state[8]; - ptrWordInOut[10] ^= state[9]; - ptrWordInOut[11] ^= state[10]; - - //Inputs: next column (i.e., next block in sequence) - ptrWordInOut += BLOCK_LEN_INT64; - ptrWordIn += BLOCK_LEN_INT64; - //Output: goes to previous column - ptrWordOut -= BLOCK_LEN_INT64; - } -} - -/** - * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., - * the wordwise addition of two columns, ignoring carries between words). The - * output of this operation, "rand", is then used to make - * "M[rowOut][col] = M[rowOut][col] XOR rand" and - * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit - * rotation to the left. 
- * - * @param state The current state of the sponge - * @param rowIn Row used only as input - * @param rowInOut Row used as input and to receive output after rotation - * @param rowOut Row receiving the output - * - */ -void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { - uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* - uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev - uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row - int i; - - for (i = 0; i < N_COLS; i++) { - - //Absorbing "M[prev] [+] M[row*]" - state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); - state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); - state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); - state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); - state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); - state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); - state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); - state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); - state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); - state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); - state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); - state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); - - //Applies the reduced-round transformation f to the sponge's state - reducedBlake2bLyra(state); - - //M[rowOut][col] = M[rowOut][col] XOR rand - ptrWordOut[0] ^= state[0]; - ptrWordOut[1] ^= state[1]; - ptrWordOut[2] ^= state[2]; - ptrWordOut[3] ^= state[3]; - ptrWordOut[4] ^= state[4]; - ptrWordOut[5] ^= state[5]; - ptrWordOut[6] ^= state[6]; - ptrWordOut[7] ^= state[7]; - ptrWordOut[8] ^= state[8]; - ptrWordOut[9] ^= state[9]; - ptrWordOut[10] ^= state[10]; - ptrWordOut[11] ^= state[11]; - - //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) - ptrWordInOut[0] ^= state[11]; - ptrWordInOut[1] ^= state[0]; - ptrWordInOut[2] ^= state[1]; - ptrWordInOut[3] ^= state[2]; - ptrWordInOut[4] ^= state[3]; - ptrWordInOut[5] ^= state[4]; - ptrWordInOut[6] ^= state[5]; - ptrWordInOut[7] ^= state[6]; - ptrWordInOut[8] ^= 
state[7]; - ptrWordInOut[9] ^= state[8]; - ptrWordInOut[10] ^= state[9]; - ptrWordInOut[11] ^= state[10]; - - //Goes to next block - ptrWordOut += BLOCK_LEN_INT64; - ptrWordInOut += BLOCK_LEN_INT64; - ptrWordIn += BLOCK_LEN_INT64; - } -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/** - Prints an array of unsigned chars - */ -void printArrayO(unsigned char *array, unsigned int size, char *name) { - int i; - printf("%s: ", name); - for (i = 0; i < size; i++) { - printf("%2x|", array[i]); - } - printf("\n"); -} - -//////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/algorithm/sponge_old.h b/algorithm/sponge_old.h deleted file mode 100644 index c23781d37..000000000 --- a/algorithm/sponge_old.h +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Header file for Blake2b's internal permutation in the form of a sponge. - * This code is based on the original Blake2b's implementation provided by - * Samuel Neves (https://blake2.net/) - * - * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. - * - * This software is hereby placed in the public domain. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef SPONGEOLD_H_ -#define SPONGEOLD_H_ - -#include - -#if defined(__GNUC__) -#define ALIGN __attribute__ ((aligned(32))) -#elif defined(_MSC_VER) -#define ALIGN __declspec(align(32)) -#else -#define ALIGN -#endif - - -/*Blake2b IV Array*/ -static const uint64_t blake2b_IV[8] = -{ - 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, - 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, - 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, - 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL -}; - -/*Blake2b's rotation*/ -static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ - return ( w >> c ) | ( w << ( 64 - c ) ); -} - -/*Blake2b's G function*/ -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while(0) - - -/*One Round of the Blake2b's compression function*/ -#define ROUND_LYRA(r) \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); - - -//---- Housekeeping -void initStateO(uint64_t state[/*16*/]); - -//---- Squeezes -void squeezeO(uint64_t *state, unsigned char *out, unsigned int len); -void reducedSqueezeRow0O(uint64_t* state, uint64_t* row); - -//---- Absorbs -void absorbBlockO(uint64_t *state, const uint64_t *in); -void absorbBlockBlake2SafeO(uint64_t *state, const uint64_t *in); - -//---- Duplexes -void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut); -void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); -void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); - -//---- Misc -void printArrayO(unsigned char *array, unsigned int size, char *name); - 
-//////////////////////////////////////////////////////////////////////////////////////////////// - - -#endif /* SPONGE_H_ */ From 7c9fe7112e2996b0024786f44cc5c8112c642237 Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 21 Aug 2015 21:16:51 +0200 Subject: [PATCH 23/63] Apply immediately the first set_difficulty This is a workaround for pool, who send the first difficulty after notify --- util.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/util.c b/util.c index 95adc2ee6..3f76aa137 100644 --- a/util.c +++ b/util.c @@ -1560,7 +1560,9 @@ static bool parse_notify(struct pool *pool, json_t *val) pool->swork.nbit = nbit; pool->swork.ntime = ntime; pool->swork.clean = clean; - pool->swork.diff = pool->next_diff; + if (pool->next_diff > 0) { + pool->swork.diff = pool->next_diff; + } alloc_len = pool->swork.cb_len = cb1_len + pool->n1_len + pool->n2size + cb2_len; pool->nonce2_offset = cb1_len + pool->n1_len; @@ -1669,8 +1671,13 @@ static bool parse_diff(struct pool *pool, json_t *val) return false; cg_wlock(&pool->data_lock); - old_diff = pool->next_diff; - pool->next_diff = diff; + if (pool->next_diff > 0) { + old_diff = pool->next_diff; + pool->next_diff = diff; + } else { + old_diff = pool->swork.diff; + pool->next_diff = pool->swork.diff = diff; + } cg_wunlock(&pool->data_lock); if (old_diff != diff) { @@ -2561,7 +2568,8 @@ bool initiate_stratum(struct pool *pool) if (!pool->stratum_url) pool->stratum_url = pool->sockaddr_url; pool->stratum_active = true; - pool->next_diff = pool->swork.diff = 1; + pool->next_diff = 0; + pool->swork.diff = 1; if (opt_protocol) { applog(LOG_DEBUG, "%s confirmed mining.subscribe with extranonce1 %s extran2size %d", get_pool_name(pool), pool->nonce1, pool->n2size); From 9fe7dc94824d2d282f6f60926048350c3e2b0293 Mon Sep 17 00:00:00 2001 From: elbandi Date: Tue, 10 Nov 2015 21:15:22 +0100 Subject: [PATCH 24/63] Version bump to 5.2.0 --- configure.ac | 4 ++-- winbuild/dist/include/config.h | 6 +++--- 2 
files changed, 5 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 15580ce0a..dd135150e 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [5]) -m4_define([v_min], [1]) -m4_define([v_mic], [1]) +m4_define([v_min], [2]) +m4_define([v_mic], [0]) m4_define([v_rev], [nicehash]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_ifdef([v_rev], [m4_define([v_ver], [v_maj.v_min.v_mic-v_rev])], [m4_define([v_ver], [v_maj.v_min.v_mic])]) diff --git a/winbuild/dist/include/config.h b/winbuild/dist/include/config.h index ea35478d3..869317088 100644 --- a/winbuild/dist/include/config.h +++ b/winbuild/dist/include/config.h @@ -67,11 +67,11 @@ #endif -#define VERSION "v5.1.1" +#define VERSION "v5.2.0" #define PACKAGE_NAME "sgminer" #define PACKAGE_TARNAME "sgminer" -#define PACKAGE_VERSION "5.1.1" -#define PACKAGE_STRING "sgminer 5.1.1" +#define PACKAGE_VERSION "5.2.0" +#define PACKAGE_STRING "sgminer 5.2.0" #define PACKAGE "sgminer" #define SGMINER_PREFIX "" From 8d2eefe50ca656bd5bcfd635ce3e6dc3b3dc9282 Mon Sep 17 00:00:00 2001 From: elbandi Date: Wed, 11 Nov 2015 22:37:11 +0100 Subject: [PATCH 25/63] Fix for automated build --- sph/sha256_Y.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sph/sha256_Y.c b/sph/sha256_Y.c index a5d786d3f..6a2dac456 100644 --- a/sph/sha256_Y.c +++ b/sph/sha256_Y.c @@ -29,9 +29,9 @@ #include #include -#include "algorithm/sysendian.h" +#include "../algorithm/sysendian.h" -#include "sph/sha256_Y.h" +#include "sha256_Y.h" /* * Encode a length len/4 vector of (uint32_t) into a length len vector of From 6efe2f94a8842a20b753886d2ee240f93e863ab9 Mon Sep 17 00:00:00 2001 From: elbandi Date: Tue, 8 Dec 2015 23:01:30 +0100 Subject: [PATCH 26/63] Fix typos in neoscrypt conditions --- sgminer.c | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/sgminer.c b/sgminer.c index 3c5042c32..a418e1d88 100644 --- a/sgminer.c +++ b/sgminer.c @@ -2148,7 +2148,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work) } // Neoscrypt doesn't calc_midstate() - if (pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type != ALGO_NEOSCRYPT) { calc_midstate(work); } local_work++; @@ -5574,7 +5574,7 @@ static void *stratum_sthread(void *userdata) applog(LOG_DEBUG, "stratum_sthread() algorithm = %s", pool->algorithm.name); // Neoscrypt is little endian - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { nonce = htobe32(*((uint32_t *)(work->data + 76))); //*((uint32_t *)nonce2) = htole32(work->nonce2); } @@ -6078,7 +6078,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) applog(LOG_DEBUG, "[THR%d] gen_stratum_work() - algorithm = %s", work->thr_id, pool->algorithm.name); // Different for Neoscrypt because of Little Endian - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { /* Incoming data is in little endian. 
*/ memcpy(merkle_root, merkle_sha, 32); @@ -6140,7 +6140,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) } // For Neoscrypt use set_target_neoscrypt() function - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { set_target_neoscrypt(work->target, work->sdiff, work->thr_id); } else { calc_midstate(work); @@ -6238,7 +6238,7 @@ static void apply_initial_gpu_settings(struct pool *pool) //thread-concurrency // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting @@ -6562,7 +6562,7 @@ static void apply_switcher_options(unsigned long options, struct pool *pool) if(opt_isset(options, SWITCHER_APPLY_TC)) { // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting From ca3f9d5c3af86767ea71ad4ec7719e98280ba5b1 Mon Sep 17 00:00:00 2001 From: elbandi Date: Wed, 9 Dec 2015 01:07:39 +0100 Subject: [PATCH 27/63] Always use libcurl cflags --- Makefile.am | 6 +++--- configure.ac | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile.am b/Makefile.am index 7047bf10f..ac2ecdade 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,10 +18,10 @@ sgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @OPENCL_LIBS@ @NCURSES_LIBS@ @PDCURSES_LIBS@ @WS2_LIBS@ \ @MM_LIBS@ @RT_LIBS@ @MATH_LIBS@ 
lib/libgnu.a ccan/libccan.a sph/libsph.a -sgminer_CPPFLAGS += -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ +sgminer_CPPFLAGS += -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBCURL_CFLAGS@ -if !HAVE_WINDOWS -sgminer_CPPFLAGS += @LIBCURL_CFLAGS@ +if HAVE_WINDOWS +sgminer_LDFLAGS += -all-static endif sgminer_CPPFLAGS += $(ADL_CPPFLAGS) diff --git a/configure.ac b/configure.ac index dd135150e..681c2b9c2 100644 --- a/configure.ac +++ b/configure.ac @@ -288,6 +288,7 @@ else LIBCURL_LIBS="" fi AC_SUBST(LIBCURL_LIBS) +AC_SUBST(LIBCURL_CFLAGS) # Enable or disable use of git version in version string AC_MSG_CHECKING(whether to use git version if available) From b8253d106fee01480dd205a3b25c0664a33f2d2a Mon Sep 17 00:00:00 2001 From: elbandi Date: Tue, 9 Feb 2016 02:30:53 +0100 Subject: [PATCH 28/63] Fix display device status if some are disabled --- sgminer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sgminer.c b/sgminer.c index a418e1d88..ca58f701d 100644 --- a/sgminer.c +++ b/sgminer.c @@ -2625,7 +2625,7 @@ static void curses_print_devstatus(struct cgpu_info *cgpu, int count) if (devcursor + count > LINES - 2) return; - if (count >= most_devices) + if (count >= (opt_removedisabled ? most_devices : total_devices)) return; if (cgpu->dev_start_tv.tv_sec == 0) @@ -2745,7 +2745,7 @@ static void switch_logsize(bool __maybe_unused newdevs) if (opt_compact) { logstart = devcursor + 1; } else { - logstart = devcursor + most_devices + 1; + logstart = devcursor + (opt_removedisabled ? most_devices : total_devices) + 1; } logcursor = logstart + 1; #ifdef WIN32 @@ -8873,7 +8873,7 @@ int main(int argc, char *argv[]) rd_unlock(&devices_lock); if (!opt_compact) { - logstart += most_devices; + logstart += (opt_removedisabled ? 
most_devices : total_devices); logcursor = logstart + 1; #ifdef HAVE_CURSES check_winsizes(); From 6fd618d28435df90fa6f7404dad367802173185b Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 11 Feb 2016 21:33:56 +0100 Subject: [PATCH 29/63] Tweak lyra2re algo files --- Makefile.am | 2 +- algorithm.c | 14 ++++---- algorithm.h | 2 +- algorithm/lyra2.c | 12 ++++--- algorithm/lyra2re.c | 23 ++++-------- algorithm/lyra2re.h | 2 -- algorithm/lyra2re_old.h | 10 ------ algorithm/{lyra2re_old.c => lyra2rev2.c} | 45 +++++++++++++++--------- algorithm/lyra2rev2.h | 11 ++++++ driver-opencl.c | 2 +- kernel/lyra2rev2.cl | 6 ++-- ocl.c | 8 ++--- winbuild/sgminer.vcxproj | 2 ++ winbuild/sgminer.vcxproj.filters | 6 ++++ 14 files changed, 77 insertions(+), 68 deletions(-) delete mode 100644 algorithm/lyra2re_old.h rename algorithm/{lyra2re_old.c => lyra2rev2.c} (77%) create mode 100644 algorithm/lyra2rev2.h diff --git a/Makefile.am b/Makefile.am index ac2ecdade..125b432b3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -73,7 +73,7 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h -sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h +sgminer_SOURCES += algorithm/lyra2rev2.c algorithm/lyra2rev2.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h sgminer_SOURCES += algorithm/credits.c algorithm/credits.h sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h diff --git a/algorithm.c b/algorithm.c index 6acab9244..3f6914f4b 100644 --- a/algorithm.c +++ b/algorithm.c @@ -33,7 +33,7 @@ #include "algorithm/neoscrypt.h" #include "algorithm/whirlpoolx.h" #include "algorithm/lyra2re.h" -#include 
"algorithm/lyra2re_old.h" +#include "algorithm/lyra2rev2.h" #include "algorithm/pluck.h" #include "algorithm/yescrypt.h" #include "algorithm/credits.h" @@ -62,7 +62,7 @@ const char *algorithm_type_str[] = { "Neoscrypt", "WhirlpoolX", "Lyra2RE", - "Lyra2REv2" + "Lyra2REV2" "Pluck" "Yescrypt", "Yescrypt-multi" @@ -798,7 +798,7 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk return status; } -static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +static cl_int queue_lyra2re_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel; unsigned int num; @@ -842,7 +842,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct return status; } -static cl_int queue_lyra2REv2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +static cl_int queue_lyra2rev2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel; unsigned int num; @@ -992,10 +992,8 @@ static algorithm_settings_t algos[] = { { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL }, - { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2reold_regenhash, queue_lyra2RE_kernel, gen_hash, NULL }, - - { "lyra2rev2", ALGO_LYRA2REv2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2re_regenhash, queue_lyra2REv2_kernel, gen_hash, append_neoscrypt_compiler_options }, - + { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, queue_lyra2re_kernel, gen_hash, NULL }, + { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options }, // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ diff --git a/algorithm.h b/algorithm.h index 8b7185a44..9187eb53e 100644 --- a/algorithm.h +++ b/algorithm.h @@ -30,7 +30,7 @@ typedef enum { ALGO_NEOSCRYPT, ALGO_WHIRLPOOLX, ALGO_LYRA2RE, - ALGO_LYRA2REv2, + ALGO_LYRA2REV2, ALGO_PLUCK, ALGO_YESCRYPT, ALGO_YESCRYPT_MULTI, diff --git a/algorithm/lyra2.c b/algorithm/lyra2.c index 42640e760..865d8e17b 100644 --- a/algorithm/lyra2.c +++ b/algorithm/lyra2.c @@ -61,16 +61,18 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols; const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8; + // for Lyra2REv2, nCols = 4, v1 was using 8 + const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64 : BLOCK_LEN_BLAKE2_SAFE_BYTES; i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); - uint64_t *wholeMatrix = malloc(i); + uint64_t *wholeMatrix = (uint64_t*)malloc(i); if (wholeMatrix == NULL) { return -1; } memset(wholeMatrix, 0, i); //Allocates pointers to each row of the matrix - uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*)); + uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*)); if (memMatrix == NULL) { return -1; } @@ -122,7 +124,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * //======================= Initializing the Sponge State ====================// //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) - uint64_t *state = malloc(16 * sizeof (uint64_t)); + uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t)); if (state == NULL) { return -1; } @@ -134,7 +136,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * ptrWord = 
wholeMatrix; for (i = 0; i < nBlocksInput; i++) { absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) - ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil) + ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil) } //Initializes M[0] and M[1] @@ -196,7 +198,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * absorbBlock(state, memMatrix[rowa]); //Squeezes the key - squeeze(state, K, kLen); + squeeze(state, (unsigned char*)K, kLen); //==========================================================================/ //========================= Freeing the memory =============================// diff --git a/algorithm/lyra2re.c b/algorithm/lyra2re.c index cfc5adbb3..61f2b34f8 100644 --- a/algorithm/lyra2re.c +++ b/algorithm/lyra2re.c @@ -36,8 +36,6 @@ #include "sph/sph_groestl.h" #include "sph/sph_skein.h" #include "sph/sph_keccak.h" -#include "sph/sph_bmw.h" -#include "sph/sph_cubehash.h" #include "lyra2.h" /* @@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) inline void lyra2rehash(void *state, const void *input) { sph_blake256_context ctx_blake; - sph_bmw256_context ctx_bmw; + sph_groestl256_context ctx_groestl; sph_keccak256_context ctx_keccak; sph_skein256_context ctx_skein; - sph_cubehash256_context ctx_cube; uint32_t hashA[8], hashB[8]; @@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input) sph_keccak256 (&ctx_keccak,hashA, 32); sph_keccak256_close(&ctx_keccak, hashB); - sph_cubehash256_init(&ctx_cube); - sph_cubehash256(&ctx_cube, hashB, 32); - sph_cubehash256_close(&ctx_cube, hashA); + LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); - LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4); sph_skein256_init(&ctx_skein); - sph_skein256 (&ctx_skein, hashB, 32); - sph_skein256_close(&ctx_skein, hashA); + sph_skein256 (&ctx_skein, hashA, 32); + sph_skein256_close(&ctx_skein, hashB); - 
sph_cubehash256_init(&ctx_cube); - sph_cubehash256(&ctx_cube, hashA, 32); - sph_cubehash256_close(&ctx_cube, hashB); - sph_bmw256_init(&ctx_bmw); - sph_bmw256 (&ctx_bmw, hashB, 32); - sph_bmw256_close(&ctx_bmw, hashA); + sph_groestl256_init(&ctx_groestl); + sph_groestl256 (&ctx_groestl, hashB, 32); + sph_groestl256_close(&ctx_groestl, hashA); memcpy(state, hashA, 32); } diff --git a/algorithm/lyra2re.h b/algorithm/lyra2re.h index 8bc52ac4f..8a58e7471 100644 --- a/algorithm/lyra2re.h +++ b/algorithm/lyra2re.h @@ -2,8 +2,6 @@ #define LYRA2RE_H #include "miner.h" -#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent -#define LYRA_SECBUF_SIZE (4) // (not used) extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); diff --git a/algorithm/lyra2re_old.h b/algorithm/lyra2re_old.h deleted file mode 100644 index 0788dfb35..000000000 --- a/algorithm/lyra2re_old.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef LYRA2REOLD_H -#define LYRA2REOLD_H - -#include "miner.h" - -extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, - uint32_t nonce); -extern void lyra2reold_regenhash(struct work *work); - -#endif /* LYRA2RE_H */ diff --git a/algorithm/lyra2re_old.c b/algorithm/lyra2rev2.c similarity index 77% rename from algorithm/lyra2re_old.c rename to algorithm/lyra2rev2.c index 31a0a1abc..aea0082a7 100644 --- a/algorithm/lyra2re_old.c +++ b/algorithm/lyra2rev2.c @@ -36,6 +36,8 @@ #include "sph/sph_groestl.h" #include "sph/sph_skein.h" #include "sph/sph_keccak.h" +#include "sph/sph_bmw.h" +#include "sph/sph_cubehash.h" #include "lyra2.h" /* @@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) } -inline void lyra2rehash_old(void *state, const void *input) +inline void lyra2rev2hash(void *state, const void *input) { sph_blake256_context ctx_blake; - sph_groestl256_context ctx_groestl; + sph_bmw256_context ctx_bmw; sph_keccak256_context ctx_keccak; sph_skein256_context 
ctx_skein; - + sph_cubehash256_context ctx_cube; uint32_t hashA[8], hashB[8]; sph_blake256_init(&ctx_blake); @@ -69,32 +71,41 @@ inline void lyra2rehash_old(void *state, const void *input) sph_keccak256 (&ctx_keccak,hashA, 32); sph_keccak256_close(&ctx_keccak, hashB); - LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); + sph_cubehash256_init(&ctx_cube); + sph_cubehash256(&ctx_cube, hashB, 32); + sph_cubehash256_close(&ctx_cube, hashA); + + LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4); + + sph_skein256_init(&ctx_skein); + sph_skein256 (&ctx_skein, hashB, 32); + sph_skein256_close(&ctx_skein, hashA); - sph_skein256_init(&ctx_skein); - sph_skein256 (&ctx_skein, hashA, 32); - sph_skein256_close(&ctx_skein, hashB); + sph_cubehash256_init(&ctx_cube); + sph_cubehash256(&ctx_cube, hashA, 32); + sph_cubehash256_close(&ctx_cube, hashB); + sph_bmw256_init(&ctx_bmw); + sph_bmw256 (&ctx_bmw, hashB, 32); + sph_bmw256_close(&ctx_bmw, hashA); - sph_groestl256_init(&ctx_groestl); - sph_groestl256 (&ctx_groestl, hashB, 32); - sph_groestl256_close(&ctx_groestl, hashA); +//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]); - memcpy(state, hashA, 32); + memcpy(state, hashA, 32); } static const uint32_t diff1targ = 0x0000ffff; /* Used externally as confirmation of correct OCL code */ -int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) { uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); uint32_t data[20], ohash[8]; be32enc_vect(data, (const uint32_t *)pdata, 19); data[19] = htobe32(nonce); - lyra2rehash_old(ohash, data); + lyra2rev2hash(ohash, data); tmp_hash7 = be32toh(ohash[7]); applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", @@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t return 1; } -void lyra2reold_regenhash(struct work *work) +void 
lyra2rev2_regenhash(struct work *work) { uint32_t data[20]; uint32_t *nonce = (uint32_t *)(work->data + 76); @@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work) be32enc_vect(data, (const uint32_t *)work->data, 19); data[19] = htobe32(*nonce); - lyra2rehash_old(ohash, data); + lyra2rev2hash(ohash, data); } -bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, +bool scanhash_lyra2rev2(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, unsigned char *pdata, unsigned char __maybe_unused *phash1, unsigned char __maybe_unused *phash, const unsigned char *ptarget, uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) @@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse *nonce = ++n; data[19] = (n); - lyra2rehash_old(ostate, data); + lyra2rev2hash(ostate, data); tmp_hash7 = (ostate[7]); applog(LOG_INFO, "data7 %08lx", diff --git a/algorithm/lyra2rev2.h b/algorithm/lyra2rev2.h new file mode 100644 index 000000000..1a31f76f9 --- /dev/null +++ b/algorithm/lyra2rev2.h @@ -0,0 +1,11 @@ +#ifndef LYRA2REV2_H +#define LYRA2REV2_H + +#include "miner.h" +#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent +#define LYRA_SECBUF_SIZE (4) // (not used) +extern int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void lyra2rev2_regenhash(struct work *work); + +#endif /* LYRA2REV2_H */ diff --git a/driver-opencl.c b/driver-opencl.c index 0e45e5555..4a9d0693e 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1366,7 +1366,7 @@ static bool opencl_thread_init(struct thr_info *thr) static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work) { - if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REv2) { + if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REV2) { work->blk.work = work; 
precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data)); } diff --git a/kernel/lyra2rev2.cl b/kernel/lyra2rev2.cl index 0fe0440d6..a165a7517 100644 --- a/kernel/lyra2rev2.cl +++ b/kernel/lyra2rev2.cl @@ -31,8 +31,8 @@ // typedef unsigned int uint; #pragma OPENCL EXTENSION cl_amd_printf : enable -#ifndef LYRA2RE_CL -#define LYRA2RE_CL +#ifndef LYRA2REV2_CL +#define LYRA2REV2_CL #if __ENDIAN_LITTLE__ #define SPH_LITTLE_ENDIAN 1 @@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong } -#endif // LYRA2RE_CL \ No newline at end of file +#endif // LYRA2REV2_CL \ No newline at end of file diff --git a/ocl.c b/ocl.c index cb00790ff..98f337bdf 100644 --- a/ocl.c +++ b/ocl.c @@ -37,7 +37,7 @@ #include "algorithm/neoscrypt.h" #include "algorithm/pluck.h" #include "algorithm/yescrypt.h" -#include "algorithm/lyra2re.h" +#include "algorithm/lyra2rev2.h" /* FIXME: only here for global config vars, replace with configuration.h * or similar as soon as config is in a struct instead of littered all @@ -586,7 +586,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg } // Lyra2re v2 TC - else if (cgpu->algorithm.type == ALGO_LYRA2REv2 && !cgpu->opt_tc) { + else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) { size_t glob_thread_count; long max_int; unsigned char type = 0; @@ -797,7 +797,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } - else if (algorithm->type == ALGO_LYRA2REv2) { + else if (algorithm->type == ALGO_LYRA2REV2) { /* The scratch/pad-buffer needs 32kBytes memory per thread. 
*/ bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency; buf1size = 4* 8 * cgpu->thread_concurrency; //matrix @@ -855,7 +855,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg return NULL; } } - else if (algorithm->type == ALGO_LYRA2REv2) { + else if (algorithm->type == ALGO_LYRA2REV2) { // need additionnal buffers clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); if (status != CL_SUCCESS && !clState->buffer1) { diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj index 6ec8a4cdc..e71685e42 100644 --- a/winbuild/sgminer.vcxproj +++ b/winbuild/sgminer.vcxproj @@ -265,6 +265,7 @@ + @@ -330,6 +331,7 @@ + diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters index 02c26210e..14c9ed2f3 100644 --- a/winbuild/sgminer.vcxproj.filters +++ b/winbuild/sgminer.vcxproj.filters @@ -218,6 +218,9 @@ Source Files\algorithm + + Source Files\algorithm + @@ -415,6 +418,9 @@ Header Files\algorithm + + Header Files\algorithm + From 186b75d51c47743151dd65e054d0e78daea96202 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 11 Feb 2016 21:28:17 +0100 Subject: [PATCH 30/63] New neoscrypt kernel --- kernel/neoscrypt.cl | 1091 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 1031 insertions(+), 60 deletions(-) diff --git a/kernel/neoscrypt.cl b/kernel/neoscrypt.cl index 7939d7ed2..9ffcad4bf 100644 --- a/kernel/neoscrypt.cl +++ b/kernel/neoscrypt.cl @@ -1,9 +1,32 @@ -/* NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 */ -/* Adapted and improved for 14.x drivers by Wolf9466 (Wolf`) */ +// NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 +// By Wolf (Wolf0 aka Wolf9466) // Stupid AMD compiler ignores the unroll pragma in these two + +// Tahiti 3/2, +// Hawaii 4/4 + notneededswap +// Pitcairn 3/4 + notneededswap +#if defined(__Tahiti__) +#define SALSA_SMALL_UNROLL 4 +#define CHACHA_SMALL_UNROLL 2 +//#define SWAP 1 +//#define SHITMAIN 1 +//#define 
WIDE_STRIPE 1 +#elif defined(__Pitcairn__) + #define SALSA_SMALL_UNROLL 3 -#define CHACHA_SMALL_UNROLL 3 +#define CHACHA_SMALL_UNROLL 2 +//#define SWAP 1 +//#define SHITMAIN 1 +//#define WIDE_STRIPE 1 + +#else +#define SALSA_SMALL_UNROLL 4 +#define CHACHA_SMALL_UNROLL 4 +//#define SWAP 1 +//#define SHITMAIN 1 +//#define WIDE_STRIPE 1 +#endif // If SMALL_BLAKE2S is defined, BLAKE2S_UNROLL is interpreted // as the unroll factor; must divide cleanly into ten. @@ -96,6 +119,28 @@ static const __constant uchar BLAKE2S_SIGMA[10][16] = b = rotate(b ^ c, 25U); \ } while(0) +#define BLAKE_PARALLEL_G1(idx0, a, b, c, d, key) do { \ + a += b + (uint4)(key[BLAKE2S_SIGMA[idx0][0]], key[BLAKE2S_SIGMA[idx0][2]], key[BLAKE2S_SIGMA[idx0][4]], key[BLAKE2S_SIGMA[idx0][6]]); \ + d = rotate(d ^ a, 16U); \ + c += d; \ + b = rotate(b ^ c, 20U); \ + a += b + (uint4)(key[BLAKE2S_SIGMA[idx0][1]], key[BLAKE2S_SIGMA[idx0][3]], key[BLAKE2S_SIGMA[idx0][5]], key[BLAKE2S_SIGMA[idx0][7]]); \ + d = rotate(d ^ a, 24U); \ + c += d; \ + b = rotate(b ^ c, 25U); \ +} while(0) + +#define BLAKE_PARALLEL_G2(idx0, a, b, c, d, key) do { \ + a += b + (uint4)(key[BLAKE2S_SIGMA[idx0][8]], key[BLAKE2S_SIGMA[idx0][10]], key[BLAKE2S_SIGMA[idx0][12]], key[BLAKE2S_SIGMA[idx0][14]]); \ + d = rotate(d ^ a, 16U); \ + c += d; \ + b = rotate(b ^ c, 20U); \ + a += b + (uint4)(key[BLAKE2S_SIGMA[idx0][9]], key[BLAKE2S_SIGMA[idx0][11]], key[BLAKE2S_SIGMA[idx0][13]], key[BLAKE2S_SIGMA[idx0][15]]); \ + d = rotate(d ^ a, 24U); \ + c += d; \ + b = rotate(b ^ c, 25U); \ +} while(0) + void Blake2S(uint *restrict inout, const uint *restrict inkey) { uint16 V; @@ -122,14 +167,17 @@ void Blake2S(uint *restrict inout, const uint *restrict inkey) #endif for(int x = 0; x < 10; ++x) { - BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inkey); + /*BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inkey); BLAKE_G(x, 0x02, V.s1, V.s5, V.s9, V.sd, inkey); BLAKE_G(x, 0x04, V.s2, V.s6, V.sa, V.se, inkey); BLAKE_G(x, 0x06, V.s3, V.s7, V.sb, V.sf, inkey); 
BLAKE_G(x, 0x08, V.s0, V.s5, V.sa, V.sf, inkey); BLAKE_G(x, 0x0A, V.s1, V.s6, V.sb, V.sc, inkey); BLAKE_G(x, 0x0C, V.s2, V.s7, V.s8, V.sd, inkey); - BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inkey); + BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inkey);*/ + + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inkey); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inkey); } // XOR low part of state with the high part, @@ -156,14 +204,17 @@ void Blake2S(uint *restrict inout, const uint *restrict inkey) #endif for(int x = 0; x < 10; ++x) { - BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inout); + /*BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inout); BLAKE_G(x, 0x02, V.s1, V.s5, V.s9, V.sd, inout); BLAKE_G(x, 0x04, V.s2, V.s6, V.sa, V.se, inout); BLAKE_G(x, 0x06, V.s3, V.s7, V.sb, V.sf, inout); BLAKE_G(x, 0x08, V.s0, V.s5, V.sa, V.sf, inout); BLAKE_G(x, 0x0A, V.s1, V.s6, V.sb, V.sc, inout); BLAKE_G(x, 0x0C, V.s2, V.s7, V.s8, V.sd, inout); - BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inout); + BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inout);*/ + + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inout); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inout); } // XOR low part of state with high part, then with input block @@ -227,15 +278,73 @@ void fastkdf(const uchar *restrict password, const uchar *restrict salt, const u { // Make the key buffer twice the size of the key so it fits a Blake2S block // This way, we don't need a temp buffer in the Blake2S function. 
- uchar input[BLAKE2S_BLOCK_SIZE], key[BLAKE2S_BLOCK_SIZE] = { 0 }; + uchar input[BLAKE2S_BLOCK_SIZE] __attribute__((aligned)), key[BLAKE2S_BLOCK_SIZE] __attribute__((aligned)) = { 0 }; // Copy input and key to their buffers CopyBytes(input, A + bufidx, BLAKE2S_BLOCK_SIZE); CopyBytes(key, B + bufidx, BLAKE2S_KEY_SIZE); // PRF - Blake2S((uint *)input, (uint *)key); + //Blake2S((uint *)input, (uint *)key); + + uint *inkey = (uint *)key, *inout = (uint *)input; + + // PRF + uint16 V; + uint8 tmpblock; + + // Load first block (IV into V.lo) and constants (IV into V.hi) + V.lo = V.hi = vload8(0U, BLAKE2S_IV); + + // XOR with initial constant + V.s0 ^= 0x01012020; + + // Copy input block for later + tmpblock = V.lo; + + // XOR length of message so far (including this block) + // There are two uints for this field, but high uint is zero + V.sc ^= BLAKE2S_BLOCK_SIZE; + + // Compress state, using the key as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inkey); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inkey); + } + + // XOR low part of state with the high part, + // then with the original input block. 
+ V.lo ^= V.hi ^ tmpblock; + // Load constants (IV into V.hi) + V.hi = vload8(0U, BLAKE2S_IV); + + // Copy input block for later + tmpblock = V.lo; + + // XOR length of message into block again + V.sc ^= BLAKE2S_BLOCK_SIZE << 1; + + // Last block compression - XOR final constant into state + V.se ^= 0xFFFFFFFFU; + + // Compress block, using the input as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inout); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inout); + } + + // XOR low part of state with high part, then with input block + V.lo ^= V.hi ^ tmpblock; + + // Store result in input/output buffer + vstore8(V.lo, 0, inout); + + // Calculate the next buffer pointer bufidx = 0; @@ -284,7 +393,475 @@ void fastkdf(const uchar *restrict password, const uchar *restrict salt, const u } } -#define SALSA_CORE(state) do { \ +/* FastKDF, a fast buffered key derivation function: + * FASTKDF_BUFFER_SIZE must be a power of 2; + * password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE; + * prf_output_size must be <= prf_key_size; */ +void fastkdf1(const uchar password[80], uchar output[256]) +{ + + /* WARNING! + * This algorithm uses byte-wise addressing for memory blocks. + * Or in other words, trying to copy an unaligned memory region + * will significantly slow down the algorithm, when copying uses + * words or bigger entities. It may even corrupt the data, when + * the device does not support it properly. + * Therefore use byte copying, which will not be the fastest but will at + * least give reliable results. 
*/ + + // BLOCK_SIZE 64U + // FASTKDF_BUFFER_SIZE 256U + // BLAKE2S_BLOCK_SIZE 64U + // BLAKE2S_KEY_SIZE 32U + // BLAKE2S_OUT_SIZE 32U + uchar bufidx = 0; + uint8 Abuffer[9], Bbuffer[9] = { (uint8)(0) }; + uchar *A = (uchar *)Abuffer, *B = (uchar *)Bbuffer; + + // Initialize the password buffer + #pragma unroll 1 + for(int i = 0; i < (FASTKDF_BUFFER_SIZE >> 3); ++i) ((ulong *)B)[i] = ((ulong *)A)[i] = ((ulong *)password)[i % 10]; + + ((uint16 *)(B + FASTKDF_BUFFER_SIZE))[0] = ((uint16 *)(A + FASTKDF_BUFFER_SIZE))[0] = ((uint16 *)password)[0]; + + // The primary iteration + #pragma unroll 1 + for(int i = 0; i < 32; ++i) + { + // Make the key buffer twice the size of the key so it fits a Blake2S block + // This way, we don't need a temp buffer in the Blake2S function. + uchar input[BLAKE2S_BLOCK_SIZE] __attribute__((aligned)), key[BLAKE2S_BLOCK_SIZE] __attribute__((aligned)) = { 0 }; + + // Copy input and key to their buffers + CopyBytes(input, A + bufidx, BLAKE2S_BLOCK_SIZE); + CopyBytes(key, B + bufidx, BLAKE2S_KEY_SIZE); + + uint *inkey = (uint *)key, *inout = (uint *)input; + + #ifndef __Hawaii__ + + // PRF + uint4 V[4]; + uint8 tmpblock; + + tmpblock = vload8(0U, BLAKE2S_IV); + + V[0] = V[2] = tmpblock.lo; + V[1] = V[3] = tmpblock.hi; + + V[0].s0 ^= 0x01012020U; + tmpblock.lo = V[0]; + + V[3].s0 ^= BLAKE2S_BLOCK_SIZE; + + // Compress state, using the key as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V[0], V[1], V[2], V[3], inkey); + BLAKE_PARALLEL_G2(x, V[0], V[1].s1230, V[2].s2301, V[3].s3012, inkey); + } + + V[0] ^= V[2] ^ tmpblock.lo; + V[1] ^= V[3] ^ tmpblock.hi; + + V[2] = vload4(0U, BLAKE2S_IV); + V[3] = vload4(1U, BLAKE2S_IV); + + tmpblock.lo = V[0]; + tmpblock.hi = V[1]; + + V[3].s0 ^= BLAKE2S_BLOCK_SIZE << 1; + V[3].s2 ^= 0xFFFFFFFFU; + + // Compress block, using the input as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V[0], V[1], V[2], V[3], inout); + BLAKE_PARALLEL_G2(x, 
V[0], V[1].s1230, V[2].s2301, V[3].s3012, inout); + } + + V[0] ^= V[2] ^ tmpblock.lo; + V[1] ^= V[3] ^ tmpblock.hi; + + vstore4(V[0], 0, inout); + vstore4(V[1], 1, inout); + + #else + + // PRF + uint16 V; + uint8 tmpblock; + + // Load first block (IV into V.lo) and constants (IV into V.hi) + V.lo = V.hi = vload8(0U, BLAKE2S_IV); + + // XOR with initial constant + V.s0 ^= 0x01012020; + + // Copy input block for later + tmpblock = V.lo; + + // XOR length of message so far (including this block) + // There are two uints for this field, but high uint is zero + V.sc ^= BLAKE2S_BLOCK_SIZE; + + // Compress state, using the key as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inkey); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inkey); + } + + // XOR low part of state with the high part, + // then with the original input block. + V.lo ^= V.hi ^ tmpblock; + + // Load constants (IV into V.hi) + V.hi = vload8(0U, BLAKE2S_IV); + + // Copy input block for later + tmpblock = V.lo; + + // XOR length of message into block again + V.sc ^= BLAKE2S_BLOCK_SIZE << 1; + + // Last block compression - XOR final constant into state + V.se ^= 0xFFFFFFFFU; + + // Compress block, using the input as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inout); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inout); + } + + // XOR low part of state with high part, then with input block + V.lo ^= V.hi ^ tmpblock; + + // Store result in input/output buffer + vstore8(V.lo, 0, inout); + + #endif + + // Calculate the next buffer pointer + bufidx = 0; + + for(int x = 0; x < BLAKE2S_OUT_SIZE; ++x) + bufidx += input[x]; + + // bufidx is a uchar now, so it already wraps modulo 256 and needs no mask + //bufidx &= (FASTKDF_BUFFER_SIZE - 1); + + // Modify the salt buffer + XORBytesInPlace(B + bufidx, input, BLAKE2S_OUT_SIZE); + + if(bufidx < BLAKE2S_KEY_SIZE) + { + // Head modified, 
tail updated + // this was made off the original code... wtf + //CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, min(BLAKE2S_OUT_SIZE, BLAKE2S_KEY_SIZE - bufidx)); + CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, BLAKE2S_KEY_SIZE - bufidx); + } + else if((FASTKDF_BUFFER_SIZE - bufidx) < BLAKE2S_OUT_SIZE) + { + // Tail modified, head updated + CopyBytes(B, B + FASTKDF_BUFFER_SIZE, BLAKE2S_OUT_SIZE - (FASTKDF_BUFFER_SIZE - bufidx)); + } + } + + // Modify and copy into the output buffer + + // Damned compiler crashes + // Fuck you, AMD + + //for(uint i = 0; i < output_len; ++i, ++bufidx) + // output[i] = B[bufidx] ^ A[i]; + + uint left = FASTKDF_BUFFER_SIZE - bufidx; + //uint left = (~bufidx) + 1 + + if(left < 256) + { + XORBytes(output, B + bufidx, A, left); + XORBytes(output + left, B, A + left, 256 - left); + } + else + { + XORBytes(output, B + bufidx, A, 256); + } +} + +/* FastKDF, a fast buffered key derivation function: + * FASTKDF_BUFFER_SIZE must be a power of 2; + * password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE; + * prf_output_size must be <= prf_key_size; */ +void fastkdf2(const uchar password[80], const uchar salt[256], __global uint* restrict output, const uint target) +{ + + /* WARNING! + * This algorithm uses byte-wise addressing for memory blocks. + * Or in other words, trying to copy an unaligned memory region + * will significantly slow down the algorithm, when copying uses + * words or bigger entities. It may even corrupt the data, when + * the device does not support it properly. + * Therefore use byte copying, which will not be the fastest but will at + * least give reliable results. 
*/ + + // BLOCK_SIZE 64U + // FASTKDF_BUFFER_SIZE 256U + // BLAKE2S_BLOCK_SIZE 64U + // BLAKE2S_KEY_SIZE 32U + // BLAKE2S_OUT_SIZE 32U + // salt_len == 256, output_len == 32 + uchar bufidx = 0; + uint8 Abuffer[9], Bbuffer[9] = { (uint8)(0) }; + uchar *A = (uchar *)Abuffer, *B = (uchar *)Bbuffer; + //uchar A[256], B[256]; + + // Initialize the password buffer + #pragma unroll 1 + for(int i = 0; i < (FASTKDF_BUFFER_SIZE >> 3); ++i) ((ulong *)A)[i] = ((ulong *)password)[i % 10]; + + ((uint16 *)(A + FASTKDF_BUFFER_SIZE))[0] = ((uint16 *)password)[0]; + + // Initialize the salt buffer + ((ulong16 *)B)[0] = ((ulong16 *)B)[2] = ((ulong16 *)salt)[0]; + ((ulong16 *)B)[1] = ((ulong16 *)B)[3] = ((ulong16 *)salt)[1]; + + // The primary iteration + #pragma unroll 1 + for(int i = 0; i < 32; ++i) + { + // Make the key buffer twice the size of the key so it fits a Blake2S block + // This way, we don't need a temp buffer in the Blake2S function. + uchar input[BLAKE2S_BLOCK_SIZE] __attribute__((aligned)), key[BLAKE2S_BLOCK_SIZE] __attribute__((aligned)) = { 0 }; + + // Copy input and key to their buffers + CopyBytes(input, A + bufidx, BLAKE2S_BLOCK_SIZE); + CopyBytes(key, B + bufidx, BLAKE2S_KEY_SIZE); + + uint *inkey = (uint *)key, *inout = (uint *)input; + + #ifndef __Hawaii__ + + // PRF + uint4 V[4]; + uint8 tmpblock; + + tmpblock = vload8(0U, BLAKE2S_IV); + + V[0] = V[2] = tmpblock.lo; + V[1] = V[3] = tmpblock.hi; + + V[0].s0 ^= 0x01012020U; + tmpblock.lo = V[0]; + + V[3].s0 ^= BLAKE2S_BLOCK_SIZE; + + // Compress state, using the key as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V[0], V[1], V[2], V[3], inkey); + BLAKE_PARALLEL_G2(x, V[0], V[1].s1230, V[2].s2301, V[3].s3012, inkey); + } + + V[0] ^= V[2] ^ tmpblock.lo; + V[1] ^= V[3] ^ tmpblock.hi; + + V[2] = vload4(0U, BLAKE2S_IV); + V[3] = vload4(1U, BLAKE2S_IV); + + tmpblock.lo = V[0]; + tmpblock.hi = V[1]; + + V[3].s0 ^= BLAKE2S_BLOCK_SIZE << 1; + V[3].s2 ^= 0xFFFFFFFFU; + + // Compress 
block, using the input as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V[0], V[1], V[2], V[3], inout); + BLAKE_PARALLEL_G2(x, V[0], V[1].s1230, V[2].s2301, V[3].s3012, inout); + } + + V[0] ^= V[2] ^ tmpblock.lo; + V[1] ^= V[3] ^ tmpblock.hi; + + vstore4(V[0], 0, inout); + vstore4(V[1], 1, inout); + + #else + + // PRF + uint16 V; + uint8 tmpblock; + + // Load first block (IV into V.lo) and constants (IV into V.hi) + V.lo = V.hi = vload8(0U, BLAKE2S_IV); + + // XOR with initial constant + V.s0 ^= 0x01012020; + + // Copy input block for later + tmpblock = V.lo; + + // XOR length of message so far (including this block) + // There are two uints for this field, but high uint is zero + V.sc ^= BLAKE2S_BLOCK_SIZE; + + // Compress state, using the key as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inkey); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inkey); + } + + // XOR low part of state with the high part, + // then with the original input block. 
+ V.lo ^= V.hi ^ tmpblock; + + // Load constants (IV into V.hi) + V.hi = vload8(0U, BLAKE2S_IV); + + // Copy input block for later + tmpblock = V.lo; + + // XOR length of message into block again + V.sc ^= BLAKE2S_BLOCK_SIZE << 1; + + // Last block compression - XOR final constant into state + V.se ^= 0xFFFFFFFFU; + + // Compress block, using the input as the key + #pragma unroll + for(int x = 0; x < 10; ++x) + { + BLAKE_PARALLEL_G1(x, V.s0123, V.s4567, V.s89ab, V.scdef, inout); + BLAKE_PARALLEL_G2(x, V.s0123, V.s5674, V.sab89, V.sfcde, inout); + } + + // XOR low part of state with high part, then with input block + V.lo ^= V.hi ^ tmpblock; + + // Store result in input/output buffer + vstore8(V.lo, 0, inout); + #endif + + // Calculate the next buffer pointer + bufidx = 0; + + for(int x = 0; x < BLAKE2S_OUT_SIZE; ++x) + bufidx += input[x]; + + // bufidx is a uchar now, so it already wraps modulo 256 and needs no mask + //bufidx &= (FASTKDF_BUFFER_SIZE - 1); + + // Modify the salt buffer + XORBytesInPlace(B + bufidx, input, BLAKE2S_OUT_SIZE); + + if(bufidx < BLAKE2S_KEY_SIZE) + { + // Head modified, tail updated + // this was made off the original code... 
wtf + //CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, min(BLAKE2S_OUT_SIZE, BLAKE2S_KEY_SIZE - bufidx)); + CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, BLAKE2S_KEY_SIZE - bufidx); + } + else if((FASTKDF_BUFFER_SIZE - bufidx) < BLAKE2S_OUT_SIZE) + { + // Tail modified, head updated + CopyBytes(B, B + FASTKDF_BUFFER_SIZE, BLAKE2S_OUT_SIZE - (FASTKDF_BUFFER_SIZE - bufidx)); + } + } + + // Modify and copy into the output buffer + + // Damned compiler crashes + // Fuck you, AMD + + uchar outbuf[32]; + + for(uint i = 0; i < 32; ++i, ++bufidx) + outbuf[i] = B[bufidx] ^ A[i]; + + /*uint left = FASTKDF_BUFFER_SIZE - bufidx; + //uint left = (~bufidx) + 1 + uchar outbuf[32]; + + if(left < 32) + { + XORBytes(outbuf, B + bufidx, A, left); + XORBytes(outbuf + left, B, A + left, 32 - left); + } + else + { + XORBytes(outbuf, B + bufidx, A, 32); + }*/ + + if(((uint *)outbuf)[7] <= target) output[atomic_add(output + 0xFF, 1)] = get_global_id(0); + +} + +/* + s0 s1 s2 s3 + s4 s5 s6 s7 + s8 s9 sa sb + sc sd se sf +shittify: +s0=s4 +s1=s9 +s2=se +s3=s3 +s4=s8 +s5=sd +s6=s2 +s7=s7 +s8=sc +s9=s1 +sa=s6 +sb=sb +sc=s0 +sd=s5 +se=sa +sf=sf +unshittify: +s0=sc +s1=s9 +s2=s6 +s3=s3 +s4=s0 +s5=sd +s6=sa +s7=s7 +s8=s4 +s9=s1 +sa=se +sb=sb +sc=s8 +sd=s5 +se=s2 +sf=sf + +*/ + +#define SALSA_CORE(state) do { \ + state[0] ^= rotate(state[3] + state[2], 7U); \ + state[1] ^= rotate(state[0] + state[3], 9U); \ + state[2] ^= rotate(state[1] + state[0], 13U); \ + state[3] ^= rotate(state[2] + state[1], 18U); \ + state[2] ^= rotate(state[3].wxyz + state[0].zwxy, 7U); \ + state[1] ^= rotate(state[2].wxyz + state[3].zwxy, 9U); \ + state[0] ^= rotate(state[1].wxyz + state[2].zwxy, 13U); \ + state[3] ^= rotate(state[0].wxyz + state[1].zwxy, 18U); \ +} while(0) + +#define SALSA_CORE_SCALAR(state) do { \ state.s4 ^= rotate(state.s0 + state.sc, 7U); state.s8 ^= rotate(state.s4 + state.s0, 9U); state.sc ^= rotate(state.s8 + state.s4, 13U); state.s0 ^= rotate(state.sc + state.s8, 18U); \ 
state.s9 ^= rotate(state.s5 + state.s1, 7U); state.sd ^= rotate(state.s9 + state.s5, 9U); state.s1 ^= rotate(state.sd + state.s9, 13U); state.s5 ^= rotate(state.s1 + state.sd, 18U); \ state.se ^= rotate(state.sa + state.s6, 7U); state.s2 ^= rotate(state.se + state.sa, 9U); state.s6 ^= rotate(state.s2 + state.se, 13U); state.sa ^= rotate(state.s6 + state.s2, 18U); \ @@ -295,10 +872,18 @@ void fastkdf(const uchar *restrict password, const uchar *restrict salt, const u state.sc ^= rotate(state.sf + state.se, 7U); state.sd ^= rotate(state.sc + state.sf, 9U); state.se ^= rotate(state.sd + state.sc, 13U); state.sf ^= rotate(state.se + state.sd, 18U); \ } while(0) -uint16 salsa_small_scalar_rnd(uint16 X) +uint16 salsa_small_parallel_rnd(uint16 X) { - uint16 st = X; - +#ifndef SHITMAIN + uint4 st[4] = { (uint4)(X.s4, X.s9, X.se, X.s3), + (uint4)(X.s8, X.sd, X.s2, X.s7), + (uint4)(X.sc, X.s1, X.s6, X.sb), + (uint4)(X.s0, X.s5, X.sa, X.sf) }; +#else + uint4 st[4]; + ((uint16 *)st)[0] = X; +#endif + #if SALSA_SMALL_UNROLL == 1 for(int i = 0; i < 10; ++i) @@ -335,7 +920,7 @@ uint16 salsa_small_scalar_rnd(uint16 X) SALSA_CORE(st); } - #else + #elif SALSA_SMALL_UNROLL == 5 for(int i = 0; i < 2; ++i) { @@ -346,26 +931,114 @@ uint16 salsa_small_scalar_rnd(uint16 X) SALSA_CORE(st); } + #else + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + SALSA_CORE(st); + #endif +#ifndef SHITMAIN + return(X + (uint16)( + st[3].x, st[2].y, st[1].z, st[0].w, + st[0].x, st[3].y, st[2].z, st[1].w, + st[1].x, st[0].y, st[3].z, st[2].w, + st[2].x, st[1].y, st[0].z, st[3].w)); +#else + return(X + ((uint16 *)st)[0]); +#endif +} + +uint16 salsa_small_scalar_rnd(uint16 X) +{ + uint16 st = X; + + #if SALSA_SMALL_UNROLL == 1 + + for(int i = 0; i < 10; ++i) + { + SALSA_CORE_SCALAR(st); + } + + #elif SALSA_SMALL_UNROLL == 2 + + for(int i = 0; i < 5; ++i) + { + SALSA_CORE_SCALAR(st); + 
SALSA_CORE_SCALAR(st); + } + + #elif SALSA_SMALL_UNROLL == 3 + + for(int i = 0; i < 4; ++i) + { + SALSA_CORE_SCALAR(st); + if(i == 3) break; + SALSA_CORE_SCALAR(st); + SALSA_CORE_SCALAR(st); + } + + #elif SALSA_SMALL_UNROLL == 4 + + for(int i = 0; i < 3; ++i) + { + SALSA_CORE_SCALAR(st); + SALSA_CORE_SCALAR(st); + if(i == 2) break; + SALSA_CORE_SCALAR(st); + SALSA_CORE_SCALAR(st); + } + + #else + + for(int i = 0; i < 2; ++i) + { + SALSA_CORE_SCALAR(st); + SALSA_CORE_SCALAR(st); + SALSA_CORE_SCALAR(st); + SALSA_CORE_SCALAR(st); + SALSA_CORE_SCALAR(st); + } + + #endif + return(X + st); } + #define CHACHA_CORE_PARALLEL(state) do { \ - state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \ - state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(12U, 12U, 12U, 12U)); \ - state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \ - state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(7U, 7U, 7U, 7U)); \ + state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], 16U); \ + state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], 12U); \ + state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], 8U); \ + state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], 7U); \ \ - state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \ - state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(12U, 12U, 12U, 12U)); \ - state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \ - state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(7U, 7U, 7U, 7U)); \ + state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], 16); \ + state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, 12U); \ + state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ 
state[0], 8U); \ + state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, 7U); \ +} while(0) + +#define CHACHA_CORE(state) do { \ + state.s0 += state.s4; state.sc = as_uint(as_ushort2(state.sc ^ state.s0).s10); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 12U); state.s0 += state.s4; state.sc = rotate(state.sc ^ state.s0, 8U); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 7U); \ + state.s1 += state.s5; state.sd = as_uint(as_ushort2(state.sd ^ state.s1).s10); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 12U); state.s1 += state.s5; state.sd = rotate(state.sd ^ state.s1, 8U); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 7U); \ + state.s2 += state.s6; state.se = as_uint(as_ushort2(state.se ^ state.s2).s10); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 12U); state.s2 += state.s6; state.se = rotate(state.se ^ state.s2, 8U); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 7U); \ + state.s3 += state.s7; state.sf = as_uint(as_ushort2(state.sf ^ state.s3).s10); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 12U); state.s3 += state.s7; state.sf = rotate(state.sf ^ state.s3, 8U); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 7U); \ + state.s0 += state.s5; state.sf = as_uint(as_ushort2(state.sf ^ state.s0).s10); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 12U); state.s0 += state.s5; state.sf = rotate(state.sf ^ state.s0, 8U); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 7U); \ + state.s1 += state.s6; state.sc = as_uint(as_ushort2(state.sc ^ state.s1).s10); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 12U); state.s1 += state.s6; state.sc = rotate(state.sc ^ state.s1, 8U); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 7U); \ + state.s2 += state.s7; state.sd = as_uint(as_ushort2(state.sd ^ state.s2).s10); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 
12U); state.s2 += state.s7; state.sd = rotate(state.sd ^ state.s2, 8U); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 7U); \ + state.s3 += state.s4; state.se = as_uint(as_ushort2(state.se ^ state.s3).s10); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 12U); state.s3 += state.s4; state.se = rotate(state.se ^ state.s3, 8U); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 7U); \ } while(0) uint16 chacha_small_parallel_rnd(uint16 X) { - uint4 t, st[4]; + uint4 st[4]; ((uint16 *)st)[0] = X; @@ -405,7 +1078,7 @@ uint16 chacha_small_parallel_rnd(uint16 X) CHACHA_CORE_PARALLEL(st); } - #else + #elif CHACHA_SMALL_UNROLL == 5 for(int i = 0; i < 2; ++i) { @@ -415,15 +1088,95 @@ uint16 chacha_small_parallel_rnd(uint16 X) CHACHA_CORE_PARALLEL(st); CHACHA_CORE_PARALLEL(st); } + #else + + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); + CHACHA_CORE_PARALLEL(st); #endif return(X + ((uint16 *)st)[0]); } -void neoscrypt_blkmix(uint16 *XV, bool alg) -{ +uint16 chacha_small_scalar_rnd(uint16 X) +{ + uint16 st = X; + + #if CHACHA_SMALL_UNROLL == 1 + + for(int i = 0; i < 10; ++i) + { + CHACHA_CORE(st); + } + + #elif CHACHA_SMALL_UNROLL == 2 + + for(int i = 0; i < 5; ++i) + { + CHACHA_CORE(st); + CHACHA_CORE(st); + } + + #elif CHACHA_SMALL_UNROLL == 3 + + for(int i = 0; i < 4; ++i) + { + CHACHA_CORE(st); + if(i == 3) break; + CHACHA_CORE(st); + CHACHA_CORE(st); + } + + #elif CHACHA_SMALL_UNROLL == 4 + + for(int i = 0; i < 3; ++i) + { + CHACHA_CORE(st); + CHACHA_CORE(st); + if(i == 2) break; + CHACHA_CORE(st); + CHACHA_CORE(st); + } + + #elif CHACHA_SMALL_UNROLL == 5 + + for(int i = 0; i < 2; ++i) + { + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + } + + #else + + CHACHA_CORE(st); + CHACHA_CORE(st); 
+ CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + + #endif + + return(X + st); +} +void neoscrypt_blkmix_salsa(uint16 XV[4]) +{ /* NeoScrypt flow: Scrypt flow: Xa ^= Xd; M(Xa'); Ya = Xa"; Xa ^= Xb; M(Xa'); Ya = Xa"; Xb ^= Xa"; M(Xb'); Yb = Xb"; Xb ^= Xa"; M(Xb'); Yb = Xb"; @@ -431,48 +1184,135 @@ void neoscrypt_blkmix(uint16 *XV, bool alg) Xd ^= Xc"; M(Xd'); Yd = Xd"; Xb" = Yb; Xa" = Ya; Xb" = Yc; Xc" = Yb; Xd" = Yd; */ - +#if 0 + for(int i = 0; i < 4; ++i) XV[i] = (uint16)( + XV[i].s4, XV[i].s9, XV[i].se, XV[i].s3, XV[i].s8, XV[i].sd, XV[i].s2, XV[i].s7, + XV[i].sc, XV[i].s1, XV[i].s6, XV[i].sb, XV[i].s0, XV[i].s5, XV[i].sa, XV[i].sf); +#endif XV[0] ^= XV[3]; - if(!alg) - { - XV[0] = salsa_small_scalar_rnd(XV[0]); XV[1] ^= XV[0]; - XV[1] = salsa_small_scalar_rnd(XV[1]); XV[2] ^= XV[1]; - XV[2] = salsa_small_scalar_rnd(XV[2]); XV[3] ^= XV[2]; - XV[3] = salsa_small_scalar_rnd(XV[3]); - } - else - { - XV[0] = chacha_small_parallel_rnd(XV[0]); XV[1] ^= XV[0]; - XV[1] = chacha_small_parallel_rnd(XV[1]); XV[2] ^= XV[1]; - XV[2] = chacha_small_parallel_rnd(XV[2]); XV[3] ^= XV[2]; - XV[3] = chacha_small_parallel_rnd(XV[3]); - } + XV[0] = salsa_small_parallel_rnd(XV[0]); XV[1] ^= XV[0]; + XV[1] = salsa_small_parallel_rnd(XV[1]); XV[2] ^= XV[1]; + XV[2] = salsa_small_parallel_rnd(XV[2]); XV[3] ^= XV[2]; + XV[3] = salsa_small_parallel_rnd(XV[3]); + + //XV[0] = salsa_small_scalar_rnd(XV[0]); XV[1] ^= XV[0]; + //XV[1] = salsa_small_scalar_rnd(XV[1]); XV[2] ^= XV[1]; + //XV[2] = salsa_small_scalar_rnd(XV[2]); XV[3] ^= XV[2]; + //XV[3] = salsa_small_scalar_rnd(XV[3]); + + XV[1] ^= XV[2]; + XV[2] ^= XV[1]; + XV[1] ^= XV[2]; +#if 0 + XV[0] = (uint16)(XV[0].sc, XV[0].s9, XV[0].s6, XV[0].s3, XV[0].s0, XV[0].sd, XV[0].sa, XV[0].s7, XV[0].s4, XV[0].s1, XV[0].se, XV[0].sb, XV[0].s8, XV[0].s5, XV[0].s2, XV[0].sf); + XV[1] = (uint16)(XV[1].sc, XV[1].s9, XV[1].s6, XV[1].s3, XV[1].s0, 
XV[1].sd, XV[1].sa, XV[1].s7, XV[1].s4, XV[1].s1, XV[1].se, XV[1].sb, XV[1].s8, XV[1].s5, XV[1].s2, XV[1].sf); + XV[2] = (uint16)(XV[2].sc, XV[2].s9, XV[2].s6, XV[2].s3, XV[2].s0, XV[2].sd, XV[2].sa, XV[2].s7, XV[2].s4, XV[2].s1, XV[2].se, XV[2].sb, XV[2].s8, XV[2].s5, XV[2].s2, XV[2].sf); + XV[3] = (uint16)(XV[3].sc, XV[3].s9, XV[3].s6, XV[3].s3, XV[3].s0, XV[3].sd, XV[3].sa, XV[3].s7, XV[3].s4, XV[3].s1, XV[3].se, XV[3].sb, XV[3].s8, XV[3].s5, XV[3].s2, XV[3].sf); +#endif +} + +void neoscrypt_blkmix_chacha(uint16 XV[4]) +{ + + /* NeoScrypt flow: Scrypt flow: + Xa ^= Xd; M(Xa'); Ya = Xa"; Xa ^= Xb; M(Xa'); Ya = Xa"; + Xb ^= Xa"; M(Xb'); Yb = Xb"; Xb ^= Xa"; M(Xb'); Yb = Xb"; + Xc ^= Xb"; M(Xc'); Yc = Xc"; Xa" = Ya; + Xd ^= Xc"; M(Xd'); Yd = Xd"; Xb" = Yb; + Xa" = Ya; Xb" = Yc; + Xc" = Yb; Xd" = Yd; */ + XV[0] ^= XV[3]; + + #if 1 + + XV[0] = chacha_small_parallel_rnd(XV[0]); XV[1] ^= XV[0]; + XV[1] = chacha_small_parallel_rnd(XV[1]); XV[2] ^= XV[1]; + XV[2] = chacha_small_parallel_rnd(XV[2]); XV[3] ^= XV[2]; + XV[3] = chacha_small_parallel_rnd(XV[3]); + + #else + + XV[0] = chacha_small_scalar_rnd(XV[0]); XV[1] ^= XV[0]; + XV[1] = chacha_small_scalar_rnd(XV[1]); XV[2] ^= XV[1]; + XV[2] = chacha_small_scalar_rnd(XV[2]); XV[3] ^= XV[2]; + XV[3] = chacha_small_scalar_rnd(XV[3]); + + #endif + XV[1] ^= XV[2]; XV[2] ^= XV[1]; XV[1] ^= XV[2]; } +#ifdef WIDE_STRIPE + +void ScratchpadStore(__global void *V, void *X, uchar idx) +{ + ((__global ulong16 *)V)[mul24(idx << 1, (int)get_global_size(0))] = ((ulong16 *)X)[0]; + ((__global ulong16 *)V)[mul24((idx << 1), (int)get_global_size(0)) + 1] = ((ulong16 *)X)[1]; + //const uint idx2 = mul24(idx << 2, (int)get_global_size(0)); + //#pragma unroll + //for(int i = 0; i < 4; ++i) ((__global uint16 *)V)[idx2 + i] = ((uint16 *)X)[i]; +} + +void ScratchpadMix(void *X, const __global void *V, uchar idx) +{ + ((ulong16 *)X)[0] ^= ((__global ulong16 *)V)[mul24(idx << 1, (int)get_global_size(0))]; + ((ulong16 *)X)[1] ^= ((__global ulong16 
*)V)[mul24((idx << 1), (int)get_global_size(0)) + 1]; +} + +#else + void ScratchpadStore(__global void *V, void *X, uchar idx) { - ((__global ulong16 *)V)[idx << 1] = ((ulong16 *)X)[0]; - ((__global ulong16 *)V)[(idx << 1) + 1] = ((ulong16 *)X)[1]; + ((__global ulong16 *)V)[mul24(idx << 1, (int)get_global_size(0))] = ((ulong16 *)X)[0]; + ((__global ulong16 *)V)[mul24((idx << 1) + 1, (int)get_global_size(0))] = ((ulong16 *)X)[1]; } void ScratchpadMix(void *X, const __global void *V, uchar idx) { - ((ulong16 *)X)[0] ^= ((__global ulong16 *)V)[idx << 1]; - ((ulong16 *)X)[1] ^= ((__global ulong16 *)V)[(idx << 1) + 1]; + ((ulong16 *)X)[0] ^= ((__global ulong16 *)V)[mul24(idx << 1, (int)get_global_size(0))]; + ((ulong16 *)X)[1] ^= ((__global ulong16 *)V)[mul24((idx << 1) + 1, (int)get_global_size(0))]; +} + +#endif + + + +#define SALSA_PERM (uint16)(4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11, 0, 5, 10, 15) +#define SALSA_INV_PERM (uint16)(12, 9, 6, 3, 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15) + +void SMix_Salsa(uint16 X[4], __global uint16 *V) +{ + #pragma unroll 1 + for(int i = 0; i < 128; ++i) + { + ScratchpadStore(V, X, i); + neoscrypt_blkmix_salsa(X); + } + + #pragma unroll 1 + for(int i = 0; i < 128; ++i) + { + #ifdef SHITMAIN + const uint idx = convert_uchar(((uint *)X)[60] & 0x7F); + #else + const uint idx = convert_uchar(((uint *)X)[48] & 0x7F); + #endif + ScratchpadMix(X, V, idx); + neoscrypt_blkmix_salsa(X); + } } -void SMix(uint16 *X, __global uint16 *V, bool flag) +void SMix_Chacha(uint16 X[4], __global uint16 *V) { #pragma unroll 1 for(int i = 0; i < 128; ++i) { ScratchpadStore(V, X, i); - neoscrypt_blkmix(X, flag); + neoscrypt_blkmix_chacha(X); } #pragma unroll 1 @@ -480,10 +1320,13 @@ void SMix(uint16 *X, __global uint16 *V, bool flag) { const uint idx = convert_uchar(((uint *)X)[48] & 0x7F); ScratchpadMix(X, V, idx); - neoscrypt_blkmix(X, flag); + neoscrypt_blkmix_chacha(X); } } +#define SALSA_PERM (uint16)(4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11, 0, 5, 10, 15) 
+#define SALSA_INV_PERM (uint16)(12, 9, 6, 3, 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15) + __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, const uint target) { @@ -491,9 +1334,12 @@ __kernel void search(__global const uchar* restrict input, __global uint* restri #define CONSTANT_r 2 // X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha uint16 X[4], Z[4]; - /* V = CONSTANT_N * CONSTANT_r * 2 * BLOCK_SIZE */ - __global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS))); - uchar outbuf[32]; + #ifdef WIDE_STRIPE + __global ulong16 *V = ((__global ulong16 *)padcache) + ((get_global_id(0) % get_global_size(0)) << 1); + #else + __global ulong16 *V = ((__global ulong16 *)(padcache) + (get_global_id(0) % get_global_size(0))); + #endif + //uchar outbuf[32]; uchar data[PASSWORD_LEN]; ((ulong8 *)data)[0] = ((__global const ulong8 *)input)[0]; @@ -502,24 +1348,149 @@ __kernel void search(__global const uchar* restrict input, __global uint* restri ((uint *)data)[19] = get_global_id(0); // X = KDF(password, salt) - fastkdf(data, data, PASSWORD_LEN, (uchar *)X, 256); - + //fastkdf(data, data, PASSWORD_LEN, (uchar *)X, 256); + fastkdf1(data, (uchar *)X); + + #ifndef SHITMAIN // Process ChaCha 1st, Salsa 2nd and XOR them - run that through PBKDF2 CopyBytes128(Z, X, 2); - + #else + + #pragma unroll + for(int i = 0; i < 4; ++i) ((uint16 *)Z)[i] = shuffle(((uint16 *)X)[i], SALSA_PERM); + + #endif + // X = SMix(X); X & Z are swapped, repeat. 
- for(bool flag = false;; ++flag) + for(int i = 0;; ++i) { - SMix(X, V, flag); - if(flag) break; + #ifdef SWAP + if (i) SMix_Salsa(X,V); else SMix_Chacha(X,V); + if(i) break; SwapBytes128(X, Z, 256); + #else + if (i) SMix_Chacha(X,V); else SMix_Salsa(Z,V); + if(i) break; + #endif } + + #if defined(SWAP) && defined(SHITMAIN) + #pragma unroll + for(int i = 0; i < 4; ++i) ((uint16 *)Z)[i] ^= shuffle(((uint16 *)X)[i], SALSA_INV_PERM); + fastkdf2(data, (uchar *)Z, output, target); + #elif defined(SHITMAIN) + #pragma unroll + for(int i = 0; i < 4; ++i) ((uint16 *)X)[i] ^= shuffle(((uint16 *)Z)[i], SALSA_INV_PERM); + fastkdf2(data, (uchar *)X, output, target); + #else + // blkxor(X, Z) + ((ulong16 *)X)[0] ^= ((ulong16 *)Z)[0]; + ((ulong16 *)X)[1] ^= ((ulong16 *)Z)[1]; + + // output = KDF(password, X) + //fastkdf(data, (uchar *)X, FASTKDF_BUFFER_SIZE, outbuf, 32); + fastkdf2(data, (uchar *)X, output, target); + #endif +} + + +/* +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global const uchar* restrict input, __global uint16 *XZOutput) +{ +#define CONSTANT_N 128 +#define CONSTANT_r 2 + // X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha + uint16 X[4]; + XZOutput += (4 * 2 * get_global_id(0)); + + //uchar outbuf[32]; + uchar data[PASSWORD_LEN]; + + ((ulong8 *)data)[0] = ((__global const ulong8 *)input)[0]; + ((ulong *)data)[8] = ((__global const ulong *)input)[8]; + ((uint *)data)[18] = ((__global const uint *)input)[18]; + ((uint *)data)[19] = get_global_id(0); + // X = KDF(password, salt) + //fastkdf(data, data, PASSWORD_LEN, (uchar *)X, 256); + fastkdf1(data, (uchar *)X); + + for(int i = 0; i < 4; ++i) XZOutput[i] = X[i]; + for(int i = 0; i < 4; ++i) XZOutput[i + 4] = X[i]; + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global uint16 *XZOutput, __global uchar *padcache) +{ +#define CONSTANT_N 128 +#define CONSTANT_r 2 + // X = CONSTANT_r * 2 * 
BLOCK_SIZE(64); Z is a copy of X for ChaCha + uint16 X[4], Z[4]; + #ifdef WIDE_STRIPE + __global ulong16 *V = ((__global ulong16 *)padcache) + ((get_global_id(0) % get_global_size(0)) << 1); + #else + __global ulong16 *V = ((__global ulong16 *)(padcache) + (get_global_id(0) % get_global_size(0))); + #endif + //uchar outbuf[32]; + + XZOutput += (4 * 2 * get_global_id(0)); + + for(int i = 0; i < 4; ++i) X[i] = XZOutput[i]; + + SMix_Salsa(X,V); + + for(int i = 0; i < 4; ++i) XZOutput[i] = X[i]; + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global uint16 *XZOutput, __global uchar *padcache) +{ +#define CONSTANT_N 128 +#define CONSTANT_r 2 + // X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha + uint16 X[4], Z[4]; + #ifdef WIDE_STRIPE + __global ulong16 *V = ((__global ulong16 *)padcache) + ((get_global_id(0) % get_global_size(0)) << 1); + #else + __global ulong16 *V = ((__global ulong16 *)(padcache) + (get_global_id(0) % get_global_size(0))); + #endif + //uchar outbuf[32]; + + XZOutput += (4 * 2 * get_global_id(0)); + + for(int i = 0; i < 4; ++i) X[i] = XZOutput[i + 4]; + + SMix_Chacha(X,V); + + for(int i = 0; i < 4; ++i) XZOutput[i + 4] = X[i]; + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global const uchar* restrict input, __global uint16 *XZOutput, __global uint* restrict output, const uint target) +{ + uint16 X[4], Z[4]; + uchar data[PASSWORD_LEN]; + + ((ulong8 *)data)[0] = ((__global const ulong8 *)input)[0]; + ((ulong *)data)[8] = ((__global const ulong *)input)[8]; + ((uint *)data)[18] = ((__global const uint *)input)[18]; + ((uint *)data)[19] = get_global_id(0); + + XZOutput += (4 * 2 * get_global_id(0)); + + for(int i = 0; i < 4; ++i) X[i] = XZOutput[i]; + for(int i = 0; i < 4; ++i) Z[i] = XZOutput[i + 4]; + // blkxor(X, Z) ((ulong16 *)X)[0] ^= ((ulong16 *)Z)[0]; ((ulong16 *)X)[1] ^= ((ulong16 
*)Z)[1]; // output = KDF(password, X) - fastkdf(data, (uchar *)X, FASTKDF_BUFFER_SIZE, outbuf, 32); - if(((uint *)outbuf)[7] <= target) output[atomic_add(output + 0xFF, 1)] = get_global_id(0); -} \ No newline at end of file + //fastkdf(data, (uchar *)X, FASTKDF_BUFFER_SIZE, outbuf, 32); + fastkdf2(data, (uchar *)X, output, target); +} +*/ \ No newline at end of file From 17f2478eddb578de01c891c4c41473cd350f87c0 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 11 Feb 2016 21:29:47 +0100 Subject: [PATCH 31/63] Some other minor fixes from theLosers106 --- algorithm.c | 4 +--- algorithm/yescrypt-opt.c | 8 ++++---- api.c | 2 +- ocl.c | 2 +- winbuild/sgminer.vcxproj | 4 ++++ winbuild/sgminer.vcxproj.filters | 12 ++++++++++++ 6 files changed, 23 insertions(+), 9 deletions(-) diff --git a/algorithm.c b/algorithm.c index 3f6914f4b..0f441c6b2 100644 --- a/algorithm.c +++ b/algorithm.c @@ -943,8 +943,6 @@ static algorithm_settings_t algos[] = { A_CREDITS("credits"), #undef A_CREDITS - - #define A_YESCRYPT(a) \ { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} A_YESCRYPT("yescrypt"), @@ -1004,7 +1002,7 @@ static algorithm_settings_t algos[] = { #undef A_FUGUE { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL }, - { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFFU, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL }, + { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL }, // Terminator (do not remove) { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL } diff --git 
a/algorithm/yescrypt-opt.c b/algorithm/yescrypt-opt.c index b54be469d..6adef7e74 100644 --- a/algorithm/yescrypt-opt.c +++ b/algorithm/yescrypt-opt.c @@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size) if (size + 63 < size) { errno = ENOMEM; } - else if ((base = malloc(size + 63)) != NULL) { + else if ((base = (uint8_t *)malloc(size + 63)) != NULL) { aligned = base + 63; aligned -= (uintptr_t)aligned & 63; } @@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags, uint64_t * XY, uint64_t * S) { void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8); - const uint64_t * VROM = shared->shared1.aligned; + const uint64_t * VROM = (uint64_t *)shared->shared1.aligned; uint32_t VROM_mask = shared->mask1; size_t s = 16 * r; uint64_t * X = V; @@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop, void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8); - const uint64_t * VROM = shared->shared1.aligned; + const uint64_t * VROM = (uint64_t *)shared->shared1.aligned; uint32_t VROM_mask = shared->mask1 | 1; size_t s = 16 * r; yescrypt_flags_t rw = flags & YESCRYPT_RW; @@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t, uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; if (Sp) - smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL); + smix1(Bp, 1, S_SIZE_ALL / 16, (yescrypt_flags_t)flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL); diff --git a/api.c b/api.c index 08701b8f1..1efa30045 100644 --- a/api.c +++ b/api.c @@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m io_open = io_add(io_data, isjson ? 
COMSTR JSON_VERSION : _VERSION COMSTR); root = api_add_string(root, "Miner", PACKAGE " " VERSION, false); - root = api_add_string(root, "CGMiner", CGMINER_VERSION, false); + root = api_add_string(root, "SGMiner", CGMINER_VERSION, false); root = api_add_const(root, "API", APIVERSION, false); root = print_data(root, buf, isjson, false); diff --git a/ocl.c b/ocl.c index 98f337bdf..ecc383c87 100644 --- a/ocl.c +++ b/ocl.c @@ -187,7 +187,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg cl_platform_id platform = NULL; struct cgpu_info *cgpu = &gpus[gpu]; _clState *clState = (_clState *)calloc(1, sizeof(_clState)); - cl_uint preferred_vwidth, slot = 0, cpnd = 0, numDevices = clDevicesNum(); + cl_uint preferred_vwidth, numDevices = clDevicesNum(); cl_device_id *devices = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id)); build_kernel_data *build_data = (build_kernel_data *)alloca(sizeof(struct _build_kernel_data)); char **pbuff = (char **)alloca(sizeof(char *) * numDevices), filename[256]; diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj index e71685e42..67679bb1d 100644 --- a/winbuild/sgminer.vcxproj +++ b/winbuild/sgminer.vcxproj @@ -263,6 +263,7 @@ + @@ -329,12 +330,14 @@ + + @@ -367,6 +370,7 @@ + diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters index 14c9ed2f3..7866e29d6 100644 --- a/winbuild/sgminer.vcxproj.filters +++ b/winbuild/sgminer.vcxproj.filters @@ -221,6 +221,9 @@ Source Files\algorithm + + Source Files\algorithm + @@ -421,6 +424,15 @@ Header Files\algorithm + + Header Files\algorithm + + + Header Files\sph + + + Header Files\algorithm + From 031e4b78cb0395fa21275abc09ae0cf81e9c07f9 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sat, 6 Feb 2016 12:50:16 +0100 Subject: [PATCH 32/63] Add blake256 algos support --- Makefile.am | 2 + algorithm.c | 42 +++++++- algorithm.h | 3 + algorithm/blake256.c | 139 +++++++++++++++++++++++++++ algorithm/blake256.h | 9 ++ 
algorithm/blakecoin.c | 139 +++++++++++++++++++++++++++ algorithm/blakecoin.h | 9 ++ driver-opencl.c | 11 ++- findnonce.c | 8 +- findnonce.h | 2 +- kernel/blake256r14.cl | 157 ++++++++++++++++++++++++++++++ kernel/blake256r8.cl | 77 +++++++++++++++ kernel/vanilla.cl | 77 +++++++++++++++ sph/blake.c | 217 ++++++++++++++++++++++++++++++++++++++++++ sph/sph_blake.h | 4 + 15 files changed, 887 insertions(+), 9 deletions(-) create mode 100644 algorithm/blake256.c create mode 100644 algorithm/blake256.h create mode 100644 algorithm/blakecoin.c create mode 100644 algorithm/blakecoin.h create mode 100644 kernel/blake256r14.cl create mode 100644 kernel/blake256r8.cl create mode 100644 kernel/vanilla.cl diff --git a/Makefile.am b/Makefile.am index 125b432b3..c36ea1dbd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -77,6 +77,8 @@ sgminer_SOURCES += algorithm/lyra2rev2.c algorithm/lyra2rev2.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h sgminer_SOURCES += algorithm/credits.c algorithm/credits.h sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h +sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h +sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index 0f441c6b2..406a84c66 100644 --- a/algorithm.c +++ b/algorithm.c @@ -37,6 +37,8 @@ #include "algorithm/pluck.h" #include "algorithm/yescrypt.h" #include "algorithm/credits.h" +#include "algorithm/blake256.h" +#include "algorithm/blakecoin.h" #include "compat.h" @@ -65,7 +67,10 @@ const char *algorithm_type_str[] = { "Lyra2REV2" "Pluck" "Yescrypt", - "Yescrypt-multi" + "Yescrypt-multi", + "Blakecoin", + "Blake", + "Vanilla" }; void sha256(const unsigned char *message, unsigned int len, unsigned char *digest) @@ -915,6 +920,34 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un return 
status; } +static cl_int queue_blake_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_int status = 0; + cl_ulong le_target; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(blk->work->blk.ctx_a); + CL_SET_ARG(blk->work->blk.ctx_b); + CL_SET_ARG(blk->work->blk.ctx_c); + CL_SET_ARG(blk->work->blk.ctx_d); + CL_SET_ARG(blk->work->blk.ctx_e); + CL_SET_ARG(blk->work->blk.ctx_f); + CL_SET_ARG(blk->work->blk.ctx_g); + CL_SET_ARG(blk->work->blk.ctx_h); + + CL_SET_ARG(blk->work->blk.cty_a); + CL_SET_ARG(blk->work->blk.cty_b); + CL_SET_ARG(blk->work->blk.cty_c); + + return status; +} + static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using litecoin algorithm #define A_SCRYPT(a) \ @@ -953,7 +986,6 @@ static algorithm_settings_t algos[] = { A_YESCRYPT_MULTI("yescrypt-multi"), #undef A_YESCRYPT_MULTI - // kernels starting from this will have difficulty calculated by using quarkcoin algorithm #define A_QUARK(a, b) \ { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options } @@ -1004,6 +1036,10 @@ static algorithm_settings_t algos[] = { { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL }, { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL }, + { "blake256r8", ALGO_BLAKECOIN, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, 
blakecoin_regenhash, queue_blake_kernel, sha256, NULL }, + { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, queue_blake_kernel, gen_hash, NULL }, + { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, queue_blake_kernel, gen_hash, NULL }, + // Terminator (do not remove) { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL } }; @@ -1077,6 +1113,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa ALGO_ALIAS("whirlpool", "whirlcoin"); ALGO_ALIAS("lyra2", "lyra2re"); ALGO_ALIAS("lyra2v2", "lyra2rev2"); + ALGO_ALIAS("blakecoin", "blake256r8"); + ALGO_ALIAS("blake", "blake256r14"); #undef ALGO_ALIAS #undef ALGO_ALIAS_NF diff --git a/algorithm.h b/algorithm.h index 9187eb53e..c227b43d7 100644 --- a/algorithm.h +++ b/algorithm.h @@ -34,6 +34,9 @@ typedef enum { ALGO_PLUCK, ALGO_YESCRYPT, ALGO_YESCRYPT_MULTI, + ALGO_BLAKECOIN, + ALGO_BLAKE, + ALGO_VANILLA } algorithm_type_t; extern const char *algorithm_type_str[]; diff --git a/algorithm/blake256.c b/algorithm/blake256.c new file mode 100644 index 000000000..e347bc23f --- /dev/null +++ b/algorithm/blake256.c @@ -0,0 +1,139 @@ +/* + * BLAKE implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + * + * Modified for more speed by BlueDragon747 for the Blakecoin project + */ + +#include +#include +#include +#include + +#include "sph/sph_blake.h" +#include "algorithm/blake256.h" + +/* +* Encode a length len/4 vector of (uint32_t) into a length len vector of +* (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
+*/ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + +static const uint32_t diff1targ_blake256 = 0x000000ff; + +inline void blake256hash(void *state, const void *input) +{ + sph_blake256_context ctx_blake; + sph_blake256_init(&ctx_blake); + sph_blake256(&ctx_blake, input, 80); + sph_blake256_close(&ctx_blake, state); +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code */ +int blake256_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + blake256hash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void blake256_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + blake256hash(ohash, data); +} + +bool scanhash_blake256(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = 
(n); + blake256hash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} diff --git a/algorithm/blake256.h b/algorithm/blake256.h new file mode 100644 index 000000000..535a73996 --- /dev/null +++ b/algorithm/blake256.h @@ -0,0 +1,9 @@ +#ifndef BLAKE256_H +#define BLAKE256_H + +#include "miner.h" + +extern int blake256_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void blake256_regenhash(struct work *work); + +#endif /* BLAKE256_H */ \ No newline at end of file diff --git a/algorithm/blakecoin.c b/algorithm/blakecoin.c new file mode 100644 index 000000000..58974c914 --- /dev/null +++ b/algorithm/blakecoin.c @@ -0,0 +1,139 @@ +/* + * BLAKE implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + * + * Modified for more speed by BlueDragon747 for the Blakecoin project + */ + +#include +#include +#include +#include + +#include "sph/sph_blake.h" +#include "algorithm/blakecoin.h" + +/* +* Convert len 32-bit words from src to big-endian byte order and store them +* in dst. len is a count of uint32_t words, not a length in bytes. +*/ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + +static const uint32_t diff1targ_blake256 = 0x000000ff; + +inline void blakecoinhash(void *state, const void *input) +{ + sph_blake256_context ctx_blake; + sph_blake256_init(&ctx_blake); + sph_blake256r8(&ctx_blake, input, 80); + sph_blake256r8_close(&ctx_blake, state); +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code */ +int blakecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + blakecoinhash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void blakecoin_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t
*ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + blakecoinhash(ohash, data); +} + +bool scanhash_blakecoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + blakecoinhash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} diff --git a/algorithm/blakecoin.h b/algorithm/blakecoin.h new file mode 100644 index 000000000..28b9b3d70 --- /dev/null +++ b/algorithm/blakecoin.h @@ -0,0 +1,9 @@ +#ifndef BLAKECOIN_H +#define BLAKECOIN_H + +#include "miner.h" + +extern int blakecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void blakecoin_regenhash(struct work *work); + +#endif /* BLAKECOIN_H */ \ No newline at end of file diff --git a/driver-opencl.c b/driver-opencl.c index 4a9d0693e..e89cc3503 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1366,9 +1366,16 @@ static bool opencl_thread_init(struct thr_info *thr) static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work) { - if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REV2) { + if (work->pool->algorithm.type == ALGO_LYRA2RE || + 
work->pool->algorithm.type == ALGO_LYRA2REV2 || + work->pool->algorithm.type == ALGO_BLAKE) { work->blk.work = work; - precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data)); + precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data), 14); + } + else if (work->pool->algorithm.type == ALGO_BLAKECOIN || + work->pool->algorithm.type == ALGO_VANILLA) { + work->blk.work = work; + precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data), 8); } else { work->blk.work = work; diff --git a/findnonce.c b/findnonce.c index 8858cfa6c..42af361f2 100644 --- a/findnonce.c +++ b/findnonce.c @@ -235,15 +235,13 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) } } -// BLAKE 256 14 rounds (standard) - typedef struct { uint32_t h[8]; uint32_t t; } blake_state256; -#define NB_ROUNDS32 14 +int NB_ROUNDS32; const uint8_t blake_sigma[][16] = { @@ -348,8 +346,10 @@ void blake256_update(blake_state256 *S, const uint32_t *in) blake256_compress_block(S, m); } -void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) +void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data, int blake256_rounds) { + NB_ROUNDS32 = blake256_rounds; + blake_state256 S; blake256_init(&S); blake256_update(&S, data); diff --git a/findnonce.h b/findnonce.h index 49b1aa9a5..e268ce1b8 100644 --- a/findnonce.h +++ b/findnonce.h @@ -10,6 +10,6 @@ extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res); -extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); +extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data, int blake256_rounds); #endif /*FINDNONCE_H*/ diff --git a/kernel/blake256r14.cl b/kernel/blake256r14.cl new file mode 100644 index 000000000..e94b4d7c4 --- /dev/null +++ b/kernel/blake256r14.cl @@ -0,0 +1,157 @@ +// (c) 2013 originally written by smolen, 
modified by kr105 + +#define SPH_ROTR32(v,n) rotate((uint)(v),(uint)(32-(n))) + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search( + volatile __global uint * restrict output, + // precalc hash from first part of message + const uint h0, + const uint h1, + const uint h2, + const uint h3, + const uint h4, + const uint h5, + const uint h6, + const uint h7, + // last 12 bytes of original message + const uint in16, + const uint in17, + const uint in18 +) +{ + uint M0, M1, M2, M3, M4, M5, M6, M7; + uint M8, M9, MA, MB, MC, MD, ME, MF; + uint V0, V1, V2, V3, V4, V5, V6, V7; + uint V8, V9, VA, VB, VC, VD, VE, VF; + uint pre7; + uint nonce = get_global_id(0); + + V0 = h0; + V1 = h1; + V2 = h2; + V3 = h3; + V4 = h4; + V5 = h5; + V6 = h6; + pre7 = V7 = h7; + M0 = in16; + M1 = in17; + M2 = in18; + M3 = nonce; + + V8 = 0x243F6A88UL; + V9 = 0x85A308D3UL; + VA = 0x13198A2EUL; + VB = 0x03707344UL; + VC = 640 ^ 0xA4093822UL; + VD = 640 ^ 0x299F31D0UL; + VE = 0x082EFA98UL; + VF = 0xEC4E6C89UL; + + M4 = 0x80000000; + M5 = 0; + M6 = 0; + M7 = 0; + M8 = 0; + M9 = 0; + MA = 0; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 640; + + V0 = (V0 + V4 + (M0 ^ 0x85A308D3UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M1 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M2 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M3 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M4 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M5 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M6 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M7 ^ 0x082EFA98UL)); VF =
SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M8 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M9 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MA ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MB ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MC ^ 0xC97C50DDUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MD ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (ME ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MF ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (ME ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MA ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M9 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MF ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MD ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M6 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M1 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 
12); V0 = (V0 + V5 + (MC ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M0 ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M2 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MB ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M7 ^ 0x34E90C6CUL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M5 ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M3 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (MB ^ 0x452821E6UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M8 ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (MC ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M0 ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M5 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M2 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MF ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MD ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (MA ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (ME ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 
16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M7 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M1 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M9 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M4 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (M7 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M9 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M3 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M1 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MD ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MC ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MB ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (ME ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M2 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M6 ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M5 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MA ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + 
(M4 ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M0 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (MF ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M8 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (M9 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M0 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M5 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M7 ^ 0x299F31D0UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M2 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M4 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MA ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MF ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (ME ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M1 ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MB ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MC ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); 
V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M3 ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MD ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (M2 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MC ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M6 ^ 0xBE5466CFUL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MA ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M0 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MB ^ 0x243F6A88UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M8 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M3 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M4 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MD ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M7 ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M5 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MF ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (ME ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M1 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M9 ^ 0x85A308D3UL)); 
VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (MC ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M5 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M1 ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MF ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (ME ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MD ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M4 ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MA ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M0 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M7 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M9 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M2 ^ 0x38D01377UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M8 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MB ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (MD ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ 
V8, 12); V0 = (V0 + V4 + (MB ^ 0xC97C50DDUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M7 ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (ME ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MC ^ 0x85A308D3UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M1 ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M3 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M9 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M5 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M0 ^ 0x299F31D0UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MF ^ 0xA4093822UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M4 ^ 0xB5470917UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M2 ^ 0xBE5466CFUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MA ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + + // Constants + // 00 = 0x243F6A88UL + // 01 = 0x85A308D3UL + // 02 = 0x13198A2EUL + // 03 = 0x03707344UL + // 04 = 0xA4093822UL + // 05 = 0x299F31D0UL + // 06 = 0x082EFA98UL + // 07 = 0xEC4E6C89UL + // 08 = 0x452821E6UL + // 09 = 0x38D01377UL + // 10 = 0xBE5466CFUL + // 11 = 
0x34E90C6CUL + // 12 = 0xC0AC29B7UL + // 13 = 0xC97C50DDUL + // 14 = 0x3F84D5B5UL + // 15 = 0xB5470917UL + // A=10,B=11,C=12,D=13,E=14,F=15 + + // Round 9: + // 6^15 + V0 = (V0 + V4 + (M6 ^ 0xB5470917UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MF ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; + // 14^9 + V1 = (V1 + V5 + (ME ^ 0x38D01377UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M9 ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; + // 11^3 + V2 = (V2 + V6 + (MB ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M3 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; + // 0^8 + V3 = (V3 + V7 + (M0 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M8 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; + // 12^2 + V0 = (V0 + V5 + (MC ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M2 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; + // 13^7 + V1 = (V1 + V6 + (MD ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M7 ^ 0xC97C50DDUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; + // 1^4 + V2 = (V2 + V7 + (M1 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M4 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; + // 10^5 + V3 = (V3 + V4 + (MA ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M5 ^ 0xBE5466CFUL)); VE = 
SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + + // Constants + // 00 = 0x243F6A88UL + // 01 = 0x85A308D3UL + // 02 = 0x13198A2EUL + // 03 = 0x03707344UL + // 04 = 0xA4093822UL + // 05 = 0x299F31D0UL + // 06 = 0x082EFA98UL + // 07 = 0xEC4E6C89UL + // 08 = 0x452821E6UL + // 09 = 0x38D01377UL + // 10 = 0xBE5466CFUL + // 11 = 0x34E90C6CUL + // 12 = 0xC0AC29B7UL + // 13 = 0xC97C50DDUL + // 14 = 0x3F84D5B5UL + // 15 = 0xB5470917UL + // A=10,B=11,C=12,D=13,E=14,F=15 + + // Round 10 + // 10^2 + V0 = (V0 + V4 + (MA ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M2 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; + // 8^4 + V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; + // 7^6 + V2 = (V2 + V6 + (M7 ^ 0x082EFA98UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M6 ^ 0xEC4E6C89UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; + // 1^5 + V3 = (V3 + V7 + (M1 ^ 0x299F31D0UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M5 ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; + // 15^11 + V0 = (V0 + V5 + (MF ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MB ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; + // 9^14 + V1 = (V1 + V6 + (M9 ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (ME ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; + // 3^12 + V2 = (V2 + V7 + (M3 ^ 0xC0AC29B7UL)); VD = 
SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MC ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; + // 13^0 + V3 = (V3 + V4 + (MD ^ 0x243F6A88UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M0 ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + + // Round 11,12,13,14 repeated from beginning again + V0 = (V0 + V4 + (M0 ^ 0x85A308D3UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M1 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M2 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M3 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M4 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M5 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M6 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M7 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M8 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M9 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MA ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MB ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MC ^ 0xC97C50DDUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MD ^ 0xC0AC29B7UL)); VD = 
SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (ME ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MF ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (ME ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MA ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M9 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MF ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MD ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M6 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M1 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MC ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M0 ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M2 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MB ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M7 ^ 0x34E90C6CUL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M5 ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 
12); V3 = (V3 + V4 + (M3 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (MB ^ 0x452821E6UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M8 ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (MC ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M0 ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M5 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M2 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MF ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MD ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (MA ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (ME ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M7 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M1 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M9 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M4 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + V0 = (V0 + V4 + (M7 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 
16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M9 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M3 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M1 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MD ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MC ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MB ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (ME ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M2 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M6 ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M5 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MA ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M4 ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M0 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (MF ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M8 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7); + + if(pre7 ^ V7 ^ VF) + return; + output[output[0xFF]++] = nonce; +} \ No newline at end of file diff --git a/kernel/blake256r8.cl b/kernel/blake256r8.cl new file mode 100644 index 000000000..fca62fe79 --- /dev/null +++ b/kernel/blake256r8.cl @@ 
-0,0 +1,104 @@
+// (c) 2013 originally written by smolen, modified by kr105
+//
+// BLAKE-256 nonce search cut down to 8 rounds. Every work-item uses its
+// global id as the nonce, runs the rounds over one message block on top of
+// the precomputed state h0..h7, and records the nonce when
+// (h7 ^ V7 ^ VF) is zero after the last round.
+
+#ifdef cl_khr_global_int32_base_atomics
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#endif
+
+#define SPH_ROTR32(v,n) rotate((uint)(v),(uint)(32-(n)))
+
+// Round constants; the hex-digit suffix lets ROUND() build the name by pasting.
+#define CS0 0x243F6A88UL
+#define CS1 0x85A308D3UL
+#define CS2 0x13198A2EUL
+#define CS3 0x03707344UL
+#define CS4 0xA4093822UL
+#define CS5 0x299F31D0UL
+#define CS6 0x082EFA98UL
+#define CS7 0xEC4E6C89UL
+#define CS8 0x452821E6UL
+#define CS9 0x38D01377UL
+#define CSA 0xBE5466CFUL
+#define CSB 0x34E90C6CUL
+#define CSC 0xC0AC29B7UL
+#define CSD 0xC97C50DDUL
+#define CSE 0x3F84D5B5UL
+#define CSF 0xB5470917UL
+
+// One G step on the state words (a, b, c, d): message word mx is XORed with
+// constant cy, then message word my with constant cx.
+#define GS(a, b, c, d, mx, my, cx, cy) \
+	a = (a + b + (mx ^ cy)); d = SPH_ROTR32(d ^ a, 16); \
+	c = (c + d); b = SPH_ROTR32(b ^ c, 12); \
+	a = (a + b + (my ^ cx)); d = SPH_ROTR32(d ^ a, 8); \
+	c = (c + d); b = SPH_ROTR32(b ^ c, 7)
+
+// One round: four column steps followed by four diagonal steps. The
+// arguments are the sixteen message-word indices (hex digits) in the order
+// this round consumes them; the same digit selects the paired constant.
+#define ROUND(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF) \
+	GS(V0, V4, V8, VC, M##s0, M##s1, CS##s0, CS##s1); \
+	GS(V1, V5, V9, VD, M##s2, M##s3, CS##s2, CS##s3); \
+	GS(V2, V6, VA, VE, M##s4, M##s5, CS##s4, CS##s5); \
+	GS(V3, V7, VB, VF, M##s6, M##s7, CS##s6, CS##s7); \
+	GS(V0, V5, VA, VF, M##s8, M##s9, CS##s8, CS##s9); \
+	GS(V1, V6, VB, VC, M##sA, M##sB, CS##sA, CS##sB); \
+	GS(V2, V7, V8, VD, M##sC, M##sD, CS##sC, CS##sD); \
+	GS(V3, V4, V9, VE, M##sE, M##sF, CS##sE, CS##sF)
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+	volatile __global uint * restrict output,
+	// precalc hash from first part of message
+	const uint h0,
+	const uint h1,
+	const uint h2,
+	const uint h3,
+	const uint h4,
+	const uint h5,
+	const uint h6,
+	const uint h7,
+	// last 12 bytes of original message
+	const uint in16,
+	const uint in17,
+	const uint in18
+)
+{
+	const uint nonce = get_global_id(0);
+
+	// Message block: the three caller-supplied words, the nonce, then fixed
+	// words (presumably the BLAKE padding and the 640-bit message length).
+	const uint M0 = in16, M1 = in17, M2 = in18, M3 = nonce;
+	const uint M4 = 0x80000000, M5 = 0, M6 = 0, M7 = 0;
+	const uint M8 = 0, M9 = 0, MA = 0, MB = 0;
+	const uint MC = 0, MD = 1, ME = 0, MF = 640;
+
+	// Working state: the precomputed hash words, then the first eight round
+	// constants with 640 XORed into VC and VD.
+	uint V0 = h0, V1 = h1, V2 = h2, V3 = h3;
+	uint V4 = h4, V5 = h5, V6 = h6, V7 = h7;
+	uint V8 = CS0, V9 = CS1, VA = CS2, VB = CS3;
+	uint VC = 640 ^ CS4, VD = 640 ^ CS5, VE = CS6, VF = CS7;
+
+	ROUND(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F);
+	ROUND(E, A, 4, 8, 9, F, D, 6, 1, C, 0, 2, B, 7, 5, 3);
+	ROUND(B, 8, C, 0, 5, 2, F, D, A, E, 3, 6, 7, 1, 9, 4);
+	ROUND(7, 9, 3, 1, D, C, B, E, 2, 6, 5, A, 4, 0, F, 8);
+	ROUND(9, 0, 5, 7, 2, 4, A, F, E, 1, B, C, 6, 8, 3, D);
+	ROUND(2, C, 6, A, 0, B, 8, 3, 4, D, 7, 5, F, E, 1, 9);
+	ROUND(C, 5, 1, F, E, D, 4, A, 0, 7, 6, 3, 9, 2, 8, B);
+	ROUND(D, B, 7, E, C, 1, 3, 9, 5, 0, F, 4, 8, 6, 2, A);
+
+	// Only a nonce for which h7 ^ V7 ^ VF is zero is reported.
+	if (h7 ^ V7 ^ VF)
+		return;
+
+	// output[0xFF] counts the stored nonces. Reserve the slot with an atomic
+	// increment so that work-items reporting at the same time cannot pick
+	// the same index (a plain output[0xFF]++ can lose a result).
+	output[atomic_inc(output + 0xFF)] = nonce;
+}
\ No newline at end of file
diff --git a/kernel/vanilla.cl b/kernel/vanilla.cl
new file mode 100644
index 000000000..fca62fe79
--- /dev/null
+++ b/kernel/vanilla.cl
@@ -0,0
+1,104 @@
+// (c) 2013 originally written by smolen, modified by kr105
+//
+// BLAKE-256 nonce search cut down to 8 rounds. Every work-item uses its
+// global id as the nonce, runs the rounds over one message block on top of
+// the precomputed state h0..h7, and records the nonce when
+// (h7 ^ V7 ^ VF) is zero after the last round.
+
+#ifdef cl_khr_global_int32_base_atomics
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#endif
+
+#define SPH_ROTR32(v,n) rotate((uint)(v),(uint)(32-(n)))
+
+// Round constants; the hex-digit suffix lets ROUND() build the name by pasting.
+#define CS0 0x243F6A88UL
+#define CS1 0x85A308D3UL
+#define CS2 0x13198A2EUL
+#define CS3 0x03707344UL
+#define CS4 0xA4093822UL
+#define CS5 0x299F31D0UL
+#define CS6 0x082EFA98UL
+#define CS7 0xEC4E6C89UL
+#define CS8 0x452821E6UL
+#define CS9 0x38D01377UL
+#define CSA 0xBE5466CFUL
+#define CSB 0x34E90C6CUL
+#define CSC 0xC0AC29B7UL
+#define CSD 0xC97C50DDUL
+#define CSE 0x3F84D5B5UL
+#define CSF 0xB5470917UL
+
+// One G step on the state words (a, b, c, d): message word mx is XORed with
+// constant cy, then message word my with constant cx.
+#define GS(a, b, c, d, mx, my, cx, cy) \
+	a = (a + b + (mx ^ cy)); d = SPH_ROTR32(d ^ a, 16); \
+	c = (c + d); b = SPH_ROTR32(b ^ c, 12); \
+	a = (a + b + (my ^ cx)); d = SPH_ROTR32(d ^ a, 8); \
+	c = (c + d); b = SPH_ROTR32(b ^ c, 7)
+
+// One round: four column steps followed by four diagonal steps. The
+// arguments are the sixteen message-word indices (hex digits) in the order
+// this round consumes them; the same digit selects the paired constant.
+#define ROUND(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF) \
+	GS(V0, V4, V8, VC, M##s0, M##s1, CS##s0, CS##s1); \
+	GS(V1, V5, V9, VD, M##s2, M##s3, CS##s2, CS##s3); \
+	GS(V2, V6, VA, VE, M##s4, M##s5, CS##s4, CS##s5); \
+	GS(V3, V7, VB, VF, M##s6, M##s7, CS##s6, CS##s7); \
+	GS(V0, V5, VA, VF, M##s8, M##s9, CS##s8, CS##s9); \
+	GS(V1, V6, VB, VC, M##sA, M##sB, CS##sA, CS##sB); \
+	GS(V2, V7, V8, VD, M##sC, M##sD, CS##sC, CS##sD); \
+	GS(V3, V4, V9, VE, M##sE, M##sF, CS##sE, CS##sF)
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+	volatile __global uint * restrict output,
+	// precalc hash from first part of message
+	const uint h0,
+	const uint h1,
+	const uint h2,
+	const uint h3,
+	const uint h4,
+	const uint h5,
+	const uint h6,
+	const uint h7,
+	// last 12 bytes of original message
+	const uint in16,
+	const uint in17,
+	const uint in18
+)
+{
+	const uint nonce = get_global_id(0);
+
+	// Message block: the three caller-supplied words, the nonce, then fixed
+	// words (presumably the BLAKE padding and the 640-bit message length).
+	const uint M0 = in16, M1 = in17, M2 = in18, M3 = nonce;
+	const uint M4 = 0x80000000, M5 = 0, M6 = 0, M7 = 0;
+	const uint M8 = 0, M9 = 0, MA = 0, MB = 0;
+	const uint MC = 0, MD = 1, ME = 0, MF = 640;
+
+	// Working state: the precomputed hash words, then the first eight round
+	// constants with 640 XORed into VC and VD.
+	uint V0 = h0, V1 = h1, V2 = h2, V3 = h3;
+	uint V4 = h4, V5 = h5, V6 = h6, V7 = h7;
+	uint V8 = CS0, V9 = CS1, VA = CS2, VB = CS3;
+	uint VC = 640 ^ CS4, VD = 640 ^ CS5, VE = CS6, VF = CS7;
+
+	ROUND(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F);
+	ROUND(E, A, 4, 8, 9, F, D, 6, 1, C, 0, 2, B, 7, 5, 3);
+	ROUND(B, 8, C, 0, 5, 2, F, D, A, E, 3, 6, 7, 1, 9, 4);
+	ROUND(7, 9, 3, 1, D, C, B, E, 2, 6, 5, A, 4, 0, F, 8);
+	ROUND(9, 0, 5, 7, 2, 4, A, F, E, 1, B, C, 6, 8, 3, D);
+	ROUND(2, C, 6, A, 0, B, 8, 3, 4, D, 7, 5, F, E, 1, 9);
+	ROUND(C, 5, 1, F, E, D, 4, A, 0, 7, 6, 3, 9, 2, 8, B);
+	ROUND(D, B, 7, E, C, 1, 3, 9, 5, 0, F, 4, 8, 6, 2, A);
+
+	// Only a nonce for which h7 ^ V7 ^ VF is zero is reported.
+	if (h7 ^ V7 ^ VF)
+		return;
+
+	// output[0xFF] counts the stored nonces. Reserve the slot with an atomic
+	// increment so that work-items reporting at the same time cannot pick
+	// the same index (a plain output[0xFF]++ can lose a result).
+	output[atomic_inc(output + 0xFF)] = nonce;
+}
\ No newline at end of file
diff --git a/sph/blake.c b/sph/blake.c
index 1c4a479b5..c7b7b14a8 100644
--- a/sph/blake.c
+++ b/sph/blake.c
@@ -507,6 +507,55 @@ static const
sph_u64 CB[16] = { #if SPH_COMPACT_BLAKE_32 +#define COMPRESS32r8 do { /* compact 8-round compression: copy H0..H7 into V0..V7, seed V8..VF from S0..S3, T0/T1 and CS0..CS7, decode the 16 message words of buf into M[], run ROUND_S eight times, then fold V back into H0..H7 */ \ + sph_u32 M[16]; \ + sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \ + sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \ + unsigned r; \ + V0 = H0; \ + V1 = H1; \ + V2 = H2; \ + V3 = H3; \ + V4 = H4; \ + V5 = H5; \ + V6 = H6; \ + V7 = H7; \ + V8 = S0 ^ CS0; \ + V9 = S1 ^ CS1; \ + VA = S2 ^ CS2; \ + VB = S3 ^ CS3; \ + VC = T0 ^ CS4; \ + VD = T0 ^ CS5; \ + VE = T1 ^ CS6; \ + VF = T1 ^ CS7; \ + M[0x0] = sph_dec32be_aligned(buf + 0); \ + M[0x1] = sph_dec32be_aligned(buf + 4); \ + M[0x2] = sph_dec32be_aligned(buf + 8); \ + M[0x3] = sph_dec32be_aligned(buf + 12); \ + M[0x4] = sph_dec32be_aligned(buf + 16); \ + M[0x5] = sph_dec32be_aligned(buf + 20); \ + M[0x6] = sph_dec32be_aligned(buf + 24); \ + M[0x7] = sph_dec32be_aligned(buf + 28); \ + M[0x8] = sph_dec32be_aligned(buf + 32); \ + M[0x9] = sph_dec32be_aligned(buf + 36); \ + M[0xA] = sph_dec32be_aligned(buf + 40); \ + M[0xB] = sph_dec32be_aligned(buf + 44); \ + M[0xC] = sph_dec32be_aligned(buf + 48); \ + M[0xD] = sph_dec32be_aligned(buf + 52); \ + M[0xE] = sph_dec32be_aligned(buf + 56); \ + M[0xF] = sph_dec32be_aligned(buf + 60); \ + for (r = 0; r < 8; r ++) /* rounds 0..7 only */ \ + ROUND_S(r); \ + H0 ^= S0 ^ V0 ^ V8; /* feed-forward: each H word absorbs one salt word and two state words */ \ + H1 ^= S1 ^ V1 ^ V9; \ + H2 ^= S2 ^ V2 ^ VA; \ + H3 ^= S3 ^ V3 ^ VB; \ + H4 ^= S0 ^ V4 ^ VC; \ + H5 ^= S1 ^ V5 ^ VD; \ + H6 ^= S2 ^ V6 ^ VE; \ + H7 ^= S3 ^ V7 ^ VF; \ + } while (0) + #define COMPRESS32 do { \ sph_u32 M[16]; \ sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \ @@ -558,6 +607,61 @@ static const sph_u64 CB[16] = { #else +#define COMPRESS32r8 do { /* non-compact 8-round compression: same steps as the compact form, with the message words held in scalars M0..MF and ROUND_S(0)..ROUND_S(7) written out */ \ + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7; \ + sph_u32 M8, M9, MA, MB, MC, MD, ME, MF; \ + sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \ + sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \ + V0 = H0; \ + V1 = H1; \ + V2 = H2; \ + V3 = H3; \ + V4 = H4; \ + V5 = H5; \ + V6 = H6; \ + V7 = H7; \ + V8 = S0 ^ CS0; \ + V9 = S1 ^ CS1; \ + VA = S2 ^ CS2; \ + VB = S3 ^ CS3; \ + VC = T0 ^ CS4; \ + VD = T0 ^ CS5; \ + VE = T1 ^ CS6; \ + VF =
T1 ^ CS7; \ + M0 = sph_dec32be_aligned(buf + 0); \ + M1 = sph_dec32be_aligned(buf + 4); \ + M2 = sph_dec32be_aligned(buf + 8); \ + M3 = sph_dec32be_aligned(buf + 12); \ + M4 = sph_dec32be_aligned(buf + 16); \ + M5 = sph_dec32be_aligned(buf + 20); \ + M6 = sph_dec32be_aligned(buf + 24); \ + M7 = sph_dec32be_aligned(buf + 28); \ + M8 = sph_dec32be_aligned(buf + 32); \ + M9 = sph_dec32be_aligned(buf + 36); \ + MA = sph_dec32be_aligned(buf + 40); \ + MB = sph_dec32be_aligned(buf + 44); \ + MC = sph_dec32be_aligned(buf + 48); \ + MD = sph_dec32be_aligned(buf + 52); \ + ME = sph_dec32be_aligned(buf + 56); \ + MF = sph_dec32be_aligned(buf + 60); \ + ROUND_S(0); /* eight rounds, written out */ \ + ROUND_S(1); \ + ROUND_S(2); \ + ROUND_S(3); \ + ROUND_S(4); \ + ROUND_S(5); \ + ROUND_S(6); \ + ROUND_S(7); \ + H0 ^= S0 ^ V0 ^ V8; \ + H1 ^= S1 ^ V1 ^ V9; \ + H2 ^= S2 ^ V2 ^ VA; \ + H3 ^= S3 ^ V3 ^ VB; \ + H4 ^= S0 ^ V4 ^ VC; \ + H5 ^= S1 ^ V5 ^ VD; \ + H6 ^= S2 ^ V6 ^ VE; \ + H7 ^= S3 ^ V7 ^ VF; \ + } while (0) + #define COMPRESS32 do { \ sph_u32 M0, M1, M2, M3, M4, M5, M6, M7; \ sph_u32 M8, M9, MA, MB, MC, MD, ME, MF; \ @@ -831,6 +935,44 @@ blake32(sph_blake_small_context *sc, const void *data, size_t len) sc->ptr = ptr; } +static void +blake32r8(sph_blake_small_context *sc, const void *data, size_t len) +{ /* Absorb len bytes of data into sc: bytes are staged in sc->buf, and every time the buffer fills the counter T0/T1 is advanced by 512 and COMPRESS32r8 is run. */ + unsigned char *buf; + size_t ptr; + DECL_STATE32 + + buf = sc->buf; + ptr = sc->ptr; + if (len < (sizeof sc->buf) - ptr) { /* input does not fill the buffer: stash it and return */ + memcpy(buf + ptr, data, len); + ptr += len; + sc->ptr = ptr; + return; + } + + READ_STATE32(sc); + while (len > 0) { + size_t clen; + + clen = (sizeof sc->buf) - ptr; /* room left in the buffer, capped to the remaining input below */ + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data = (const unsigned char *)data + clen; + len -= clen; + if (ptr == sizeof sc->buf) { + if ((T0 = SPH_T32(T0 + 512)) < 512) /* a result below 512 means T0 wrapped: carry into T1 */ + T1 = SPH_T32(T1 + 1); + COMPRESS32r8; + ptr = 0; + } + } + WRITE_STATE32(sc); + sc->ptr = ptr; +} + static void blake32_close(sph_blake_small_context *sc, unsigned ub, unsigned n, void *dst, size_t
out_size_w32) @@ -884,6 +1026,59 @@ blake32_close(sph_blake_small_context *sc, sph_enc32be(out + (k << 2), sc->H[k]); } +static void +blake32r8_close(sph_blake_small_context *sc, + unsigned ub, unsigned n, void *dst, size_t out_size_w32) +{ /* Finish the hash: append the n extra bits taken from ub plus the padding marker, add the counter value th:tl, push the final block(s) through blake32r8, then write out_size_w32 words of sc->H to dst. NOTE(review): appears to mirror blake32_close with blake32r8 substituted - confirm. */ + union { + unsigned char buf[64]; + sph_u32 dummy; + } u; + size_t ptr, k; + unsigned bit_len; + unsigned z; + sph_u32 th, tl; + unsigned char *out; + + ptr = sc->ptr; + bit_len = ((unsigned)ptr << 3) + n; + z = 0x80 >> n; + u.buf[ptr] = ((ub & -z) | z) & 0xFF; /* keep the top n bits of ub and set the next bit */ + tl = sc->T0 + bit_len; + th = sc->T1; + /* blake32r8 adds 512 to T0 before compressing, so the counter is pre-biased here by -512 */ if (ptr == 0 && n == 0) { + sc->T0 = SPH_C32(0xFFFFFE00); + sc->T1 = SPH_C32(0xFFFFFFFF); + } else if (sc->T0 == 0) { + sc->T0 = SPH_C32(0xFFFFFE00) + bit_len; + sc->T1 = SPH_T32(sc->T1 - 1); + } else { + sc->T0 -= 512 - bit_len; + } + if (bit_len <= 446) { /* padding and the two length words fit in the current block */ + memset(u.buf + ptr + 1, 0, 55 - ptr); + if (out_size_w32 == 8) /* the 8-word output variant sets the low bit of byte 55 */ + u.buf[55] |= 1; + sph_enc32be_aligned(u.buf + 56, th); + sph_enc32be_aligned(u.buf + 60, tl); + blake32r8(sc, u.buf + ptr, 64 - ptr); + } else { /* otherwise zero-fill and flush this block, then send one more block holding only padding and length */ + memset(u.buf + ptr + 1, 0, 63 - ptr); + blake32r8(sc, u.buf + ptr, 64 - ptr); + sc->T0 = SPH_C32(0xFFFFFE00); + sc->T1 = SPH_C32(0xFFFFFFFF); + memset(u.buf, 0, 56); + if (out_size_w32 == 8) + u.buf[55] = 1; + sph_enc32be_aligned(u.buf + 56, th); + sph_enc32be_aligned(u.buf + 60, tl); + blake32r8(sc, u.buf, 64); + } + out = (unsigned char *)dst; + for (k = 0; k < out_size_w32; k ++) /* emit the first out_size_w32 state words via sph_enc32be */ + sph_enc32be(out + (k << 2), sc->H[k]); +} + #if SPH_64 static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 }; @@ -1034,6 +1229,13 @@ sph_blake256(void *cc, const void *data, size_t len) blake32((sph_blake_small_context *)cc, data, len); } +/* see sph_blake.h */ +void +sph_blake256r8(void *cc, const void *data, size_t len) +{ /* 8-round update entry point: forwards to blake32r8 with cc cast to sph_blake_small_context */ + blake32r8((sph_blake_small_context *)cc, data, len); +} + /* see sph_blake.h */ void sph_blake256_close(void *cc, void *dst) @@ -1041,6 +1243,13 @@ sph_blake256_close(void *cc, void *dst) sph_blake256_addbits_and_close(cc, 0, 0, dst); } +/* see sph_blake.h */
+void +sph_blake256r8_close(void *cc, void *dst) +{ + sph_blake256r8_addbits_and_close(cc, 0, 0, dst); +} + /* see sph_blake.h */ void sph_blake256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) @@ -1049,6 +1258,14 @@ sph_blake256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) sph_blake256_init(cc); } +/* see sph_blake.h */ +void +sph_blake256r8_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + blake32r8_close((sph_blake_small_context *)cc, ub, n, dst, 8); + sph_blake256_init(cc); +} + #if SPH_64 /* see sph_blake.h */ diff --git a/sph/sph_blake.h b/sph/sph_blake.h index c3829cadb..8dea65cad 100644 --- a/sph/sph_blake.h +++ b/sph/sph_blake.h @@ -194,6 +194,7 @@ void sph_blake256_init(void *cc); * @param len the input data length (in bytes) */ void sph_blake256(void *cc, const void *data, size_t len); +void sph_blake256r8(void *cc, const void *data, size_t len); /** * Terminate the current BLAKE-256 computation and output the result into @@ -205,6 +206,7 @@ void sph_blake256(void *cc, const void *data, size_t len); * @param dst the destination buffer */ void sph_blake256_close(void *cc, void *dst); +void sph_blake256r8_close(void *cc, void *dst); /** * Add a few additional bits (0 to 7) to the current computation, then @@ -221,6 +223,8 @@ void sph_blake256_close(void *cc, void *dst); */ void sph_blake256_addbits_and_close( void *cc, unsigned ub, unsigned n, void *dst); +void sph_blake256r8_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); #if SPH_64 From 7cee9c66606b6de620570611ffceb06ca70f79ca Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 12 Feb 2016 02:01:46 +0000 Subject: [PATCH 33/63] Move precalc_hash to algorithm settings --- algorithm.c | 57 +++++++++--------- algorithm.h | 2 + algorithm/blake256.c | 26 ++++++++- algorithm/blake256.h | 1 + algorithm/blakecoin.c | 26 ++++++++- algorithm/blakecoin.h | 1 + driver-opencl.c | 16 +---- findnonce.c | 133 
------------------------------------------ findnonce.h | 1 - 9 files changed, 85 insertions(+), 178 deletions(-) diff --git a/algorithm.c b/algorithm.c index 406a84c66..b53fd2974 100644 --- a/algorithm.c +++ b/algorithm.c @@ -951,7 +951,7 @@ static cl_int queue_blake_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using litecoin algorithm #define A_SCRYPT(a) \ - { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options } + { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, NULL, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options } A_SCRYPT("ckolivas"), A_SCRYPT("alexkarnew"), A_SCRYPT("alexkarnold"), @@ -962,33 +962,33 @@ static algorithm_settings_t algos[] = { #undef A_SCRYPT #define A_NEOSCRYPT(a) \ - { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options } + { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, NULL, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options } A_NEOSCRYPT("neoscrypt"), #undef A_NEOSCRYPT #define A_PLUCK(a) \ - { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options } + { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, NULL, 
queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options } A_PLUCK("pluck"), #undef A_PLUCK #define A_CREDITS(a) \ - { a, ALGO_CRE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, credits_regenhash, queue_credits_kernel, gen_hash, NULL} + { a, ALGO_CRE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, credits_regenhash, NULL, queue_credits_kernel, gen_hash, NULL} A_CREDITS("credits"), #undef A_CREDITS #define A_YESCRYPT(a) \ - { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} + { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} A_YESCRYPT("yescrypt"), #undef A_YESCRYPT #define A_YESCRYPT_MULTI(a) \ - { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options} + { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, NULL, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options} A_YESCRYPT_MULTI("yescrypt-multi"), #undef A_YESCRYPT_MULTI // kernels starting from this will have difficulty calculated by using quarkcoin algorithm #define A_QUARK(a, b) \ - { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options } + { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 
0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } A_QUARK("quarkcoin", quarkcoin_regenhash), A_QUARK("qubitcoin", qubitcoin_regenhash), A_QUARK("animecoin", animecoin_regenhash), @@ -997,48 +997,48 @@ static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using bitcoin algorithm #define A_DARK(a, b) \ - { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options } + { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } A_DARK("darkcoin", darkcoin_regenhash), A_DARK("inkcoin", inkcoin_regenhash), A_DARK("myriadcoin-groestl", myriadcoin_groestl_regenhash), #undef A_DARK - { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256, NULL }, - { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256, NULL }, + { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, NULL, queue_sph_kernel, sha256, NULL }, + { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, NULL, queue_maxcoin_kernel, sha256, NULL }, - { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, + { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, NULL, 
queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, - { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, queue_sph_kernel, gen_hash, append_x13_compiler_options }, - { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options }, - { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, NULL, queue_sph_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options }, - { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_x13_compiler_options }, - { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_x13_compiler_options }, + { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, NULL, queue_x14_kernel, gen_hash, append_x13_compiler_options }, + { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, NULL, queue_x14_old_kernel, gen_hash, append_x13_compiler_options }, - { "bitblock", 
ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_x13_compiler_options }, - { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_x13_compiler_options }, + { "bitblock", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, queue_bitblock_kernel, gen_hash, append_x13_compiler_options }, + { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, queue_bitblockold_kernel, gen_hash, append_x13_compiler_options }, - { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, + { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, NULL, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, - { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL }, + { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, NULL, queue_fresh_kernel, gen_hash, NULL }, - { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, queue_lyra2re_kernel, gen_hash, NULL }, - { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options }, + { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, 
precalc_hash_blake256, queue_lyra2re_kernel, gen_hash, NULL }, + { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, precalc_hash_blake256, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options }, // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ - { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, c, NULL } + { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, c, NULL } A_FUGUE("fuguecoin", fuguecoin_regenhash, sha256), A_FUGUE("groestlcoin", groestlcoin_regenhash, sha256), A_FUGUE("diamond", groestlcoin_regenhash, gen_hash), #undef A_FUGUE - { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL }, - { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL }, + { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, NULL, queue_whirlcoin_kernel, sha256, NULL }, + { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, NULL, queue_whirlpoolx_kernel, gen_hash, NULL }, - { "blake256r8", ALGO_BLAKECOIN, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, queue_blake_kernel, sha256, NULL }, - { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, queue_blake_kernel, gen_hash, NULL }, - { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 
0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, queue_blake_kernel, gen_hash, NULL }, + { "blake256r8", ALGO_BLAKECOIN, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, precalc_hash_blakecoin, queue_blake_kernel, sha256, NULL }, + { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, precalc_hash_blake256, queue_blake_kernel, gen_hash, NULL }, + { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, precalc_hash_blakecoin, queue_blake_kernel, gen_hash, NULL }, // Terminator (do not remove) { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL } @@ -1069,6 +1069,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) dest->rw_buffer_size = src->rw_buffer_size; dest->cq_properties = src->cq_properties; dest->regenhash = src->regenhash; + dest->precalc_hash = src->precalc_hash; dest->queue_kernel = src->queue_kernel; dest->gen_hash = src->gen_hash; dest->set_compile_options = src->set_compile_options; diff --git a/algorithm.h b/algorithm.h index c227b43d7..afbd2f7ab 100644 --- a/algorithm.h +++ b/algorithm.h @@ -70,6 +70,7 @@ typedef struct _algorithm_t { long rw_buffer_size; cl_command_queue_properties cq_properties; void(*regenhash)(struct work *); + void(*precalc_hash)(struct _dev_blk_ctx *, uint32_t *, uint32_t *); cl_int(*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); void(*gen_hash)(const unsigned char *, unsigned int, unsigned char *); void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *); @@ -92,6 +93,7 @@ typedef struct _algorithm_settings_t long rw_buffer_size; cl_command_queue_properties cq_properties; void (*regenhash)(struct work *); + void (*precalc_hash)(struct _dev_blk_ctx *, uint32_t *, uint32_t *); cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); void (*gen_hash)(const 
unsigned char *, unsigned int, unsigned char *); void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *); diff --git a/algorithm/blake256.c b/algorithm/blake256.c index e347bc23f..ddd861229 100644 --- a/algorithm/blake256.c +++ b/algorithm/blake256.c @@ -62,9 +62,33 @@ inline void blake256hash(void *state, const void *input) sph_blake256_close(&ctx_blake, state); } -static const uint32_t diff1targ = 0x0000ffff; +void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata) +{ + sph_blake256_context ctx_blake; + uint32_t data[16]; + + be32enc_vect(data, (const uint32_t *)pdata, 16); + + sph_blake256_init(&ctx_blake); + sph_blake256(&ctx_blake, data, 64); + + blk->ctx_a = ctx_blake.H[0]; + blk->ctx_b = ctx_blake.H[1]; + blk->ctx_c = ctx_blake.H[2]; + blk->ctx_d = ctx_blake.H[3]; + blk->ctx_e = ctx_blake.H[4]; + blk->ctx_f = ctx_blake.H[5]; + blk->ctx_g = ctx_blake.H[6]; + blk->ctx_h = ctx_blake.H[7]; + + blk->cty_a = pdata[16]; + blk->cty_b = pdata[17]; + blk->cty_c = pdata[18]; +} +static const uint32_t diff1targ = 0x0000ffff; + /* Used externally as confirmation of correct OCL code */ int blake256_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) { diff --git a/algorithm/blake256.h b/algorithm/blake256.h index 535a73996..f021585d9 100644 --- a/algorithm/blake256.h +++ b/algorithm/blake256.h @@ -4,6 +4,7 @@ #include "miner.h" extern int blake256_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata); extern void blake256_regenhash(struct work *work); #endif /* BLAKE256_H */ \ No newline at end of file diff --git a/algorithm/blakecoin.c b/algorithm/blakecoin.c index 58974c914..6a118cef1 100644 --- a/algorithm/blakecoin.c +++ b/algorithm/blakecoin.c @@ -62,9 +62,33 @@ inline void blakecoinhash(void *state, const void *input) sph_blake256r8_close(&ctx_blake, state); } -static const uint32_t 
diff1targ = 0x0000ffff; +void precalc_hash_blakecoin(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata) +{ + sph_blake256_context ctx_blake; + uint32_t data[16]; + + be32enc_vect(data, (const uint32_t *)pdata, 16); + + sph_blake256_init(&ctx_blake); + sph_blake256r8(&ctx_blake, data, 64); + + blk->ctx_a = ctx_blake.H[0]; + blk->ctx_b = ctx_blake.H[1]; + blk->ctx_c = ctx_blake.H[2]; + blk->ctx_d = ctx_blake.H[3]; + blk->ctx_e = ctx_blake.H[4]; + blk->ctx_f = ctx_blake.H[5]; + blk->ctx_g = ctx_blake.H[6]; + blk->ctx_h = ctx_blake.H[7]; + + blk->cty_a = pdata[16]; + blk->cty_b = pdata[17]; + blk->cty_c = pdata[18]; +} +static const uint32_t diff1targ = 0x0000ffff; + /* Used externally as confirmation of correct OCL code */ int blakecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) { diff --git a/algorithm/blakecoin.h b/algorithm/blakecoin.h index 28b9b3d70..dcaeff71d 100644 --- a/algorithm/blakecoin.h +++ b/algorithm/blakecoin.h @@ -4,6 +4,7 @@ #include "miner.h" extern int blakecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void precalc_hash_blakecoin(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void blakecoin_regenhash(struct work *work); #endif /* BLAKECOIN_H */ \ No newline at end of file diff --git a/driver-opencl.c b/driver-opencl.c index e89cc3503..0e368782b 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1366,20 +1366,8 @@ static bool opencl_thread_init(struct thr_info *thr) static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work) { - if (work->pool->algorithm.type == ALGO_LYRA2RE || - work->pool->algorithm.type == ALGO_LYRA2REV2 || - work->pool->algorithm.type == ALGO_BLAKE) { - work->blk.work = work; - precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data), 14); - } - else if (work->pool->algorithm.type == ALGO_BLAKECOIN || - work->pool->algorithm.type == ALGO_VANILLA) { - work->blk.work = work; - precalc_hash_blake256(&work->blk, 
0, (uint32_t *)(work->data), 8); - } - else { - work->blk.work = work; - } + work->blk.work = work; + if (work->pool->algorithm.precalc_hash) work->pool->algorithm.precalc_hash(&work->blk, 0, (uint32_t *)(work->data)); thr->pool_no = work->pool->pool_no; return true; } diff --git a/findnonce.c b/findnonce.c index 42af361f2..72dcaaef4 100644 --- a/findnonce.c +++ b/findnonce.c @@ -234,136 +234,3 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) free(pcd); } } - -typedef struct -{ - uint32_t h[8]; - uint32_t t; -} blake_state256; - -int NB_ROUNDS32; - -const uint8_t blake_sigma[][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } -}; - -const uint32_t blake_u256[16] = -{ - 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, - 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, - 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, - 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917 -}; - -#define ROT32(x,n) (((x)<<(32-n))|( (x)>>(n))) -//#define ROT32(x,n) (rotate((uint)x, (uint)32-n)) -#define ADD32(x,y) ((uint32_t)((x) + (y))) -#define XOR32(x,y) 
((uint32_t)((x) ^ (y))) - -#define G(a,b,c,d,i) \ -do { \ - v[a] += XOR32(m[blake_sigma[r][i]], blake_u256[blake_sigma[r][i + 1]]) + v[b]; \ - v[d] = ROT32(XOR32(v[d], v[a]), 16); \ - v[c] += v[d]; \ - v[b] = ROT32(XOR32(v[b], v[c]), 12); \ - v[a] += XOR32(m[blake_sigma[r][i + 1]], blake_u256[blake_sigma[r][i]]) + v[b]; \ - v[d] = ROT32(XOR32(v[d], v[a]), 8); \ - v[c] += v[d]; \ - v[b] = ROT32(XOR32(v[b], v[c]), 7); \ -} while (0) - -// compress a block -void blake256_compress_block(blake_state256 *S, uint32_t *m) -{ - uint32_t v[16]; - int i, r; - for (i = 0; i < 8; ++i) v[i] = S->h[i]; - - v[8] = blake_u256[0]; - v[9] = blake_u256[1]; - v[10] = blake_u256[2]; - v[11] = blake_u256[3]; - v[12] = blake_u256[4]; - v[13] = blake_u256[5]; - v[14] = blake_u256[6]; - v[15] = blake_u256[7]; - - v[12] ^= S->t; - v[13] ^= S->t; - - for (r = 0; r < NB_ROUNDS32; ++r) - { - /* column step */ - G(0, 4, 8, 12, 0); - G(1, 5, 9, 13, 2); - G(2, 6, 10, 14, 4); - G(3, 7, 11, 15, 6); - /* diagonal step */ - G(0, 5, 10, 15, 8); - G(1, 6, 11, 12, 10); - G(2, 7, 8, 13, 12); - G(3, 4, 9, 14, 14); - } - - for (i = 0; i < 16; ++i) S->h[i & 7] ^= v[i]; -} - -void blake256_init(blake_state256 *S) -{ - S->h[0] = 0x6a09e667; - S->h[1] = 0xbb67ae85; - S->h[2] = 0x3c6ef372; - S->h[3] = 0xa54ff53a; - S->h[4] = 0x510e527f; - S->h[5] = 0x9b05688c; - S->h[6] = 0x1f83d9ab; - S->h[7] = 0x5be0cd19; - S->t = 0; -} - -void blake256_update(blake_state256 *S, const uint32_t *in) -{ - uint32_t m[16]; - int i; - S->t = 512; - for (i = 0; i < 16; ++i) m[i] = in[i]; - blake256_compress_block(S, m); -} - -void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data, int blake256_rounds) -{ - NB_ROUNDS32 = blake256_rounds; - - blake_state256 S; - blake256_init(&S); - blake256_update(&S, data); - - blk->ctx_a = S.h[0]; - blk->ctx_b = S.h[1]; - blk->ctx_c = S.h[2]; - blk->ctx_d = S.h[3]; - blk->ctx_e = S.h[4]; - blk->ctx_f = S.h[5]; - blk->ctx_g = S.h[6]; - blk->ctx_h = S.h[7]; - - blk->cty_a = 
data[16]; - blk->cty_b = data[17]; - blk->cty_c = data[18]; -} diff --git a/findnonce.h b/findnonce.h index e268ce1b8..9376a57be 100644 --- a/findnonce.h +++ b/findnonce.h @@ -10,6 +10,5 @@ extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res); -extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data, int blake256_rounds); #endif /*FINDNONCE_H*/ From 85290b21d82fabe16edbd87ea1d103607c5b83e6 Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 12 Feb 2016 18:54:35 +0100 Subject: [PATCH 34/63] Case-insensitive string algorithm comparison --- algorithm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algorithm.c b/algorithm.c index b53fd2974..8d436e17b 100644 --- a/algorithm.c +++ b/algorithm.c @@ -1051,7 +1051,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) // Find algorithm settings and copy for (src = algos; src->name; src++) { - if (strcmp(src->name, algo) == 0) + if (strcasecmp(src->name, algo) == 0) { strcpy(dest->name, src->name); dest->kernelfile = src->kernelfile; From e75e16ccaa2b8ed72b1089039c78cdf7b916db3d Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 12 Feb 2016 23:35:05 +0100 Subject: [PATCH 35/63] Version bump to 5.3.0 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 681c2b9c2..659ad8430 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [5]) -m4_define([v_min], [2]) +m4_define([v_min], [3]) m4_define([v_mic], [0]) m4_define([v_rev], [nicehash]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## From 20bfe4578999ef463d5fd655eb266abc5ecae71f Mon Sep 17 00:00:00 2001 From: elbandi Date: Mon, 21 Mar 2016 18:15:54 +0100 Subject: [PATCH 
36/63] Calculate midstate for algos --- algorithm.c | 74 +++++++++++++++++++++++++------------------ algorithm.h | 6 ++-- algorithm/blake256.c | 45 ++++++++++++++++---------- algorithm/blake256.h | 3 +- algorithm/blakecoin.c | 45 ++++++++++++++++---------- algorithm/blakecoin.h | 3 +- driver-opencl.c | 2 +- sgminer.c | 23 ++------------ 8 files changed, 112 insertions(+), 89 deletions(-) diff --git a/algorithm.c b/algorithm.c index 8d436e17b..24165546b 100644 --- a/algorithm.c +++ b/algorithm.c @@ -94,6 +94,19 @@ void gen_hash(const unsigned char *data, unsigned int len, unsigned char *hash) sph_sha256_close(&ctx_sha2, hash); } +void sha256d_midstate(struct work *work) +{ + unsigned char data[64]; + uint32_t *data32 = (uint32_t *)data; + sph_sha256_context ctx; + + flip64(data32, work->data); + sph_sha256_init(&ctx); + sph_sha256(&ctx, data, 64); + memcpy(work->midstate, ctx.val, 32); + endian_flip32(work->midstate, work->midstate); +} + #define CL_SET_BLKARG(blkvar) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->blkvar) #define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var) #define CL_SET_ARG_N(n, var) do { status |= clSetKernelArg(*kernel, n, sizeof(var), (void *)&var); } while (0) @@ -951,7 +964,7 @@ static cl_int queue_blake_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using litecoin algorithm #define A_SCRYPT(a) \ - { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, NULL, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options } + { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, NULL, NULL, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options } A_SCRYPT("ckolivas"), 
A_SCRYPT("alexkarnew"), A_SCRYPT("alexkarnold"), @@ -962,33 +975,33 @@ static algorithm_settings_t algos[] = { #undef A_SCRYPT #define A_NEOSCRYPT(a) \ - { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, NULL, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options } + { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, NULL, NULL, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options } A_NEOSCRYPT("neoscrypt"), #undef A_NEOSCRYPT #define A_PLUCK(a) \ - { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, NULL, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options } + { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, NULL, NULL, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options } A_PLUCK("pluck"), #undef A_PLUCK #define A_CREDITS(a) \ - { a, ALGO_CRE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, credits_regenhash, NULL, queue_credits_kernel, gen_hash, NULL} + { a, ALGO_CRE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, credits_regenhash, NULL, NULL, queue_credits_kernel, gen_hash, NULL} A_CREDITS("credits"), #undef A_CREDITS #define A_YESCRYPT(a) \ - { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} + { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} A_YESCRYPT("yescrypt"), #undef A_YESCRYPT #define A_YESCRYPT_MULTI(a) \ - { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, NULL, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options} + { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, NULL, NULL, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options} A_YESCRYPT_MULTI("yescrypt-multi"), #undef A_YESCRYPT_MULTI // kernels starting from this will have difficulty calculated by using quarkcoin algorithm #define A_QUARK(a, b) \ - { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } + { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } A_QUARK("quarkcoin", quarkcoin_regenhash), A_QUARK("qubitcoin", qubitcoin_regenhash), A_QUARK("animecoin", animecoin_regenhash), @@ -997,51 +1010,51 @@ static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using bitcoin algorithm #define A_DARK(a, b) \ - { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } + { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } A_DARK("darkcoin", darkcoin_regenhash), A_DARK("inkcoin", 
inkcoin_regenhash), A_DARK("myriadcoin-groestl", myriadcoin_groestl_regenhash), #undef A_DARK - { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, NULL, queue_sph_kernel, sha256, NULL }, - { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, NULL, queue_maxcoin_kernel, sha256, NULL }, + { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, NULL, NULL, queue_sph_kernel, sha256, NULL }, + { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, NULL, NULL, queue_maxcoin_kernel, sha256, NULL }, - { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, NULL, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, + { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, NULL, NULL, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, - { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, NULL, queue_sph_kernel, gen_hash, append_x13_compiler_options }, - { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options }, - { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 
marucoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, NULL, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options }, + { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, NULL, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options }, - { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, NULL, queue_x14_kernel, gen_hash, append_x13_compiler_options }, - { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, NULL, queue_x14_old_kernel, gen_hash, append_x13_compiler_options }, + { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, NULL, NULL, queue_x14_kernel, gen_hash, append_x13_compiler_options }, + { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, NULL, NULL, queue_x14_old_kernel, gen_hash, append_x13_compiler_options }, - { "bitblock", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, queue_bitblock_kernel, gen_hash, append_x13_compiler_options }, - { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, queue_bitblockold_kernel, gen_hash, append_x13_compiler_options }, + { "bitblock", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, NULL, queue_bitblock_kernel, gen_hash, append_x13_compiler_options }, + { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, NULL, queue_bitblockold_kernel, gen_hash, 
append_x13_compiler_options }, - { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, NULL, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, + { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, NULL, NULL, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, - { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, NULL, queue_fresh_kernel, gen_hash, NULL }, + { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, NULL, NULL, queue_fresh_kernel, gen_hash, NULL }, - { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, precalc_hash_blake256, queue_lyra2re_kernel, gen_hash, NULL }, - { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, precalc_hash_blake256, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options }, + { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2re_kernel, gen_hash, NULL }, + { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options }, // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ - { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, c, NULL } + { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 
0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, c, NULL } A_FUGUE("fuguecoin", fuguecoin_regenhash, sha256), A_FUGUE("groestlcoin", groestlcoin_regenhash, sha256), A_FUGUE("diamond", groestlcoin_regenhash, gen_hash), #undef A_FUGUE - { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, NULL, queue_whirlcoin_kernel, sha256, NULL }, - { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, NULL, queue_whirlpoolx_kernel, gen_hash, NULL }, + { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, NULL, NULL, queue_whirlcoin_kernel, sha256, NULL }, + { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, NULL, NULL, queue_whirlpoolx_kernel, gen_hash, NULL }, - { "blake256r8", ALGO_BLAKECOIN, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, precalc_hash_blakecoin, queue_blake_kernel, sha256, NULL }, - { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, precalc_hash_blake256, queue_blake_kernel, gen_hash, NULL }, - { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, precalc_hash_blakecoin, queue_blake_kernel, gen_hash, NULL }, + { "blake256r8", ALGO_BLAKECOIN, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, sha256, NULL }, + { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, blake256_midstate, blake256_prepare_work, queue_blake_kernel, gen_hash, NULL }, + { "vanilla", ALGO_VANILLA, "", 1, 1, 
1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL }, // Terminator (do not remove) - { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL } + { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } }; void copy_algorithm_settings(algorithm_t* dest, const char* algo) @@ -1069,7 +1082,8 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) dest->rw_buffer_size = src->rw_buffer_size; dest->cq_properties = src->cq_properties; dest->regenhash = src->regenhash; - dest->precalc_hash = src->precalc_hash; + dest->calc_midstate = src->calc_midstate; + dest->prepare_work = src->prepare_work; dest->queue_kernel = src->queue_kernel; dest->gen_hash = src->gen_hash; dest->set_compile_options = src->set_compile_options; diff --git a/algorithm.h b/algorithm.h index afbd2f7ab..fc116a7d7 100644 --- a/algorithm.h +++ b/algorithm.h @@ -70,7 +70,8 @@ typedef struct _algorithm_t { long rw_buffer_size; cl_command_queue_properties cq_properties; void(*regenhash)(struct work *); - void(*precalc_hash)(struct _dev_blk_ctx *, uint32_t *, uint32_t *); + void(*calc_midstate)(struct work *); + void(*prepare_work)(struct _dev_blk_ctx *, uint32_t *, uint32_t *); cl_int(*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); void(*gen_hash)(const unsigned char *, unsigned int, unsigned char *); void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *); @@ -93,7 +94,8 @@ typedef struct _algorithm_settings_t long rw_buffer_size; cl_command_queue_properties cq_properties; void (*regenhash)(struct work *); - void (*precalc_hash)(struct _dev_blk_ctx *, uint32_t *, uint32_t *); + void (*calc_midstate)(struct work *); + void (*prepare_work)(struct _dev_blk_ctx *, uint32_t *, uint32_t *); cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); void (*gen_hash)(const 
unsigned char *, unsigned int, unsigned char *); void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *); diff --git a/algorithm/blake256.c b/algorithm/blake256.c index ddd861229..1ed0f00cd 100644 --- a/algorithm/blake256.c +++ b/algorithm/blake256.c @@ -62,24 +62,35 @@ inline void blake256hash(void *state, const void *input) sph_blake256_close(&ctx_blake, state); } -void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata) +void blake256_midstate(struct work *work) { - sph_blake256_context ctx_blake; - uint32_t data[16]; - - be32enc_vect(data, (const uint32_t *)pdata, 16); - - sph_blake256_init(&ctx_blake); - sph_blake256(&ctx_blake, data, 64); - - blk->ctx_a = ctx_blake.H[0]; - blk->ctx_b = ctx_blake.H[1]; - blk->ctx_c = ctx_blake.H[2]; - blk->ctx_d = ctx_blake.H[3]; - blk->ctx_e = ctx_blake.H[4]; - blk->ctx_f = ctx_blake.H[5]; - blk->ctx_g = ctx_blake.H[6]; - blk->ctx_h = ctx_blake.H[7]; + sph_blake256_context ctx_blake; + uint32_t data[16]; + + be32enc_vect(data, (const uint32_t *)work->data, 19); + + sph_blake256_init(&ctx_blake); + sph_blake256 (&ctx_blake, (unsigned char *)data, 64); + + memcpy(work->midstate, ctx_blake.H, 32); + endian_flip32(work->midstate, work->midstate); + + char *strdata, *strmidstate; + strdata = bin2hex(work->data, 80); + strmidstate = bin2hex(work->midstate, 32); + applog(LOG_DEBUG, "data %s midstate %s", strdata, strmidstate); +} + +void blake256_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata) +{ + blk->ctx_a = state[0]; + blk->ctx_b = state[1]; + blk->ctx_c = state[2]; + blk->ctx_d = state[3]; + blk->ctx_e = state[4]; + blk->ctx_f = state[5]; + blk->ctx_g = state[6]; + blk->ctx_h = state[7]; blk->cty_a = pdata[16]; blk->cty_b = pdata[17]; diff --git a/algorithm/blake256.h b/algorithm/blake256.h index f021585d9..b155b4743 100644 --- a/algorithm/blake256.h +++ b/algorithm/blake256.h @@ -4,7 +4,8 @@ #include "miner.h" extern int blake256_test(unsigned char *pdata, 
const unsigned char *ptarget, uint32_t nonce); -extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata); +extern void blake256_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata); +extern void blake256_midstate(struct work *work); extern void blake256_regenhash(struct work *work); #endif /* BLAKE256_H */ \ No newline at end of file diff --git a/algorithm/blakecoin.c b/algorithm/blakecoin.c index 6a118cef1..8af738973 100644 --- a/algorithm/blakecoin.c +++ b/algorithm/blakecoin.c @@ -62,24 +62,35 @@ inline void blakecoinhash(void *state, const void *input) sph_blake256r8_close(&ctx_blake, state); } -void precalc_hash_blakecoin(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata) +void blakecoin_midstate(struct work *work) { - sph_blake256_context ctx_blake; - uint32_t data[16]; - - be32enc_vect(data, (const uint32_t *)pdata, 16); - - sph_blake256_init(&ctx_blake); - sph_blake256r8(&ctx_blake, data, 64); - - blk->ctx_a = ctx_blake.H[0]; - blk->ctx_b = ctx_blake.H[1]; - blk->ctx_c = ctx_blake.H[2]; - blk->ctx_d = ctx_blake.H[3]; - blk->ctx_e = ctx_blake.H[4]; - blk->ctx_f = ctx_blake.H[5]; - blk->ctx_g = ctx_blake.H[6]; - blk->ctx_h = ctx_blake.H[7]; + sph_blake256_context ctx_blake; + uint32_t data[16]; + + be32enc_vect(data, (const uint32_t *)work->data, 19); + + sph_blake256_init(&ctx_blake); + sph_blake256r8 (&ctx_blake, (unsigned char *)data, 64); + + memcpy(work->midstate, ctx_blake.H, 32); + endian_flip32(work->midstate, work->midstate); + + char *strdata, *strmidstate; + strdata = bin2hex(work->data, 80); + strmidstate = bin2hex(work->midstate, 32); + applog(LOG_DEBUG, "data %s midstate %s", strdata, strmidstate); +} + +void blakecoin_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata) +{ + blk->ctx_a = state[0]; + blk->ctx_b = state[1]; + blk->ctx_c = state[2]; + blk->ctx_d = state[3]; + blk->ctx_e = state[4]; + blk->ctx_f = state[5]; + blk->ctx_g = state[6]; + blk->ctx_h = state[7]; blk->cty_a = 
pdata[16]; blk->cty_b = pdata[17]; diff --git a/algorithm/blakecoin.h b/algorithm/blakecoin.h index dcaeff71d..178dae538 100644 --- a/algorithm/blakecoin.h +++ b/algorithm/blakecoin.h @@ -4,7 +4,8 @@ #include "miner.h" extern int blakecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); -extern void precalc_hash_blakecoin(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); +extern void blakecoin_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); +extern void blakecoin_midstate(struct work *work); extern void blakecoin_regenhash(struct work *work); #endif /* BLAKECOIN_H */ \ No newline at end of file diff --git a/driver-opencl.c b/driver-opencl.c index 0e368782b..3e6667bcc 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1367,7 +1367,7 @@ static bool opencl_thread_init(struct thr_info *thr) static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work) { work->blk.work = work; - if (work->pool->algorithm.precalc_hash) work->pool->algorithm.precalc_hash(&work->blk, 0, (uint32_t *)(work->data)); + if (work->pool->algorithm.prepare_work) work->pool->algorithm.prepare_work(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data)); thr->pool_no = work->pool->pool_no; return true; } diff --git a/sgminer.c b/sgminer.c index ca58f701d..f317da7ba 100644 --- a/sgminer.c +++ b/sgminer.c @@ -1906,20 +1906,6 @@ static bool jobj_binary(const json_t *obj, const char *key, } #endif -static void calc_midstate(struct work *work) -{ - unsigned char data[64]; - uint32_t *data32 = (uint32_t *)data; - sph_sha256_context ctx; - - flip64(data32, work->data); - sph_sha256_init(&ctx); - sph_sha256(&ctx, data, 64); - memcpy(work->midstate, ctx.val, 32); - endian_flip32(work->midstate, work->midstate); -} - - static struct work *make_work(void) { struct work *w = (struct work *)calloc(1, sizeof(struct work)); @@ -2147,10 +2133,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work) free(header); } - // 
Neoscrypt doesn't calc_midstate() - if (pool->algorithm.type != ALGO_NEOSCRYPT) { - calc_midstate(work); - } + if (pool->algorithm.calc_midstate) pool->algorithm.calc_midstate(work); local_work++; work->pool = pool; work->gbt = true; @@ -2276,7 +2259,7 @@ static bool getwork_decode(json_t *res_val, struct work *work) if (opt_morenotices) { applog(LOG_DEBUG, "%s: Calculating midstate locally", isnull(get_pool_name(work->pool), "")); } - calc_midstate(work); + if (work->pool->algorithm.calc_midstate) work->pool->algorithm.calc_midstate(work); } } @@ -6143,7 +6126,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) if (pool->algorithm.type == ALGO_NEOSCRYPT) { set_target_neoscrypt(work->target, work->sdiff, work->thr_id); } else { - calc_midstate(work); + if (pool->algorithm.calc_midstate) pool->algorithm.calc_midstate(work); set_target(work->target, work->sdiff, pool->algorithm.diff_multiplier2, work->thr_id); } From 72a15389c9e8e47119e9232cf0435fe82191583c Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 31 Mar 2016 20:03:35 +0200 Subject: [PATCH 37/63] Remove inline from hash functions --- algorithm/animecoin.c | 2 +- algorithm/bitblock.c | 2 +- algorithm/blake256.c | 4 ++-- algorithm/blakecoin.c | 4 ++-- algorithm/credits.c | 2 +- algorithm/darkcoin.c | 2 +- algorithm/fresh.c | 2 +- algorithm/fuguecoin.c | 2 +- algorithm/groestlcoin.c | 2 +- algorithm/inkcoin.c | 2 +- algorithm/lyra2re.c | 2 +- algorithm/lyra2rev2.c | 2 +- algorithm/marucoin.c | 2 +- algorithm/myriadcoin-groestl.c | 2 +- algorithm/pluck.c | 2 +- algorithm/quarkcoin.c | 2 +- algorithm/qubitcoin.c | 2 +- algorithm/sifcoin.c | 2 +- algorithm/talkcoin.c | 2 +- algorithm/twecoin.c | 2 +- algorithm/whirlcoin.c | 2 +- algorithm/x14.c | 2 +- 22 files changed, 24 insertions(+), 24 deletions(-) diff --git a/algorithm/animecoin.c b/algorithm/animecoin.c index 93ee91b0a..00bbc70a7 100644 --- a/algorithm/animecoin.c +++ b/algorithm/animecoin.c @@ -55,7 +55,7 @@ be32enc_vect(uint32_t *dst, 
const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void animehash(void *state, const void *input) +void animehash(void *state, const void *input) { sph_blake512_context ctx_blake; sph_bmw512_context ctx_bmw; diff --git a/algorithm/bitblock.c b/algorithm/bitblock.c index 1987359bd..b91675fd2 100644 --- a/algorithm/bitblock.c +++ b/algorithm/bitblock.c @@ -109,7 +109,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void bitblockhash(void *state, const void *input) +void bitblockhash(void *state, const void *input) { init_Bhash_contexts(); diff --git a/algorithm/blake256.c b/algorithm/blake256.c index 1ed0f00cd..8d4b6c56b 100644 --- a/algorithm/blake256.c +++ b/algorithm/blake256.c @@ -43,7 +43,7 @@ * Encode a length len/4 vector of (uint32_t) into a length len vector of * (unsigned char) in big-endian form. Assumes len is a multiple of 4. */ -static inline void +static void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) { uint32_t i; @@ -54,7 +54,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) static const uint32_t diff1targ_blake256 = 0x000000ff; -inline void blake256hash(void *state, const void *input) +void blake256hash(void *state, const void *input) { sph_blake256_context ctx_blake; sph_blake256_init(&ctx_blake); diff --git a/algorithm/blakecoin.c b/algorithm/blakecoin.c index 8af738973..9e2998563 100644 --- a/algorithm/blakecoin.c +++ b/algorithm/blakecoin.c @@ -43,7 +43,7 @@ * Encode a length len/4 vector of (uint32_t) into a length len vector of * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
*/ -static inline void +static void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) { uint32_t i; @@ -54,7 +54,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) static const uint32_t diff1targ_blake256 = 0x000000ff; -inline void blakecoinhash(void *state, const void *input) +void blakecoinhash(void *state, const void *input) { sph_blake256_context ctx_blake; sph_blake256_init(&ctx_blake); diff --git a/algorithm/credits.c b/algorithm/credits.c index b69514bca..c543b611c 100644 --- a/algorithm/credits.c +++ b/algorithm/credits.c @@ -37,7 +37,7 @@ static const uint32_t diff1targ = 0x0000ffff; -inline void credits_hash(void *state, const void *input) +void credits_hash(void *state, const void *input) { sph_sha256_context sha1, sha2; uint32_t hash[8], hash2[8]; diff --git a/algorithm/darkcoin.c b/algorithm/darkcoin.c index 192ab2ab9..06e202f86 100644 --- a/algorithm/darkcoin.c +++ b/algorithm/darkcoin.c @@ -94,7 +94,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) } -static inline void xhash(void *state, const void *input) +static void xhash(void *state, const void *input) { init_Xhash_contexts(); diff --git a/algorithm/fresh.c b/algorithm/fresh.c index 883ced310..321300adf 100644 --- a/algorithm/fresh.c +++ b/algorithm/fresh.c @@ -78,7 +78,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void freshHash(void *state, const void *input) +void freshHash(void *state, const void *input) { init_freshHash_contexts(); diff --git a/algorithm/fuguecoin.c b/algorithm/fuguecoin.c index faa6d7fcf..387cd6cbe 100644 --- a/algorithm/fuguecoin.c +++ b/algorithm/fuguecoin.c @@ -50,7 +50,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void fuguehash(void *state, const void *input) +void fuguehash(void *state, const void *input) { sph_fugue256_context ctx_fugue; sph_fugue256_init(&ctx_fugue); diff --git 
a/algorithm/groestlcoin.c b/algorithm/groestlcoin.c index f1f109356..41e186898 100644 --- a/algorithm/groestlcoin.c +++ b/algorithm/groestlcoin.c @@ -54,7 +54,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void groestlhash(void *state, const void *input) +void groestlhash(void *state, const void *input) { sph_groestl512_context ctx_groestl; diff --git a/algorithm/inkcoin.c b/algorithm/inkcoin.c index abff2fdf6..b09b3966a 100644 --- a/algorithm/inkcoin.c +++ b/algorithm/inkcoin.c @@ -93,7 +93,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void inkhash(void *state, const void *input) +void inkhash(void *state, const void *input) { uint32_t hash[16]; sph_shavite512_context ctx_shavite; diff --git a/algorithm/lyra2re.c b/algorithm/lyra2re.c index 61f2b34f8..3ad136080 100644 --- a/algorithm/lyra2re.c +++ b/algorithm/lyra2re.c @@ -52,7 +52,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) } -inline void lyra2rehash(void *state, const void *input) +void lyra2rehash(void *state, const void *input) { sph_blake256_context ctx_blake; sph_groestl256_context ctx_groestl; diff --git a/algorithm/lyra2rev2.c b/algorithm/lyra2rev2.c index aea0082a7..7af069d5b 100644 --- a/algorithm/lyra2rev2.c +++ b/algorithm/lyra2rev2.c @@ -54,7 +54,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) } -inline void lyra2rev2hash(void *state, const void *input) +void lyra2rev2hash(void *state, const void *input) { sph_blake256_context ctx_blake; sph_bmw256_context ctx_bmw; diff --git a/algorithm/marucoin.c b/algorithm/marucoin.c index 38477d3f9..2cdc7de83 100644 --- a/algorithm/marucoin.c +++ b/algorithm/marucoin.c @@ -103,7 +103,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void maruhash(void *state, const void *input) +void maruhash(void *state, const void *input) { 
init_Mhash_contexts(); diff --git a/algorithm/myriadcoin-groestl.c b/algorithm/myriadcoin-groestl.c index 1d268f513..3eaa19fbe 100644 --- a/algorithm/myriadcoin-groestl.c +++ b/algorithm/myriadcoin-groestl.c @@ -54,7 +54,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void mghash(void *state, const void *input) +void mghash(void *state, const void *input) { sph_groestl512_context ctx_groestl; sph_sha256_context ctx_sha2; diff --git a/algorithm/pluck.c b/algorithm/pluck.c index 093dd68ee..fad95ca5b 100644 --- a/algorithm/pluck.c +++ b/algorithm/pluck.c @@ -331,7 +331,7 @@ void sha256_hash512(unsigned char *hash, const unsigned char *data) be32enc((uint32_t *)hash + i, S[i]); } -inline void pluckrehash(void *state, const void *input) +void pluckrehash(void *state, const void *input) { int i,j; diff --git a/algorithm/quarkcoin.c b/algorithm/quarkcoin.c index a2e47d7f7..8897ff7f6 100644 --- a/algorithm/quarkcoin.c +++ b/algorithm/quarkcoin.c @@ -55,7 +55,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void quarkhash(void *state, const void *input) +void quarkhash(void *state, const void *input) { sph_blake512_context ctx_blake; sph_bmw512_context ctx_bmw; diff --git a/algorithm/qubitcoin.c b/algorithm/qubitcoin.c index 547aed2a7..e644c2c7a 100644 --- a/algorithm/qubitcoin.c +++ b/algorithm/qubitcoin.c @@ -80,7 +80,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void qhash(void *state, const void *input) +void qhash(void *state, const void *input) { init_Qhash_contexts(); diff --git a/algorithm/sifcoin.c b/algorithm/sifcoin.c index 2787c2e6b..efa4baaca 100644 --- a/algorithm/sifcoin.c +++ b/algorithm/sifcoin.c @@ -55,7 +55,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void sifhash(void *state, const void *input) +void 
sifhash(void *state, const void *input) { sph_blake512_context ctx_blake; sph_bmw512_context ctx_bmw; diff --git a/algorithm/talkcoin.c b/algorithm/talkcoin.c index 16d6fa28a..e27f526c9 100644 --- a/algorithm/talkcoin.c +++ b/algorithm/talkcoin.c @@ -75,7 +75,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void talkhash(void *state, const void *input) +void talkhash(void *state, const void *input) { init_Nhash_contexts(); diff --git a/algorithm/twecoin.c b/algorithm/twecoin.c index 97ca34a0d..67cb0c001 100644 --- a/algorithm/twecoin.c +++ b/algorithm/twecoin.c @@ -56,7 +56,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void twehash(void *state, const void *input) +void twehash(void *state, const void *input) { sph_fugue256_context ctx_fugue; sph_shavite256_context ctx_shavite; diff --git a/algorithm/whirlcoin.c b/algorithm/whirlcoin.c index 12f35006f..8e1954c14 100644 --- a/algorithm/whirlcoin.c +++ b/algorithm/whirlcoin.c @@ -72,7 +72,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) #ifdef __APPLE_CC__ static #endif -inline void whirlcoin_hash(void *state, const void *input) +void whirlcoin_hash(void *state, const void *input) { init_whirlcoin_hash_contexts(); diff --git a/algorithm/x14.c b/algorithm/x14.c index 7cc3279eb..44c588923 100644 --- a/algorithm/x14.c +++ b/algorithm/x14.c @@ -104,7 +104,7 @@ static inline void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len #ifdef __APPLE_CC__ static #endif -inline void x14hash(void *state, const void *input) +void x14hash(void *state, const void *input) { init_X14hash_contexts(); From b86f89616c9fc9330347442cd8fd71b4969f357b Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 31 Mar 2016 20:03:51 +0200 Subject: [PATCH 38/63] Check pthread first --- configure.ac | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/configure.ac 
b/configure.ac index 659ad8430..1b9dbf45e 100644 --- a/configure.ac +++ b/configure.ac @@ -161,17 +161,16 @@ AC_LINK_IFELSE( LIBS=$SAVED_LIBS CFLAGS=$SAVED_CFLAGS -has_winpthread=false -if test "x$have_win32" = xtrue; then - has_winpthread=true - AC_CHECK_LIB(winpthread, nanosleep, , has_winpthread=false) - PTHREAD_LIBS=-lwinpthread -fi - -if test "x$has_winpthread" != xtrue; then - AC_CHECK_LIB(pthread, pthread_create, , - AC_MSG_ERROR([Could not find pthread library - please install libpthread])) - PTHREAD_LIBS=-lpthread +has_pthread=false +AC_CHECK_LIB(pthread, pthread_create, has_pthread=true) +PTHREAD_LIBS=-lpthread + +if test "x$has_pthread" != xtrue; then + if test "x$have_win32" = xtrue; then + AC_CHECK_LIB(winpthread, nanosleep, , + AC_MSG_ERROR([Could not find pthread library - please install libpthread])) + PTHREAD_LIBS=-lwinpthread + fi fi AC_ARG_ENABLE([adl], From 571af9377aa8d177eef2ceb5706265fbac4de441 Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 1 Apr 2016 00:53:43 +0200 Subject: [PATCH 39/63] Use sysendian.h for pluck hash --- algorithm/pluck.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/algorithm/pluck.c b/algorithm/pluck.c index fad95ca5b..c6e1fe9fd 100644 --- a/algorithm/pluck.c +++ b/algorithm/pluck.c @@ -32,7 +32,7 @@ #include #include - +#include "algorithm/sysendian.h" static const uint32_t sha256_h[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, @@ -199,20 +199,6 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) for (i = 0; i < len; i++) dst[i] = htobe32(src[i]); } -static inline void be32enc(void *pp, uint32_t x) -{ - uint8_t *p = (uint8_t *)pp; - p[3] = x & 0xff; - p[2] = (x >> 8) & 0xff; - p[1] = (x >> 16) & 0xff; - p[0] = (x >> 24) & 0xff; -} -static inline uint32_t be32dec(const void *pp) -{ - const uint8_t *p = (uint8_t const *)pp; - return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + - ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); -} #define ROTL(a, b) (((a) 
<< (b)) | ((a) >> (32 - (b)))) //note, this is 64 bytes static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) From 303b27d3ac8b9ecc99579c62768bb156f02aec14 Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 1 Apr 2016 01:01:26 +0200 Subject: [PATCH 40/63] Move be32enc_vect to miner.h --- algorithm/animecoin.c | 14 -------------- algorithm/bitblock.c | 14 -------------- algorithm/blake256.c | 13 ------------- algorithm/blakecoin.c | 13 ------------- algorithm/credits.c | 9 --------- algorithm/darkcoin.c | 14 -------------- algorithm/fresh.c | 14 -------------- algorithm/fuguecoin.c | 14 -------------- algorithm/groestlcoin.c | 14 -------------- algorithm/inkcoin.c | 13 ------------- algorithm/lyra2re.c | 14 -------------- algorithm/lyra2rev2.c | 14 -------------- algorithm/marucoin.c | 14 -------------- algorithm/myriadcoin-groestl.c | 14 -------------- algorithm/pluck.c | 12 ------------ algorithm/quarkcoin.c | 14 -------------- algorithm/qubitcoin.c | 15 --------------- algorithm/scrypt.c | 13 ------------- algorithm/sifcoin.c | 14 -------------- algorithm/talkcoin.c | 14 -------------- algorithm/twecoin.c | 14 -------------- algorithm/whirlcoin.c | 14 -------------- algorithm/whirlpoolx.c | 14 -------------- algorithm/x14.c | 13 ------------- algorithm/yescrypt.c | 9 --------- miner.h | 12 ++++++++++++ 26 files changed, 12 insertions(+), 334 deletions(-) diff --git a/algorithm/animecoin.c b/algorithm/animecoin.c index 00bbc70a7..65687c042 100644 --- a/algorithm/animecoin.c +++ b/algorithm/animecoin.c @@ -38,20 +38,6 @@ #include "sph/sph_jh.h" #include "sph/sph_keccak.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/bitblock.c b/algorithm/bitblock.c index b91675fd2..bd3269d17 100644 --- a/algorithm/bitblock.c +++ b/algorithm/bitblock.c @@ -92,20 +92,6 @@ void init_Bhash_contexts() sph_whirlpool_init(&base_contexts.whirlpool1); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/blake256.c b/algorithm/blake256.c index 8d4b6c56b..b81b1fbac 100644 --- a/algorithm/blake256.c +++ b/algorithm/blake256.c @@ -39,19 +39,6 @@ #include "sph/sph_blake.h" #include "algorithm/blake256.h" -/* -* Encode a length len/4 vector of (uint32_t) into a length len vector of -* (unsigned char) in big-endian form. Assumes len is a multiple of 4. -*/ -static void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - static const uint32_t diff1targ_blake256 = 0x000000ff; void blake256hash(void *state, const void *input) diff --git a/algorithm/blakecoin.c b/algorithm/blakecoin.c index 9e2998563..e0949cfd2 100644 --- a/algorithm/blakecoin.c +++ b/algorithm/blakecoin.c @@ -39,19 +39,6 @@ #include "sph/sph_blake.h" #include "algorithm/blakecoin.h" -/* -* Encode a length len/4 vector of (uint32_t) into a length len vector of -* (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
-*/ -static void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - static const uint32_t diff1targ_blake256 = 0x000000ff; void blakecoinhash(void *state, const void *input) diff --git a/algorithm/credits.c b/algorithm/credits.c index c543b611c..a0b51329d 100644 --- a/algorithm/credits.c +++ b/algorithm/credits.c @@ -52,15 +52,6 @@ void credits_hash(void *state, const void *input) sph_sha256_close(&sha2, hash2); memcpy(state, hash2, 32); - -} -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); } /* Used externally as confirmation of correct OCL code */ diff --git a/algorithm/darkcoin.c b/algorithm/darkcoin.c index 06e202f86..efc55cb46 100644 --- a/algorithm/darkcoin.c +++ b/algorithm/darkcoin.c @@ -80,20 +80,6 @@ static void init_Xhash_contexts() sph_echo512_init(&base_contexts.echo1); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - static void xhash(void *state, const void *input) { init_Xhash_contexts(); diff --git a/algorithm/fresh.c b/algorithm/fresh.c index 321300adf..a5ef6619e 100644 --- a/algorithm/fresh.c +++ b/algorithm/fresh.c @@ -61,20 +61,6 @@ void init_freshHash_contexts() sph_echo512_init(&base_contexts.echo2); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/fuguecoin.c b/algorithm/fuguecoin.c index 387cd6cbe..5ccdaecc9 100644 --- a/algorithm/fuguecoin.c +++ b/algorithm/fuguecoin.c @@ -33,20 +33,6 @@ #include "sph/sph_fugue.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/groestlcoin.c b/algorithm/groestlcoin.c index 41e186898..3bd96e81e 100644 --- a/algorithm/groestlcoin.c +++ b/algorithm/groestlcoin.c @@ -37,20 +37,6 @@ #include "sph/sph_groestl.h" #include "sph/sph_sha2.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/inkcoin.c b/algorithm/inkcoin.c index b09b3966a..bcb15b741 100644 --- a/algorithm/inkcoin.c +++ b/algorithm/inkcoin.c @@ -77,19 +77,6 @@ static void init_Xhash_contexts() sph_echo512_init(&base_contexts.echo1); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/lyra2re.c b/algorithm/lyra2re.c index 3ad136080..9a403ce92 100644 --- a/algorithm/lyra2re.c +++ b/algorithm/lyra2re.c @@ -38,20 +38,6 @@ #include "sph/sph_keccak.h" #include "lyra2.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - void lyra2rehash(void *state, const void *input) { sph_blake256_context ctx_blake; diff --git a/algorithm/lyra2rev2.c b/algorithm/lyra2rev2.c index 7af069d5b..66d3c5686 100644 --- a/algorithm/lyra2rev2.c +++ b/algorithm/lyra2rev2.c @@ -40,20 +40,6 @@ #include "sph/sph_cubehash.h" #include "lyra2.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - void lyra2rev2hash(void *state, const void *input) { sph_blake256_context ctx_blake; diff --git a/algorithm/marucoin.c b/algorithm/marucoin.c index 2cdc7de83..92c08bc46 100644 --- a/algorithm/marucoin.c +++ b/algorithm/marucoin.c @@ -86,20 +86,6 @@ void init_Mhash_contexts() sph_fugue512_init(&base_contexts.fugue1); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/myriadcoin-groestl.c b/algorithm/myriadcoin-groestl.c index 3eaa19fbe..b0a44689d 100644 --- a/algorithm/myriadcoin-groestl.c +++ b/algorithm/myriadcoin-groestl.c @@ -37,20 +37,6 @@ #include "sph/sph_groestl.h" #include "sph/sph_sha2.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/pluck.c b/algorithm/pluck.c index c6e1fe9fd..91f2bc6ab 100644 --- a/algorithm/pluck.c +++ b/algorithm/pluck.c @@ -187,18 +187,6 @@ void sha256_transform(uint32_t *state, const uint32_t *block, int swap) state[i] += S[i]; } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} #define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) //note, this is 64 bytes static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) diff --git a/algorithm/quarkcoin.c b/algorithm/quarkcoin.c index 8897ff7f6..d6cbdb675 100644 --- a/algorithm/quarkcoin.c +++ b/algorithm/quarkcoin.c @@ -38,20 +38,6 @@ #include "sph/sph_jh.h" #include "sph/sph_keccak.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/qubitcoin.c b/algorithm/qubitcoin.c index e644c2c7a..efbfa9133 100644 --- a/algorithm/qubitcoin.c +++ b/algorithm/qubitcoin.c @@ -62,21 +62,6 @@ void init_Qhash_contexts() sph_echo512_init(&base_contexts.echo1); } - -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/scrypt.c b/algorithm/scrypt.c index 1b1b329f1..968cda6d3 100644 --- a/algorithm/scrypt.c +++ b/algorithm/scrypt.c @@ -39,19 +39,6 @@ typedef struct SHA256Context { uint32_t buf[16]; } SHA256_CTX; -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - /* Elementary functions used by SHA256 */ #define Ch(x, y, z) ((x & (y ^ z)) ^ z) #define Maj(x, y, z) ((x & (y | z)) | (y & z)) diff --git a/algorithm/sifcoin.c b/algorithm/sifcoin.c index efa4baaca..c2c905c9d 100644 --- a/algorithm/sifcoin.c +++ b/algorithm/sifcoin.c @@ -38,20 +38,6 @@ #include "sph/sph_jh.h" #include "sph/sph_keccak.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/talkcoin.c b/algorithm/talkcoin.c index e27f526c9..c7260a8e1 100644 --- a/algorithm/talkcoin.c +++ b/algorithm/talkcoin.c @@ -58,20 +58,6 @@ void init_Nhash_contexts() sph_skein512_init(&base_contexts.skein1); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/twecoin.c b/algorithm/twecoin.c index 67cb0c001..2056e9c2f 100644 --- a/algorithm/twecoin.c +++ b/algorithm/twecoin.c @@ -39,20 +39,6 @@ #include "sph/sph_hamsi.h" #include "sph/sph_panama.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/whirlcoin.c b/algorithm/whirlcoin.c index 8e1954c14..211aeaf3d 100644 --- a/algorithm/whirlcoin.c +++ b/algorithm/whirlcoin.c @@ -55,20 +55,6 @@ void init_whirlcoin_hash_contexts() sph_whirlpool1_init(&base_contexts.whirlpool4); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/whirlpoolx.c b/algorithm/whirlpoolx.c index c419057a0..e12a82519 100644 --- a/algorithm/whirlpoolx.c +++ b/algorithm/whirlpoolx.c @@ -36,20 +36,6 @@ #include "whirlpoolx.h" -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. - */ -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - void whirlpool_compress(uint8_t state[64], const uint8_t block[64]) { const int NUM_ROUNDS = 10; diff --git a/algorithm/x14.c b/algorithm/x14.c index 44c588923..25ca1c20c 100644 --- a/algorithm/x14.c +++ b/algorithm/x14.c @@ -88,19 +88,6 @@ void init_X14hash_contexts() sph_shabal512_init(&base_contexts.shabal1); } -/* - * Encode a length len/4 vector of (uint32_t) into a length len vector of - * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
- */ -static inline void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - - #ifdef __APPLE_CC__ static #endif diff --git a/algorithm/yescrypt.c b/algorithm/yescrypt.c index de00d0f33..31e0c623b 100644 --- a/algorithm/yescrypt.c +++ b/algorithm/yescrypt.c @@ -35,15 +35,6 @@ static const uint32_t diff1targ = 0x0000ffff; -static inline void -be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) -{ - uint32_t i; - - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); -} - /* Used externally as confirmation of correct OCL code */ int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) { diff --git a/miner.h b/miner.h index ecaa6a134..00ebbd78c 100644 --- a/miner.h +++ b/miner.h @@ -743,6 +743,18 @@ static inline void flip168(void *dest_p, const void *src_p) dest[i] = swab32(src[i]); } +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
+ */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} /* For flipping to the correct endianness if necessary */ #if defined(__BIG_ENDIAN__) || defined(MIPSEB) From d7df5db1f09dc5e18f32c90f0adef16c232dba52 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 31 Mar 2016 21:35:38 +0000 Subject: [PATCH 41/63] Fix inkcoin --- algorithm/inkcoin.c | 49 +-------------------------------------------- 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/algorithm/inkcoin.c b/algorithm/inkcoin.c index bcb15b741..56a200da2 100644 --- a/algorithm/inkcoin.c +++ b/algorithm/inkcoin.c @@ -132,53 +132,6 @@ void inkcoin_regenhash(struct work *work) inkhash(ohash, data); } -static inline void xhash(void *state, const void *input) -{ - init_Xhash_contexts(); - - Xhash_context_holder ctx; - - uint32_t hashA[16], hashB[16]; - //blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo - memcpy(&ctx, &base_contexts, sizeof(base_contexts)); - - sph_blake512 (&ctx.blake1, input, 80); - sph_blake512_close (&ctx.blake1, hashA); - - sph_bmw512 (&ctx.bmw1, hashA, 64); - sph_bmw512_close(&ctx.bmw1, hashB); - - sph_groestl512 (&ctx.groestl1, hashB, 64); - sph_groestl512_close(&ctx.groestl1, hashA); - - sph_skein512 (&ctx.skein1, hashA, 64); - sph_skein512_close(&ctx.skein1, hashB); - - sph_jh512 (&ctx.jh1, hashB, 64); - sph_jh512_close(&ctx.jh1, hashA); - - sph_keccak512 (&ctx.keccak1, hashA, 64); - sph_keccak512_close(&ctx.keccak1, hashB); - - sph_luffa512 (&ctx.luffa1, hashB, 64); - sph_luffa512_close (&ctx.luffa1, hashA); - - sph_cubehash512 (&ctx.cubehash1, hashA, 64); - sph_cubehash512_close(&ctx.cubehash1, hashB); - - sph_shavite512 (&ctx.shavite1, hashB, 64); - sph_shavite512_close(&ctx.shavite1, hashA); - - sph_simd512 (&ctx.simd1, hashA, 64); - sph_simd512_close(&ctx.simd1, hashB); - - sph_echo512 (&ctx.echo1, hashB, 64); - sph_echo512_close(&ctx.echo1, hashA); - 
- memcpy(state, hashA, 32); - -} - bool scanhash_inkcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, unsigned char *pdata, unsigned char __maybe_unused *phash1, unsigned char __maybe_unused *phash, const unsigned char *ptarget, @@ -197,7 +150,7 @@ bool scanhash_inkcoin(struct thr_info *thr, const unsigned char __maybe_unused * *nonce = ++n; data[19] = (n); - xhash(ostate, data); + inkhash(ostate, data); tmp_hash7 = (ostate[7]); applog(LOG_INFO, "data7 %08lx", From 7f7557b59b08e6fc8c25db7e49242d4e8a1ec1f1 Mon Sep 17 00:00:00 2001 From: elbandi Date: Thu, 31 Mar 2016 21:43:25 +0000 Subject: [PATCH 42/63] Fix VERSION redefine warnings --- api.h | 2 -- config_parser.h | 3 --- findnonce.c | 2 -- findnonce.h | 1 - logging.c | 2 +- logging.h | 1 - miner.h | 35 ++++++++++++++--------------------- ocl.h | 9 --------- ocl/binary_kernel.c | 5 +++-- ocl/build_kernel.c | 3 ++- ocl/build_kernel.h | 1 - 11 files changed, 20 insertions(+), 44 deletions(-) diff --git a/api.h b/api.h index cf36628bc..4ac1c0ce1 100644 --- a/api.h +++ b/api.h @@ -1,8 +1,6 @@ #ifndef API_H #define API_H -#include "config.h" - #include "miner.h" // BUFSIZ varies on Windows and Linux diff --git a/config_parser.h b/config_parser.h index e5b5117d8..e42ec5f4e 100644 --- a/config_parser.h +++ b/config_parser.h @@ -1,11 +1,8 @@ #ifndef CONFIG_PARSER_H #define CONFIG_PARSER_H -#include "config.h" - #include "miner.h" #include "api.h" -#include "algorithm.h" //profile structure struct profile { diff --git a/findnonce.c b/findnonce.c index 72dcaaef4..4e80dac17 100644 --- a/findnonce.c +++ b/findnonce.c @@ -8,8 +8,6 @@ * any later version. See COPYING for more details. 
*/ -#include "config.h" - #include #include #include diff --git a/findnonce.h b/findnonce.h index 9376a57be..64a8361bb 100644 --- a/findnonce.h +++ b/findnonce.h @@ -2,7 +2,6 @@ #define FINDNONCE_H #include "miner.h" -#include "config.h" #define MAXTHREADS (0xFFFFFFFEULL) #define MAXBUFFERS (0x100) diff --git a/logging.c b/logging.c index 7f8b5d652..1429831ad 100644 --- a/logging.c +++ b/logging.c @@ -12,8 +12,8 @@ #include -#include "logging.h" #include "miner.h" +#include "logging.h" bool opt_debug = false; bool opt_debug_console = false; diff --git a/logging.h b/logging.h index 07d373513..3c554542e 100644 --- a/logging.h +++ b/logging.h @@ -1,7 +1,6 @@ #ifndef LOGGING_H #define LOGGING_H -#include "config.h" #include #include diff --git a/miner.h b/miner.h index 00ebbd78c..6d08f2463 100644 --- a/miner.h +++ b/miner.h @@ -3,24 +3,12 @@ #include "config.h" -#if defined(USE_GIT_VERSION) && defined(GIT_VERSION) -#undef VERSION -#define VERSION GIT_VERSION -#endif - -#ifdef BUILD_NUMBER -#define CGMINER_VERSION VERSION "-" BUILD_NUMBER -#else -#define CGMINER_VERSION VERSION -#endif - -#include "algorithm.h" - #include #include #include #include #include +#include #ifdef HAVE_LIBCURL #include #else @@ -34,22 +22,29 @@ extern char *curly; #endif #include +#if defined(USE_GIT_VERSION) && defined(GIT_VERSION) +#undef VERSION +#define VERSION GIT_VERSION +#endif + +#ifdef BUILD_NUMBER +#define CGMINER_VERSION VERSION "-" BUILD_NUMBER +#else +#define CGMINER_VERSION VERSION +#endif + #include "elist.h" #include "uthash.h" #include "logging.h" #include "util.h" +#include "algorithm.h" + #include #ifndef WIN32 # include # include #endif -#ifdef __APPLE_CC__ -#include -#else -#include -#endif - #ifdef STDC_HEADERS # include # include @@ -135,8 +130,6 @@ static inline int fsync (int fd) #include "ADL_SDK/adl_sdk.h" #endif -#include - #if (!defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ || (defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && 
__GNUC_MINOR__ >= 7))) #ifndef bswap_16 diff --git a/ocl.h b/ocl.h index 8d6467f11..5d4232499 100644 --- a/ocl.h +++ b/ocl.h @@ -1,16 +1,7 @@ #ifndef OCL_H #define OCL_H -#include "config.h" - #include -#ifdef __APPLE_CC__ -#include -#else -#include -#endif - -#include "algorithm.h" typedef struct __clState { cl_context context; diff --git a/ocl/binary_kernel.c b/ocl/binary_kernel.c index 4fd778752..874ce61ea 100644 --- a/ocl/binary_kernel.c +++ b/ocl/binary_kernel.c @@ -1,8 +1,9 @@ -#include "binary_kernel.h" -#include "miner.h" #include #include +#include "miner.h" +#include "binary_kernel.h" + cl_program load_opencl_binary_kernel(build_kernel_data *data) { FILE *binaryfile = NULL; diff --git a/ocl/build_kernel.c b/ocl/build_kernel.c index 2e1b73838..a5407a010 100644 --- a/ocl/build_kernel.c +++ b/ocl/build_kernel.c @@ -1,6 +1,7 @@ #include -#include "build_kernel.h" + #include "miner.h" +#include "build_kernel.h" static char *file_contents(const char *filename, int *length) { diff --git a/ocl/build_kernel.h b/ocl/build_kernel.h index 89fb8db8a..edb57c892 100644 --- a/ocl/build_kernel.h +++ b/ocl/build_kernel.h @@ -2,7 +2,6 @@ #define BUILD_KERNEL_H #include -#include "logging.h" #ifdef __APPLE_CC__ #include From d6e6f108818d035c2fe6da0cbdf68c10e5e83fc3 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 27 Mar 2016 12:54:11 +0000 Subject: [PATCH 43/63] intensity: prevent double free on change from (t)ui --- config_parser.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/config_parser.c b/config_parser.c index 9175785f8..7c12b6a38 100644 --- a/config_parser.c +++ b/config_parser.c @@ -2193,9 +2193,8 @@ void api_pool_profile(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char void update_config_intensity(struct profile *profile) { + char buf[256] = { 0 }; int i; - char buf[255]; - memset(buf, 0, 255); for (i = 0; iintensity) { + if (profile->intensity && profile->intensity != default_profile.intensity) { free(profile->intensity); 
} From 29c3f1c7144c148d3ea9ee40a510fe3c77e01d76 Mon Sep 17 00:00:00 2001 From: elbandi Date: Mon, 21 Mar 2016 17:34:35 +0100 Subject: [PATCH 44/63] Add decred algo support --- Makefile.am | 1 + algorithm.c | 99 +++++++++++++++++++------- algorithm.h | 1 + algorithm/decred.c | 170 +++++++++++++++++++++++++++++++++++++++++++++ algorithm/decred.h | 11 +++ kernel/decred.cl | 161 ++++++++++++++++++++++++++++++++++++++++++ miner.h | 24 ++++++- ocl.c | 4 +- ocl.h | 2 +- sgminer.c | 169 +++++++++++++++++++++++++++++++++----------- 10 files changed, 575 insertions(+), 67 deletions(-) create mode 100644 algorithm/decred.c create mode 100644 algorithm/decred.h create mode 100644 kernel/decred.cl diff --git a/Makefile.am b/Makefile.am index c36ea1dbd..e8a3f3d48 100644 --- a/Makefile.am +++ b/Makefile.am @@ -79,6 +79,7 @@ sgminer_SOURCES += algorithm/credits.c algorithm/credits.h sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h +sgminer_SOURCES += algorithm/decred.c algorithm/decred.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index 24165546b..ad41a494f 100644 --- a/algorithm.c +++ b/algorithm.c @@ -39,6 +39,7 @@ #include "algorithm/credits.h" #include "algorithm/blake256.h" #include "algorithm/blakecoin.h" +#include "algorithm/decred.h" #include "compat.h" @@ -70,6 +71,7 @@ const char *algorithm_type_str[] = { "Yescrypt-multi", "Blakecoin", "Blake", + "Decred", "Vanilla" }; @@ -138,6 +140,17 @@ static void append_neoscrypt_compiler_options(struct _build_kernel_data *data, s strcat(data->binary_filename, buf); } +static void append_blake256_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm) +{ + char buf[255]; + sprintf(buf, " -D LOOKUP_GAP=%d -D 
MAX_GLOBAL_THREADS=%lu ", + cgpu->lookup_gap, (unsigned long)cgpu->thread_concurrency); + strcat(data->compiler_options, buf); + + sprintf(buf, "tc%lu", (unsigned long)cgpu->thread_concurrency); + strcat(data->binary_filename, buf); +} + static void append_x11_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm) { char buf[255]; @@ -935,30 +948,63 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un static cl_int queue_blake_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) { - cl_kernel *kernel = &clState->kernel; - unsigned int num = 0; - cl_int status = 0; - cl_ulong le_target; - - le_target = *(cl_ulong *)(blk->work->device_target + 24); - flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); - - CL_SET_ARG(clState->outputBuffer); - CL_SET_ARG(blk->work->blk.ctx_a); - CL_SET_ARG(blk->work->blk.ctx_b); - CL_SET_ARG(blk->work->blk.ctx_c); - CL_SET_ARG(blk->work->blk.ctx_d); - CL_SET_ARG(blk->work->blk.ctx_e); - CL_SET_ARG(blk->work->blk.ctx_f); - CL_SET_ARG(blk->work->blk.ctx_g); - CL_SET_ARG(blk->work->blk.ctx_h); - - CL_SET_ARG(blk->work->blk.cty_a); - CL_SET_ARG(blk->work->blk.cty_b); - CL_SET_ARG(blk->work->blk.cty_c); - - return status; + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_int status = 0; + cl_ulong le_target; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(blk->work->blk.ctx_a); + CL_SET_ARG(blk->work->blk.ctx_b); + CL_SET_ARG(blk->work->blk.ctx_c); + CL_SET_ARG(blk->work->blk.ctx_d); + CL_SET_ARG(blk->work->blk.ctx_e); + CL_SET_ARG(blk->work->blk.ctx_f); + 
CL_SET_ARG(blk->work->blk.ctx_g); + CL_SET_ARG(blk->work->blk.ctx_h); + + CL_SET_ARG(blk->work->blk.cty_a); + CL_SET_ARG(blk->work->blk.cty_b); + CL_SET_ARG(blk->work->blk.cty_c); + + return status; +} + +static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_int status = 0; + + CL_SET_ARG(clState->outputBuffer); + /* Midstate */ + CL_SET_BLKARG(ctx_a); + CL_SET_BLKARG(ctx_b); + CL_SET_BLKARG(ctx_c); + CL_SET_BLKARG(ctx_d); + CL_SET_BLKARG(ctx_e); + CL_SET_BLKARG(ctx_f); + CL_SET_BLKARG(ctx_g); + CL_SET_BLKARG(ctx_h); + /* Last 52 bytes of data (without nonce) */ + CL_SET_BLKARG(cty_a); + CL_SET_BLKARG(cty_b); + CL_SET_BLKARG(cty_c); + CL_SET_BLKARG(cty_d); + CL_SET_BLKARG(cty_e); + CL_SET_BLKARG(cty_f); + CL_SET_BLKARG(cty_g); + CL_SET_BLKARG(cty_h); + CL_SET_BLKARG(cty_i); + CL_SET_BLKARG(cty_j); + CL_SET_BLKARG(cty_k); + CL_SET_BLKARG(cty_l); + + return status; } static algorithm_settings_t algos[] = { @@ -989,6 +1035,11 @@ static algorithm_settings_t algos[] = { A_CREDITS("credits"), #undef A_CREDITS +#define A_DECRED(a) \ + { a, ALGO_DECRED, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, decred_regenhash, decred_midstate, decred_prepare_work, queue_decred_kernel, gen_hash, append_blake256_compiler_options } + A_DECRED("decred"), +#undef A_DECRED + #define A_YESCRYPT(a) \ { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} A_YESCRYPT("yescrypt"), diff --git a/algorithm.h b/algorithm.h index fc116a7d7..5628d0067 100644 --- a/algorithm.h +++ b/algorithm.h @@ -36,6 +36,7 @@ typedef enum { ALGO_YESCRYPT_MULTI, ALGO_BLAKECOIN, ALGO_BLAKE, + ALGO_DECRED, ALGO_VANILLA } algorithm_type_t; diff --git a/algorithm/decred.c 
b/algorithm/decred.c new file mode 100644 index 000000000..dd1654a77 --- /dev/null +++ b/algorithm/decred.c @@ -0,0 +1,170 @@ +/* + * BLAKE implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + * + * Modified for more speed by BlueDragon747 for the Blakecoin project + */ + +#include +#include +#include +#include + +#include "sph/sph_blake.h" +#include "algorithm/decred.h" + +static const uint32_t diff1targ_decred = 0x000000ff; + +void decredhash(void *state, const void *input) +{ + sph_blake256_context ctx_blake; + sph_blake256_init(&ctx_blake); + sph_blake256(&ctx_blake, input, 180); + sph_blake256_close(&ctx_blake, state); +} + +void decred_midstate(struct work *work) +{ + sph_blake256_context ctx_blake; + sph_blake256_init(&ctx_blake); + sph_blake256 (&ctx_blake, (unsigned char *)work->data, 128); + + memcpy(work->midstate, ctx_blake.H, 32); + endian_flip32(work->midstate, work->midstate); + + char *strdata, *strmidstate; + strdata = bin2hex(work->data, 128); + strmidstate = bin2hex(work->midstate, 32); + applog(LOG_DEBUG, "data %s midstate %s", strdata, strmidstate); +} + +void decred_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata) +{ + blk->ctx_a = state[0]; + blk->ctx_b = state[1]; + blk->ctx_c = state[2]; + blk->ctx_d = state[3]; + blk->ctx_e = state[4]; + blk->ctx_f = state[5]; + blk->ctx_g = state[6]; + blk->ctx_h = state[7]; + + blk->cty_a = htobe32(pdata[32]); + blk->cty_b = htobe32(pdata[33]); + blk->cty_c = htobe32(pdata[34]); + /* blk->cty_d = htobe32(pdata[35] = nonce) */ + + blk->cty_d = htobe32(pdata[36]); + blk->cty_e = htobe32(pdata[37]); + blk->cty_f = htobe32(pdata[38]); + blk->cty_g = htobe32(pdata[39]); + + blk->cty_h = htobe32(pdata[40]); + blk->cty_i = htobe32(pdata[41]); + blk->cty_j = htobe32(pdata[42]); + blk->cty_k = htobe32(pdata[43]); + + blk->cty_l = htobe32(pdata[44]); +} + + +static const uint32_t diff1targ = 0x0000ffff; + +/* Used externally as confirmation of correct OCL code */ +int decred_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = 
le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[45], ohash[8]; + + memcpy(data, pdata, 180); + data[35] = htobe32(nonce); + decredhash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void decred_regenhash(struct work *work) +{ + uint32_t data[45]; + uint32_t *nonce = (uint32_t *)(work->data + 140); + uint32_t *ohash = (uint32_t *)(work->hash); + + memcpy(data, work->data, 180); + data[35] = htobe32(*nonce); + decredhash(ohash, data); +} + +bool scanhash_decred(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 140); + uint32_t data[45]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + memcpy(data, pdata, 180); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[35] = (n); + decredhash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[35] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} diff --git a/algorithm/decred.h b/algorithm/decred.h new file mode 100644 index 000000000..43045fa4d --- /dev/null +++ b/algorithm/decred.h @@ -0,0 +1,11 @@ +#ifndef DECRED_H +#define DECRED_H + +#include "miner.h" + +extern int decred_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void decred_prepare_work(dev_blk_ctx *blk, uint32_t *state, 
uint32_t *pdata); +extern void decred_midstate(struct work *work); +extern void decred_regenhash(struct work *work); + +#endif /* DECRED_H */ \ No newline at end of file diff --git a/kernel/decred.cl b/kernel/decred.cl new file mode 100644 index 000000000..0cdf30b24 --- /dev/null +++ b/kernel/decred.cl @@ -0,0 +1,161 @@ +/** + * BLAKE256 14-round kernel + * + * Copyright 2015 Company Zero + * A complete kernel re-write + * with inspiration from the Golang BLAKE256 repo (github.com/dchest/blake256) + */ + +/** + * optimized by tpruvot 02/2016 : + * + * GTX 960 | (5s):735.3M (avg):789.3Mh/s + * GTX 750 | (5s):443.3M (avg):476.8Mh/s + * to + * GTX 960 | (5s):875.0M (avg):899.2Mh/s + * GTX 750 | (5s):523.1M (avg):536.8Mh/s + */ +#define ROTR(v,n) rotate(v,(uint)(32U-n)) +#define ROTL(v,n) rotate(v, n) + +#ifdef _AMD_OPENCL +#define SWAP(v) rotate(v, 16U) +#define ROTR8(v) rotate(v, 24U) +#else +#define SWAP(v) as_uint(as_uchar4(v).zwxy) +#define ROTR8(v) as_uint(as_uchar4(v).yzwx) +#endif + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search( + volatile __global uint * restrict output, + // Midstate + const uint h0, + const uint h1, + const uint h2, + const uint h3, + const uint h4, + const uint h5, + const uint h6, + const uint h7, + + // last 52 bytes of data + const uint M0, + const uint M1, + const uint M2, + // const uint M3 : nonce + const uint M4, + const uint M5, + const uint M6, + const uint M7, + const uint M8, + const uint M9, + const uint MA, + const uint MB, + const uint MC +) +{ + /* Load the block header and padding */ + const uint M3 = get_global_id(0); + const uint MD = 0x80000001UL; + const uint ME = 0x00000000UL; + const uint MF = 0x000005a0UL; + + const uint cst0 = 0x243F6A88UL; + const uint cst1 = 0x85A308D3UL; + const uint cst2 = 0x13198A2EUL; + const uint cst3 = 0x03707344UL; + const uint cst4 = 0xA4093822UL; + const uint cst5 = 0x299F31D0UL; + const uint cst6 = 0x082EFA98UL; + const uint cst7 = 0xEC4E6C89UL; + const uint 
cst8 = 0x452821E6UL; + const uint cst9 = 0x38D01377UL; + const uint cstA = 0xBE5466CFUL; + const uint cstB = 0x34E90C6CUL; + const uint cstC = 0xC0AC29B7UL; + const uint cstD = 0xC97C50DDUL; + const uint cstE = 0x3F84D5B5UL; + const uint cstF = 0xB5470917UL; + + uint V0, V1, V2, V3, V4, V5, V6, V7; + uint V8, V9, VA, VB, VC, VD, VE, VF; + uint pre7; + + /* Load the midstate and initialize */ + V0 = h0; + V1 = h1; + V2 = h2; + V3 = h3; + V4 = h4; + V5 = h5; + V6 = h6; + pre7 = V7 = h7; + + V8 = cst0; + V9 = cst1; + VA = cst2; + VB = cst3; + VC = 0xA4093D82UL; + VD = 0x299F3470UL; + VE = cst6; + VF = cst7; + + /* 14 rounds */ + + V0 = V0 + (M0 ^ cst1); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M2 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M4 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M6 ^ cst7); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M5 ^ cst4); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M7 ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M3 ^ cst2); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M1 ^ cst0); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M8 ^ cst9); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MA ^ cstB); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (ME ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 
12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MF ^ cstE); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MB ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M9 ^ cst8); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (ME ^ cstA); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M4 ^ cst8); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M9 ^ cstF); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MD ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MF ^ cst9); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M6 ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M8 ^ cst4); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M1 ^ cstC); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M0 ^ cst2); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MB ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M5 ^ cst3); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M7 ^ cstB); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M3 ^ cst5); V3 = V3 + V4; VE = VE ^ V3; VE 
= ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M2 ^ cst0); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MC ^ cst1); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (MB ^ cst8); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (MC ^ cst0); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M5 ^ cst2); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MF ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M2 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MD ^ cstF); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M0 ^ cstC); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M8 ^ cstB); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M3 ^ cst6); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M7 ^ cst1); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M9 ^ cst4); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M1 ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M4 ^ cst9); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M6 ^ cst3); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); 
V0 = V0 + (ME ^ cstA); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (M7 ^ cst9); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M3 ^ cst1); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MB ^ cstE); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (ME ^ cstB); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M1 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M9 ^ cst7); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M2 ^ cst6); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M5 ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M4 ^ cst0); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MF ^ cst8); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M0 ^ cst4); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M8 ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MA ^ cst5); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M6 ^ cst2); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (M9 ^ cst0); V0 = V0 + V4; VC = VC ^ V0; VC = 
SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M5 ^ cst7); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M2 ^ cst4); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MA ^ cstF); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M4 ^ cst2); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MF ^ cstA); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M7 ^ cst5); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M0 ^ cst9); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (ME ^ cst1); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MB ^ cstC); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M6 ^ cst8); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M3 ^ cstD); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M8 ^ cst6); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MD ^ cst3); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MC ^ cstB); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M1 ^ cstE); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (M2 ^ cstC); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M6 ^ cstA); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); 
V2 = V2 + (M0 ^ cstB); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M8 ^ cst3); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MB ^ cst0); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M3 ^ cst8); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (MA ^ cst6); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MC ^ cst2); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M4 ^ cstD); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M7 ^ cst5); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MF ^ cstE); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M1 ^ cst9); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (ME ^ cstF); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M9 ^ cst1); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M5 ^ cst7); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MD ^ cst4); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (MC ^ cst5); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M1 ^ cstF); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (ME ^ cstD); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M4 ^ cstA); V3 = V3 + V7; VF = VF ^ V3; VF = 
SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MD ^ cstE); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MA ^ cst4); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (MF ^ cst1); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M5 ^ cstC); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M0 ^ cst7); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M6 ^ cst3); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M9 ^ cst2); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M8 ^ cstB); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M2 ^ cst9); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MB ^ cst8); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M3 ^ cst6); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M7 ^ cst0); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (MD ^ cstB); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M7 ^ cstE); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MC ^ cst1); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M3 ^ cst9); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M1 ^ cstC); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); 
V3 = V3 + (M9 ^ cst3); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (ME ^ cst7); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MB ^ cstD); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M5 ^ cst0); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MF ^ cst4); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M8 ^ cst6); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M2 ^ cstA); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M6 ^ cst8); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MA ^ cst2); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M4 ^ cstF); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M0 ^ cst5); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (M6 ^ cstF); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (ME ^ cst9); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MB ^ cst3); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M0 ^ cst8); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M3 ^ cstB); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M8 ^ cst0); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M9 ^ cstE); V1 = V1 + V5; VD = VD ^ V1; VD = 
ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MF ^ cst6); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (MC ^ cst2); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MD ^ cst7); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M1 ^ cst4); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MA ^ cst5); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M4 ^ cst1); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M5 ^ cstA); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M7 ^ cstD); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M2 ^ cstC); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (MA ^ cst2); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M8 ^ cst4); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M7 ^ cst6); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M1 ^ cst5); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M6 ^ cst7); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M5 ^ cst1); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M4 ^ cst8); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M2 ^ cstA); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); 
V0 = V0 + (MF ^ cstB); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M9 ^ cstE); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M3 ^ cstC); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MD ^ cst0); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (MC ^ cst3); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M0 ^ cstD); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (ME ^ cst9); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MB ^ cstF); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (M0 ^ cst1); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M2 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M4 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M6 ^ cst7); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M5 ^ cst4); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M7 ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M3 ^ cst2); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M1 ^ cst0); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M8 ^ cst9); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MA ^ cstB); V1 = V1 + V6; VC = VC ^ V1; VC = 
SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (ME ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MF ^ cstE); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MB ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M9 ^ cst8); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (ME ^ cstA); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M4 ^ cst8); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M9 ^ cstF); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MD ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MF ^ cst9); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M6 ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M8 ^ cst4); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M1 ^ cstC); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M0 ^ cst2); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MB ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); 
V3 = V3 + (M5 ^ cst3); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M7 ^ cstB); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M3 ^ cst5); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M2 ^ cst0); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MC ^ cst1); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (MB ^ cst8); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (MC ^ cst0); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M5 ^ cst2); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MF ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M2 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MD ^ cstF); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M0 ^ cstC); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M8 ^ cstB); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M3 ^ cst6); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M7 ^ cst1); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M9 ^ cst4); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M1 ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = 
ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M4 ^ cst9); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M6 ^ cst3); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (ME ^ cstA); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U); + V0 = V0 + (M7 ^ cst9); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M3 ^ cst1); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MB ^ cstE); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (ME ^ cstB); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M1 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M9 ^ cst7); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M2 ^ cst6); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M5 ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M4 ^ cst0); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MF ^ cst8); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M0 ^ cst4); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M8 ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); 
V1 = V1 + (MA ^ cst5); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M6 ^ cst2); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF);/*VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);*/ + + /* The final chunks of the hash + * are calculated as: + * h0 = h0 ^ V0 ^ V8; + * h1 = h1 ^ V1 ^ V9; + * h2 = h2 ^ V2 ^ VA; + * h3 = h3 ^ V3 ^ VB; + * h4 = h4 ^ V4 ^ VC; + * h5 = h5 ^ V5 ^ VD; + * h6 = h6 ^ V6 ^ VE; + * h7 = h7 ^ V7 ^ VF; + * + * We just check if the last byte + * is zeroed and if it is, we tell + * cgminer that we've found a + * and to check it against the + * target. + */ + + /* Debug code to help you assess the correctness + * of your hashing function in case someone decides + * to try to optimize. + if (!((pre7 ^ V7 ^ VF) & 0xFFFF0000)) { + printf("hash on gpu %x %x %x %x %x %x %x %x\n", + h0 ^ V0 ^ V8, + h1 ^ V1 ^ V9, + h2 ^ V2 ^ VA, + h3 ^ V3 ^ VB, + h4 ^ V4 ^ VC, + h5 ^ V5 ^ VD, + h6 ^ V6 ^ VE, + h7 ^ V7 ^ VF); + printf("nonce for hash on gpu %x\n", + nonce); + } + */ + + if (pre7 ^ V7 ^ VF) return; + + /* Push this share */ + output[output[0xFF]++] = M3; +} diff --git a/miner.h b/miner.h index 6d08f2463..3d6ce4296 100644 --- a/miner.h +++ b/miner.h @@ -736,6 +736,16 @@ static inline void flip168(void *dest_p, const void *src_p) dest[i] = swab32(src[i]); } +static inline void flip180(void *dest_p, const void *src_p) +{ + uint32_t *dest = (uint32_t *)dest_p; + const uint32_t *src = (uint32_t *)src_p; + int i; + + for (i = 0; i < 45; i++) + dest[i] = swab32(src[i]); +} + /* * Encode a length len/4 vector of (uint32_t) into a length len vector of * (unsigned char) in big-endian form. Assumes len is a multiple of 4. 
@@ -762,7 +772,11 @@ static inline void endian_flip128(void *dest_p, const void *src_p) } static inline void endian_flip168(void *dest_p, const void *src_p) { - flip168(dest_p, src_p); + flip168(dest_p, src_p); +} +static inline void endian_flip180(void *dest_p, const void *src_p) +{ + flip180(dest_p, src_p); } #else @@ -779,6 +793,10 @@ static inline void endian_flip168(void __maybe_unused *dest_p, const void __maybe_unused *src_p) { } +static inline void +endian_flip180(void __maybe_unused *dest_p, const void __maybe_unused *src_p) +{ +} #endif @@ -1232,6 +1250,8 @@ typedef struct _dev_blk_ctx { cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h; cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d; cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h; + cl_uint cty_i; cl_uint cty_j; cl_uint cty_k; cl_uint cty_l; + cl_uint cty_m; cl_uint cty_n; cl_uint cty_o; cl_uint cty_p; cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce; cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15; cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2; @@ -1442,7 +1462,7 @@ struct pool { #define GETWORK_MODE_GBT 'G' struct work { - unsigned char data[168]; + unsigned char data[256]; unsigned char midstate[32]; unsigned char target[32]; unsigned char hash[32]; diff --git a/ocl.c b/ocl.c index ecc383c87..c34f191b3 100644 --- a/ocl.c +++ b/ocl.c @@ -758,7 +758,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg size_t buf1size; size_t buf3size; size_t buf2size; - size_t readbufsize = (algorithm->type == ALGO_CRE) ? 
168 : 128; + size_t readbufsize = 128; + if (algorithm->type == ALGO_CRE) readbufsize = 168; + else if (algorithm->type == ALGO_DECRED) readbufsize = 192; if (algorithm->rw_buffer_size < 0) { // calc buffer size for neoscrypt diff --git a/ocl.h b/ocl.h index 5d4232499..311db29ec 100644 --- a/ocl.h +++ b/ocl.h @@ -17,7 +17,7 @@ typedef struct __clState { cl_mem buffer1; cl_mem buffer2; cl_mem buffer3; - unsigned char cldata[168]; + unsigned char cldata[256]; bool goffset; cl_uint vwidth; size_t max_work_size; diff --git a/sgminer.c b/sgminer.c index f317da7ba..a208f510c 100644 --- a/sgminer.c +++ b/sgminer.c @@ -2064,17 +2064,31 @@ static void update_gbt(struct pool *pool) /* Return the work coin/network difficulty */ static double get_work_blockdiff(const struct work *work) { + uint32_t* data = (uint32_t*) work->data; uint64_t diff64; double numerator; + int powdiff; + uint8_t shift; // Neoscrypt has the data reversed if (work->pool->algorithm.type == ALGO_NEOSCRYPT) { diff64 = bswap_64(((uint64_t)(be32toh(*((uint32_t *)(work->data + 72))) & 0xFFFFFF00)) << 8); numerator = (double)work->pool->algorithm.diff_numerator; } + else if (work->pool->algorithm.type == ALGO_DECRED) { + shift = work->data[116+3]; + powdiff = (8 * (0x1d - 3)) - (8 * (shift - 3)); + diff64 = data[29] & 0xFFFFFF; + if (!diff64) diff64 = 1; + double d = (double)work->pool->algorithm.diff_numerator / (double)diff64; + for (int m = shift; m < 29; m++) d *= 256.0; + for (int m = 29; m < shift; m++) d /= 256.0; + if (shift == 28) d *= 256.0; // testnet + return d; + } else { - uint8_t pow = work->data[72]; - int powdiff = (8 * (0x1d - 3)) - (8 * (pow - 3));; + shift = work->data[72]; + powdiff = (8 * (0x1d - 3)) - (8 * (shift - 3));; diff64 = be32toh(*((uint32_t *)(work->data + 72))) & 0x0000000000FFFFFF; numerator = work->pool->algorithm.diff_numerator << powdiff; } @@ -2245,15 +2259,15 @@ static bool gbt_decode(struct pool *pool, json_t *res_val) static bool getwork_decode(json_t *res_val, struct 
work *work) { size_t worklen = 128; - worklen = ((work->pool->algorithm.type == ALGO_CRE) ? sizeof(work->data) : worklen); + if (work->pool->algorithm.type == ALGO_CRE) worklen = 168; + else if (work->pool->algorithm.type == ALGO_DECRED) worklen = 192; if (unlikely(!jobj_binary(res_val, "data", work->data, worklen, true))) { if (opt_morenotices) applog(LOG_ERR, "%s: JSON inval data", isnull(get_pool_name(work->pool), "")); return false; } - // Neoscrypt doesn't calc midstate - if (work->pool->algorithm.type != ALGO_NEOSCRYPT) { + if (work->pool->algorithm.type == ALGO_CRE || work->pool->algorithm.type == ALGO_SCRYPT) { if (!jobj_binary(res_val, "midstate", work->midstate, sizeof(work->midstate), false)) { // Calculate it ourselves if (opt_morenotices) { @@ -2268,6 +2282,11 @@ static bool getwork_decode(json_t *res_val, struct work *work) applog(LOG_ERR, "%s: JSON inval target", isnull(get_pool_name(work->pool), "")); return false; } + if (work->pool->algorithm.type == ALGO_DECRED) { + // some random extradata to make it unique + ((uint32_t*)work->data)[36] = (rand()*4); + ((uint32_t*)work->data)[37] = (rand()*4) << 8 | work->thr_id; + } return true; } @@ -3004,7 +3023,9 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s cgpu = get_thr_cgpu(thr_id); - if (work->pool->algorithm.type == ALGO_CRE) { + if (work->pool->algorithm.type == ALGO_DECRED) { + endian_flip180(work->data, work->data); + } else if (work->pool->algorithm.type == ALGO_CRE) { endian_flip168(work->data, work->data); } else { endian_flip128(work->data, work->data); @@ -3014,6 +3035,13 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s int datasize = 128; if (work->pool->algorithm.type == ALGO_NEOSCRYPT) datasize = 80; else if (work->pool->algorithm.type == ALGO_CRE) datasize = 168; + else if (work->pool->algorithm.type == ALGO_DECRED) { + datasize = 192; + ((uint32_t*)work->data)[45] = 0x80000001UL; + ((uint32_t*)work->data)[46] = 0; + 
((uint32_t*)work->data)[47] = 0x000005a0UL; + } + hexstr = bin2hex(work->data, datasize); /* build JSON-RPC request */ @@ -3670,15 +3698,44 @@ static inline bool can_roll(struct work *work) work->rolls < 7000 && !stale_work(work, false)); } +static uint32_t _get_work_time(struct work *work) +{ + uint32_t *data = (uint32_t*) work->data; + uint32_t work_ntime = data[17]; + if (work->pool && work->pool->algorithm.type == ALGO_DECRED) { + work_ntime = data[34]; + } + return work_ntime; +} + +static void _set_work_time(struct work *work, uint32_t ntime) +{ + uint32_t *data = (uint32_t*) work->data; + uint32_t *work_ntime = &data[17]; + if (work->pool && work->pool->algorithm.type == ALGO_DECRED) { + work_ntime = &data[34]; + } + (*work_ntime) = ntime; +} + static void roll_work(struct work *work) { - uint32_t *work_ntime; + uint32_t work_ntime; uint32_t ntime; - work_ntime = (uint32_t *)(work->data + 68); - ntime = be32toh(*work_ntime); + work_ntime = _get_work_time(work); + ntime = be32toh(work_ntime); ntime++; - *work_ntime = htobe32(ntime); + + if (work->pool->algorithm.type == ALGO_DECRED) { + uint32_t* data = (uint32_t*) work->data; + // dont mess with ntime, use extranonce + data[36]++; + data[37] = ((rand()*4) << 8) | work->thr_id; + } else { + _set_work_time(work, htobe32(ntime)); + } + local_work++; work->rolls++; work->blk.nonce = 0; @@ -3736,6 +3793,12 @@ static struct work *make_clone(struct work *work) { struct work *work_clone = copy_work(work); + if (work->pool->algorithm.type == ALGO_DECRED) { + // maybe not useful here + ((uint32_t*)work->data)[36] = (rand()*4); + ((uint32_t*)work->data)[37] = (rand()*4) << 8; + } + work_clone->clone = true; cgtime((struct timeval *)&(work_clone->tv_cloned)); work_clone->longpoll = false; @@ -3854,20 +3917,18 @@ static void _copy_work(struct work *work, const struct work *base_work, int noff /* If we are passed an noffset the binary work->data ntime and * the work->ntime hex string need to be adjusted. 
*/ if (noffset) { - uint32_t *work_ntime = (uint32_t *)(work->data + 68); - uint32_t ntime = be32toh(*work_ntime); - + uint32_t work_ntime = _get_work_time(work); + uint32_t ntime = be32toh(work_ntime); ntime += noffset; - *work_ntime = htobe32(ntime); + _set_work_time(work, htobe32(ntime)); work->ntime = offset_ntime(base_work->ntime, noffset); } else work->ntime = strdup(base_work->ntime); } else if (noffset) { - uint32_t *work_ntime = (uint32_t *)(work->data + 68); - uint32_t ntime = be32toh(*work_ntime); - + uint32_t work_ntime = _get_work_time(work); + uint32_t ntime = be32toh(work_ntime); ntime += noffset; - *work_ntime = htobe32(ntime); + _set_work_time(work, htobe32(ntime)); } if (base_work->coinbase) work->coinbase = strdup(base_work->coinbase); @@ -5522,11 +5583,10 @@ static void *stratum_sthread(void *userdata) quit(1, "Failed to create stratum_q in stratum_sthread"); while (42) { - char noncehex[12], nonce2hex[20], s[1024]; + char noncehex[12], nonce2hex[33], s[1024] = { 0 }; struct stratum_share *sshare; uint32_t *hash32, nonce; - unsigned char nonce2[8]; - uint64_t *nonce2_64; + unsigned char nonce2[16]; struct work *work; bool submitted; @@ -5538,7 +5598,7 @@ static void *stratum_sthread(void *userdata) if (unlikely(!work)) quit(1, "Stratum q returned empty work"); - if (unlikely(work->nonce2_len > 8)) { + if ((pool->algorithm.type != ALGO_DECRED && unlikely(work->nonce2_len > 8)) || (pool->algorithm.type == ALGO_DECRED && unlikely(work->nonce2_len > 16))) { applog(LOG_ERR, "%s asking for inappropriately long nonce2 length %d", get_pool_name(pool), (int)work->nonce2_len); applog(LOG_ERR, "Not attempting to submit shares"); free_work(work); @@ -5561,12 +5621,19 @@ static void *stratum_sthread(void *userdata) nonce = htobe32(*((uint32_t *)(work->data + 76))); //*((uint32_t *)nonce2) = htole32(work->nonce2); } + else if (pool->algorithm.type == ALGO_DECRED) { + nonce = *((uint32_t *)(work->data + 140)); + } else { nonce = *((uint32_t *)(work->data + 
76)); } __bin2hex(noncehex, (const unsigned char *)&nonce, 4); - *((uint64_t *)nonce2) = htole64(work->nonce2); + if (pool->algorithm.type == ALGO_DECRED) { + memcpy(nonce2, work->data + 144, work->nonce2_len); + } else { + *((uint64_t *)nonce2) = htole64(work->nonce2); + } __bin2hex(nonce2hex, nonce2, work->nonce2_len); memset(s, 0, 1024); @@ -6033,29 +6100,38 @@ void set_target_neoscrypt(unsigned char *target, double diff, const int thr_id) static void gen_stratum_work(struct pool *pool, struct work *work) { unsigned char merkle_root[32], merkle_sha[64]; + int datasize = 128; uint32_t *data32, *swap32; uint64_t nonce2le; int i, j; cg_wlock(&pool->data_lock); - /* Update coinbase. Always use an LE encoded nonce2 to fill in values - * from left to right and prevent overflow errors with small n2sizes */ - nonce2le = htole64(pool->nonce2); - memcpy(pool->coinbase + pool->nonce2_offset, &nonce2le, pool->n2size); - work->nonce2 = pool->nonce2++; - work->nonce2_len = pool->n2size; + if (pool->algorithm.type == ALGO_DECRED) { + datasize = 180; + work->nonce2 = pool->nonce2++; + work->nonce2_len = pool->n2size; + } else { + /* Update coinbase. 
Always use an LE encoded nonce2 to fill in values + * from left to right and prevent overflow errors with small n2sizes */ + nonce2le = htole64(pool->nonce2); + memcpy(pool->coinbase + pool->nonce2_offset, &nonce2le, pool->n2size); + work->nonce2 = pool->nonce2++; + work->nonce2_len = pool->n2size; + } /* Downgrade to a read lock to read off the pool variables */ cg_dwlock(&pool->data_lock); - /* Generate merkle root */ - pool->algorithm.gen_hash(pool->coinbase, pool->swork.cb_len, merkle_root); - memcpy(merkle_sha, merkle_root, 32); - for (i = 0; i < pool->swork.merkles; i++) { - memcpy(merkle_sha + 32, pool->swork.merkle_bin[i], 32); - gen_hash(merkle_sha, 64, merkle_root); + if (pool->algorithm.type != ALGO_DECRED) { + /* Generate merkle root */ + pool->algorithm.gen_hash(pool->coinbase, pool->swork.cb_len, merkle_root); memcpy(merkle_sha, merkle_root, 32); + for (i = 0; i < pool->swork.merkles; i++) { + memcpy(merkle_sha + 32, pool->swork.merkle_bin[i], 32); + gen_hash(merkle_sha, 64, merkle_root); + memcpy(merkle_sha, merkle_root, 32); + } } applog(LOG_DEBUG, "[THR%d] gen_stratum_work() - algorithm = %s", work->thr_id, pool->algorithm.name); @@ -6089,6 +6165,18 @@ static void gen_stratum_work(struct pool *pool, struct work *work) ((uint32_t *)work->data)[20] = 0x80000000; ((uint32_t *)work->data)[31] = 0x00000280; } + else if (pool->algorithm.type == ALGO_DECRED) { + memcpy(work->data, pool->header_bin, 4); // version + flip32(work->data + 4, pool->header_bin + 4); // prevhash + memcpy(work->data + 4 + 32, pool->coinbase, MIN((int)pool->swork.cb_len, 108)); + memcpy(work->data + 144, pool->nonce1bin, MIN(pool->n1_len, 36)); + ((uint32_t *)work->data)[36] = work->nonce2; +// ((uint32_t *)work->data)[36] = 2; + ((uint32_t *)work->data)[37] = ((rand() * 4) << 8) | work->thr_id; +// ((uint32_t *)work->data)[37] = 0x0000a400; + for (i = 39; i < 45; i++) + ((uint32_t *)work->data)[i] = 0; + } else { data32 = (uint32_t *)merkle_sha; swap32 = (uint32_t *)merkle_root; 
@@ -6112,14 +6200,16 @@ static void gen_stratum_work(struct pool *pool, struct work *work) if (opt_debug) { char *header, *merkle_hash; - header = bin2hex(work->data, 128); - merkle_hash = bin2hex((const unsigned char *)merkle_root, 32); - applog(LOG_DEBUG, "[THR%d] Generated stratum merkle %s", work->thr_id, merkle_hash); + header = bin2hex(work->data, datasize); + if (pool->algorithm.type != ALGO_DECRED) { + merkle_hash = bin2hex((const unsigned char *)merkle_root, 32); + applog(LOG_DEBUG, "[THR%d] Generated stratum merkle %s", work->thr_id, merkle_hash); + free(merkle_hash); + } applog(LOG_DEBUG, "[THR%d] Generated stratum header %s", work->thr_id, header); applog(LOG_DEBUG, "[THR%d] Work job_id %s nonce2 %"PRIu64" ntime %s", work->thr_id, work->job_id, work->nonce2, work->ntime); free(header); - free(merkle_hash); } // For Neoscrypt use set_target_neoscrypt() function @@ -7055,6 +7145,7 @@ static void rebuild_nonce(struct work *work, uint32_t nonce) { uint32_t nonce_pos = 76; if (work->pool->algorithm.type == ALGO_CRE) nonce_pos = 140; + else if (work->pool->algorithm.type == ALGO_DECRED) nonce_pos = 140; uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos); From 27c32cd5a93ee93adc1c0d5ede86b509f2acc3d1 Mon Sep 17 00:00:00 2001 From: elbandi Date: Fri, 1 Apr 2016 14:43:22 +0200 Subject: [PATCH 45/63] decred: optional --vote argument without arg or 0, dont vote (votebits set to 1) --- sgminer.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sgminer.c b/sgminer.c index a208f510c..3f59ce924 100644 --- a/sgminer.c +++ b/sgminer.c @@ -107,6 +107,7 @@ int nDevs; int opt_dynamic_interval = 7; int opt_g_threads = -1; bool opt_restart = true; +int opt_vote = 0; /***************************************** * Xn Algorithm options @@ -1803,6 +1804,9 @@ struct opt_table opt_config_table[] = { OPT_WITHOUT_ARG("--verbose|-v", opt_set_bool, &opt_verbose, "Log verbose output to stderr as well as status output"), + 
OPT_WITH_ARG("--vote", + set_int_1_to_65535, opt_show_intval, &opt_vote, + "Optional vote value for decred blocks"), OPT_WITH_ARG("--watchpool-refresh", set_int_1_to_65535, opt_show_intval, &opt_watchpool_refresh, "Interval in seconds to refresh pool status"), @@ -2283,6 +2287,8 @@ static bool getwork_decode(json_t *res_val, struct work *work) return false; } if (work->pool->algorithm.type == ALGO_DECRED) { + uint16_t vote = (uint16_t) (opt_vote << 1) | 1; + memcpy(&work->data[100], &vote, 2); // some random extradata to make it unique ((uint32_t*)work->data)[36] = (rand()*4); ((uint32_t*)work->data)[37] = (rand()*4) << 8 | work->thr_id; @@ -5642,9 +5648,16 @@ static void *stratum_sthread(void *userdata) sshare->id = swork_id++; mutex_unlock(&sshare_lock); - snprintf(s, sizeof(s), - "{\"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\": %d, \"method\": \"mining.submit\"}", - pool->rpc_user, work->job_id, nonce2hex, work->ntime, noncehex, sshare->id); + + if (pool->algorithm.type == ALGO_DECRED && opt_vote) { + snprintf(s, sizeof(s), + "{\"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%04x\"], \"id\": %d, \"method\": \"mining.submit\"}", + pool->rpc_user, work->job_id, nonce2hex, work->ntime, noncehex, (opt_vote << 1) | 1, sshare->id); + } else { + snprintf(s, sizeof(s), + "{\"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\": %d, \"method\": \"mining.submit\"}", + pool->rpc_user, work->job_id, nonce2hex, work->ntime, noncehex, sshare->id); + } applog(LOG_INFO, "Submitting share %08lx to %s", (long unsigned int)htole32(hash32[6]), get_pool_name(pool)); @@ -6166,10 +6179,12 @@ static void gen_stratum_work(struct pool *pool, struct work *work) ((uint32_t *)work->data)[31] = 0x00000280; } else if (pool->algorithm.type == ALGO_DECRED) { + uint16_t vote = (uint16_t) (opt_vote << 1) | 1; memcpy(work->data, pool->header_bin, 4); // version flip32(work->data + 4, pool->header_bin + 4); // prevhash memcpy(work->data + 4 + 32, pool->coinbase, 
MIN((int)pool->swork.cb_len, 108)); memcpy(work->data + 144, pool->nonce1bin, MIN(pool->n1_len, 36)); + memcpy(work->data + 100, &vote, 2); ((uint32_t *)work->data)[36] = work->nonce2; // ((uint32_t *)work->data)[36] = 2; ((uint32_t *)work->data)[37] = ((rand() * 4) << 8) | work->thr_id; From 4bf876dffb1d3aadcd76c7af97e37e77ff38ac64 Mon Sep 17 00:00:00 2001 From: elbandi Date: Tue, 26 Apr 2016 13:56:47 +0000 Subject: [PATCH 46/63] new stratum protocol --- sgminer.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/sgminer.c b/sgminer.c index 3f59ce924..a0e4ed1d8 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5604,7 +5604,7 @@ static void *stratum_sthread(void *userdata) if (unlikely(!work)) quit(1, "Stratum q returned empty work"); - if ((pool->algorithm.type != ALGO_DECRED && unlikely(work->nonce2_len > 8)) || (pool->algorithm.type == ALGO_DECRED && unlikely(work->nonce2_len > 16))) { + if (unlikely(work->nonce2_len > 8)) { applog(LOG_ERR, "%s asking for inappropriately long nonce2 length %d", get_pool_name(pool), (int)work->nonce2_len); applog(LOG_ERR, "Not attempting to submit shares"); free_work(work); @@ -5635,11 +5635,7 @@ static void *stratum_sthread(void *userdata) } __bin2hex(noncehex, (const unsigned char *)&nonce, 4); - if (pool->algorithm.type == ALGO_DECRED) { - memcpy(nonce2, work->data + 144, work->nonce2_len); - } else { - *((uint64_t *)nonce2) = htole64(work->nonce2); - } + *((uint64_t *)nonce2) = htole64(work->nonce2); __bin2hex(nonce2hex, nonce2, work->nonce2_len); memset(s, 0, 1024); @@ -6113,25 +6109,20 @@ void set_target_neoscrypt(unsigned char *target, double diff, const int thr_id) static void gen_stratum_work(struct pool *pool, struct work *work) { unsigned char merkle_root[32], merkle_sha[64]; - int datasize = 128; uint32_t *data32, *swap32; uint64_t nonce2le; int i, j; cg_wlock(&pool->data_lock); - if (pool->algorithm.type == ALGO_DECRED) { - datasize = 180; - work->nonce2 = pool->nonce2++; 
- work->nonce2_len = pool->n2size; - } else { + nonce2le = htole64(pool->nonce2); + if (pool->algorithm.type != ALGO_DECRED) { /* Update coinbase. Always use an LE encoded nonce2 to fill in values * from left to right and prevent overflow errors with small n2sizes */ - nonce2le = htole64(pool->nonce2); memcpy(pool->coinbase + pool->nonce2_offset, &nonce2le, pool->n2size); - work->nonce2 = pool->nonce2++; - work->nonce2_len = pool->n2size; } + work->nonce2 = pool->nonce2++; + work->nonce2_len = pool->n2size; /* Downgrade to a read lock to read off the pool variables */ cg_dwlock(&pool->data_lock); @@ -6180,17 +6171,15 @@ static void gen_stratum_work(struct pool *pool, struct work *work) } else if (pool->algorithm.type == ALGO_DECRED) { uint16_t vote = (uint16_t) (opt_vote << 1) | 1; + size_t nonce2_offset = MIN(pool->n1_len, 36); memcpy(work->data, pool->header_bin, 4); // version flip32(work->data + 4, pool->header_bin + 4); // prevhash memcpy(work->data + 4 + 32, pool->coinbase, MIN((int)pool->swork.cb_len, 108)); - memcpy(work->data + 144, pool->nonce1bin, MIN(pool->n1_len, 36)); memcpy(work->data + 100, &vote, 2); - ((uint32_t *)work->data)[36] = work->nonce2; -// ((uint32_t *)work->data)[36] = 2; - ((uint32_t *)work->data)[37] = ((rand() * 4) << 8) | work->thr_id; -// ((uint32_t *)work->data)[37] = 0x0000a400; - for (i = 39; i < 45; i++) + for (i = 36; i < 45; i++) ((uint32_t *)work->data)[i] = 0; + memcpy(work->data + 144, pool->nonce1bin, nonce2_offset); + memcpy(work->data + 144 + nonce2_offset, &nonce2le, pool->n2size); } else { data32 = (uint32_t *)merkle_sha; @@ -6214,6 +6203,8 @@ static void gen_stratum_work(struct pool *pool, struct work *work) if (opt_debug) { char *header, *merkle_hash; + int datasize = 128; + if (pool->algorithm.type == ALGO_DECRED) datasize = 180; header = bin2hex(work->data, datasize); if (pool->algorithm.type != ALGO_DECRED) { From 12fbc94288f56d9a0582c8cace4d3cedd7026968 Mon Sep 17 00:00:00 2001 From: elbandi Date: Mon, 23 May 
2016 23:34:00 +0200 Subject: [PATCH 47/63] Version bump to 5.4.0 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1b9dbf45e..1c8b6c09b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [5]) -m4_define([v_min], [3]) +m4_define([v_min], [4]) m4_define([v_mic], [0]) m4_define([v_rev], [nicehash]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## From 3a66fcf80a70ae3b28faf1eef5c8a428d3b9a8da Mon Sep 17 00:00:00 2001 From: elbandi Date: Mon, 18 Jul 2016 01:12:46 +0200 Subject: [PATCH 48/63] Fix blockfound diff --- sgminer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sgminer.c b/sgminer.c index a0e4ed1d8..83eddd838 100644 --- a/sgminer.c +++ b/sgminer.c @@ -7183,7 +7183,6 @@ bool test_nonce(struct work *work, uint32_t nonce) static void update_work_stats(struct thr_info *thr, struct work *work) { double test_diff = current_diff; - test_diff *= work->pool->algorithm.share_diff_multiplier; work->share_diff = share_diff(work); From 34a8140eb6a6b34ba0bebf68451ac7bf7c4b3b9f Mon Sep 17 00:00:00 2001 From: elbandi Date: Mon, 18 Jul 2016 17:48:45 +0000 Subject: [PATCH 49/63] Remove workpadding --- miner.h | 1 - sgminer.c | 5 +---- util.c | 23 +++++++++-------------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/miner.h b/miner.h index 3d6ce4296..cb28f7c2c 100644 --- a/miner.h +++ b/miner.h @@ -1240,7 +1240,6 @@ extern char current_hash[68]; extern double current_diff; extern double best_diff; extern struct timeval block_timeval; -extern char *workpadding; //config options table extern struct opt_table opt_config_table[]; diff --git a/sgminer.c b/sgminer.c index 83eddd838..6cb7944b9 100644 --- a/sgminer.c +++ b/sgminer.c @@ -1944,7 +1944,6 @@ void free_work(struct work *w) } static void calc_diff(struct work 
*work, double known); -char *workpadding = "000000800000000000000000000000000000000000000000000000000000000000000000000000000000000080020000"; #ifdef HAVE_LIBCURL /* Process transactions with GBT by storing the binary value of the first @@ -2139,9 +2138,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work) flip32(work->data + 4 + 32, merkleroot); free(merkleroot); - memset(work->data + 4 + 32 + 32 + 4 + 4, 0, 4); /* nonce */ - - hex2bin(work->data + 4 + 32 + 32 + 4 + 4 + 4, workpadding, 48); + memset(work->data + 4 + 32 + 32 + 4 + 4, 0, 4 + 48); /* nonce + padding */ if (opt_debug) { char *header = bin2hex(work->data, 128); diff --git a/util.c b/util.c index 3f76aa137..8bf6e22eb 100644 --- a/util.c +++ b/util.c @@ -1506,7 +1506,7 @@ static bool parse_notify(struct pool *pool, json_t *val) { char *job_id, *prev_hash, *coinbase1, *coinbase2, *bbversion, *nbit, *ntime, *header; - size_t cb1_len, cb2_len, alloc_len; + size_t cb1_len, cb2_len, alloc_len, header_len; unsigned char *cb1, *cb2; bool clean, ret = false; int merkles, i; @@ -1586,25 +1586,20 @@ static bool parse_notify(struct pool *pool, json_t *val) pool->nonce2 = 0; pool->merkle_offset = strlen(pool->swork.bbversion) + strlen(pool->swork.prev_hash); - pool->swork.header_len = pool->merkle_offset + - /* merkle_hash */ 32 + - strlen(pool->swork.ntime) + - strlen(pool->swork.nbit) + - /* nonce */ 8 + - /* workpadding */ 96; pool->merkle_offset /= 2; - pool->swork.header_len = pool->swork.header_len * 2 + 1; - align_len(&pool->swork.header_len); - header = (char *)alloca(pool->swork.header_len); - snprintf(header, pool->swork.header_len, - "%s%s%s%s%s%s%s", + header = (char *)alloca(257); + snprintf(header, 257, + "%s%s%s%s%s%s", pool->swork.bbversion, pool->swork.prev_hash, blank_merkel, pool->swork.ntime, pool->swork.nbit, - "00000000", /* nonce */ - workpadding); + "00000000" /* nonce */ + ); + header_len = strlen(header); + memset(header + header_len, '0', 256 - header_len); + header[256] = 
'\0'; if (unlikely(!hex2bin(pool->header_bin, header, 128))) { applog(LOG_WARNING, "%s: Failed to convert header to header_bin, got %s", __func__, header); pool_failed(pool); From 02e0fc4db5cdd2c80ea08d45b1be6ae4b81e0512 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 17 Jul 2016 22:36:27 +0000 Subject: [PATCH 50/63] Add lbry algo support --- Makefile.am | 1 + algorithm.c | 31 +- algorithm.h | 3 +- algorithm/lbry.c | 60 +++ algorithm/lbry.h | 8 + kernel/lbry.cl | 179 +++++++++ kernel/ripemd160.cl | 423 +++++++++++++++++++++ kernel/sha256.cl | 149 ++++++++ kernel/wolf-sha512.cl | 108 ++++++ miner.h | 10 + ocl.c | 1 + sgminer.c | 4 + sph/Makefile.am | 2 +- sph/ripemd.c | 833 ++++++++++++++++++++++++++++++++++++++++++ sph/sph_ripemd.h | 273 ++++++++++++++ util.c | 37 +- 16 files changed, 2105 insertions(+), 17 deletions(-) create mode 100644 algorithm/lbry.c create mode 100644 algorithm/lbry.h create mode 100644 kernel/lbry.cl create mode 100644 kernel/ripemd160.cl create mode 100644 kernel/sha256.cl create mode 100644 kernel/wolf-sha512.cl create mode 100644 sph/ripemd.c create mode 100644 sph/sph_ripemd.h diff --git a/Makefile.am b/Makefile.am index e8a3f3d48..137a5723e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -80,6 +80,7 @@ sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_ sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h sgminer_SOURCES += algorithm/decred.c algorithm/decred.h +sgminer_SOURCES += algorithm/lbry.c algorithm/lbry.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index ad41a494f..a4637ba0a 100644 --- a/algorithm.c +++ b/algorithm.c @@ -40,6 +40,7 @@ #include "algorithm/blake256.h" #include "algorithm/blakecoin.h" #include "algorithm/decred.h" +#include "algorithm/lbry.h" #include "compat.h" @@ -72,7 +73,8 @@ const char *algorithm_type_str[] = { "Blakecoin", "Blake", "Decred", - "Vanilla" + "Vanilla", + "Lbry" 
}; void sha256(const unsigned char *message, unsigned int len, unsigned char *digest) @@ -1007,6 +1009,31 @@ static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u return status; } +static cl_int queue_lbry_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->target + 24); + flip112(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 112, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + num = 0; + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + num = 0; + + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using litecoin algorithm #define A_SCRYPT(a) \ @@ -1104,6 +1131,8 @@ static algorithm_settings_t algos[] = { { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, blake256_midstate, blake256_prepare_work, queue_blake_kernel, gen_hash, NULL }, { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL }, + { "lbry", ALGO_LBRY, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 2, 4 * 8 * 4194304, 0, lbry_regenhash, NULL, NULL, queue_lbry_kernel, gen_hash, NULL }, + // Terminator (do not remove) { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } }; diff --git a/algorithm.h b/algorithm.h index 5628d0067..c02da8a66 100644 --- a/algorithm.h +++ b/algorithm.h @@ -37,7 +37,8 @@ typedef enum { ALGO_BLAKECOIN, 
ALGO_BLAKE, ALGO_DECRED, - ALGO_VANILLA + ALGO_VANILLA, + ALGO_LBRY } algorithm_type_t; extern const char *algorithm_type_str[]; diff --git a/algorithm/lbry.c b/algorithm/lbry.c new file mode 100644 index 000000000..163c2e73f --- /dev/null +++ b/algorithm/lbry.c @@ -0,0 +1,60 @@ +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "sph/sph_sha2.h" +#include "sph/sph_ripemd.h" + +typedef struct { + sph_sha256_context sha256; + sph_sha512_context sha512; + sph_ripemd160_context ripemd; +} lbryhash_context_holder; + +void lbryhash(void* output, const void* input) +{ + uint32_t hashA[16], hashB[16], hashC[16]; + lbryhash_context_holder ctx; + + sph_sha256_init(&ctx.sha256); + sph_sha512_init(&ctx.sha512); + sph_ripemd160_init(&ctx.ripemd); + + sph_sha256 (&ctx.sha256, input, 112); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha256 (&ctx.sha256, hashA, 32); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha512 (&ctx.sha512, hashA, 32); + sph_sha512_close(&ctx.sha512, hashA); + + sph_ripemd160 (&ctx.ripemd, hashA, 32); + sph_ripemd160_close(&ctx.ripemd, hashB); + + sph_ripemd160 (&ctx.ripemd, hashA+8, 32); + sph_ripemd160_close(&ctx.ripemd, hashC); + + sph_sha256 (&ctx.sha256, hashB, 20); + sph_sha256 (&ctx.sha256, hashC, 20); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha256 (&ctx.sha256, hashA, 32); + sph_sha256_close(&ctx.sha256, hashA); + + memcpy(output, hashA, 32); +} + +void lbry_regenhash(struct work *work) +{ + uint32_t data[28]; + uint32_t *nonce = (uint32_t *)(work->data + 108); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 27); + data[27] = htobe32(*nonce); + lbryhash(ohash, data); +} diff --git a/algorithm/lbry.h b/algorithm/lbry.h new file mode 100644 index 000000000..c9bbdbdf1 --- /dev/null +++ b/algorithm/lbry.h @@ -0,0 +1,8 @@ +#ifndef LBRY_H +#define LBRY_H + +#include "miner.h" + +extern void lbry_regenhash(struct work *work); + +#endif diff --git 
a/kernel/lbry.cl b/kernel/lbry.cl new file mode 100644 index 000000000..fef4f90e7 --- /dev/null +++ b/kernel/lbry.cl @@ -0,0 +1,179 @@ +#include "sha256.cl" +#include "wolf-sha512.cl" +#include "ripemd160.cl" + +#define SWAP32(x) as_uint(as_uchar4(x).s3210) +#define SWAP64(x) as_ulong(as_uchar8(x).s76543210) + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global const uint *input, __global uint8 *ctx) +{ + // SHA256 takes 16 uints of input per block - we have 112 bytes to process + // 8 * 16 == 64, meaning two block transforms. + + uint SHA256Buf[16]; + uint gid = get_global_id(0); + + // Remember the last four is the nonce - so 108 bytes / 4 bytes per dword + #pragma unroll + for(int i = 0; i < 16; ++i) SHA256Buf[i] = SWAP32(input[i]); + + + + // SHA256 initialization constants + uint8 outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + + #pragma unroll + for(int i = 0; i < 3; ++i) + { + if(i == 1) + { + #pragma unroll + for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]); + SHA256Buf[11] = SWAP32(gid); + SHA256Buf[12] = 0x80000000; + SHA256Buf[13] = 0x00000000; + SHA256Buf[14] = 0x00000000; + SHA256Buf[15] = 0x00000380; + } + if(i == 2) + { + ((uint8 *)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + #pragma unroll + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + } + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + } + + /* + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + #pragma unroll + for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]); + SHA256Buf[11] = SWAP32(gid); + SHA256Buf[12] = 0x80000000; + SHA256Buf[13] = 0x00000000; + SHA256Buf[14] = 0x00000000; + SHA256Buf[15] = 0x00000380; + + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + ((uint8 
*)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + */ + + + /* + + //outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + //outbuf = sha256_round(((uint16 *)SHA256Buf)[1], outbuf); + + // outbuf would normall be SWAP32'd here, but it'll need it again + // once we use it as input to the next SHA256, so it negates. + + ((uint8 *)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + */ + + + + outbuf.s0 = SWAP32(outbuf.s0); + outbuf.s1 = SWAP32(outbuf.s1); + outbuf.s2 = SWAP32(outbuf.s2); + outbuf.s3 = SWAP32(outbuf.s3); + outbuf.s4 = SWAP32(outbuf.s4); + outbuf.s5 = SWAP32(outbuf.s5); + outbuf.s6 = SWAP32(outbuf.s6); + outbuf.s7 = SWAP32(outbuf.s7); + + ctx[get_global_id(0) - get_global_offset(0)] = outbuf; +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global uint8 *ctx) +{ + ulong W[16] = { 0UL }, SHA512Out[8]; + uint SHA256Buf[16]; + uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)]; + + ((uint8 *)W)[0] = outbuf; + + for(int i = 0; i < 4; ++i) W[i] = SWAP64(W[i]); + + W[4] = 0x8000000000000000UL; + W[15] = 0x0000000000000100UL; + + for(int i = 0; i < 8; ++i) SHA512Out[i] = SHA512_INIT[i]; + + SHA512Block(W, SHA512Out); + + for(int i = 0; i < 8; ++i) SHA512Out[i] = SWAP64(SHA512Out[i]); + + uint RMD160_0[16] = { 0U }; + uint RMD160_1[16] = { 0U }; + uint RMD160_0_Out[5], RMD160_1_Out[5]; + + for(int i = 0; i < 4; ++i) + { + ((ulong *)RMD160_0)[i] = SHA512Out[i]; + ((ulong *)RMD160_1)[i] = 
SHA512Out[i + 4]; + } + + RMD160_0[8] = RMD160_1[8] = 0x00000080; + RMD160_0[14] = RMD160_1[14] = 0x00000100; + + for(int i = 0; i < 5; ++i) + { + RMD160_0_Out[i] = RMD160_IV[i]; + RMD160_1_Out[i] = RMD160_IV[i]; + } + + RIPEMD160_ROUND_BODY(RMD160_0, RMD160_0_Out); + RIPEMD160_ROUND_BODY(RMD160_1, RMD160_1_Out); + + for(int i = 0; i < 5; ++i) SHA256Buf[i] = SWAP32(RMD160_0_Out[i]); + for(int i = 5; i < 10; ++i) SHA256Buf[i] = SWAP32(RMD160_1_Out[i - 5]); + SHA256Buf[10] = 0x80000000; + + for(int i = 11; i < 15; ++i) SHA256Buf[i] = 0x00000000U; + + SHA256Buf[15] = 0x00000140; + + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + + ctx[get_global_id(0) - get_global_offset(0)] = outbuf; +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global uint8 *ctx, __global uint *output, ulong target) +{ + uint SHA256Buf[16] = { 0U }; + uint gid = get_global_id(0); + uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)]; + + ((uint8 *)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + + outbuf.s6 = SWAP32(outbuf.s6); + outbuf.s7 = SWAP32(outbuf.s7); + + if(as_ulong(outbuf.s67) <= target) + output[atomic_inc(output+0xFF)] = SWAP32(gid); +} diff --git a/kernel/ripemd160.cl b/kernel/ripemd160.cl new file mode 100644 index 000000000..b481b4232 --- /dev/null +++ b/kernel/ripemd160.cl @@ -0,0 +1,423 @@ +#define RIPEMD160_IN(x) W[x] + +// Round functions for RIPEMD-128 and RIPEMD-160. 
+ +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +#define K11 0x00000000 +#define K12 0x5A827999 +#define K13 0x6ED9EBA1 +#define K14 0x8F1BBCDC +#define K15 0xA953FD4E + +#define K21 0x50A28BE6 +#define K22 0x5C4DD124 +#define K23 0x6D703EF3 +#define K24 0x7A6D76E9 +#define K25 0x00000000 + +const __constant uint RMD160_IV[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 }; + +#define RR(a, b, c, d, e, f, s, r, k) do { \ + const uint rrtmp = a + f(b, c, d) + r + k; \ + a = amd_bitalign(rrtmp, rrtmp, 32U - (uint)s) + e; \ + c = amd_bitalign(c, c, 32U - 10U); \ + } while (0) + +#define ROUND1(a, b, c, d, e, f, s, r, k) \ + RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k) + +#define ROUND2(a, b, c, d, e, f, s, r, k) \ + RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k) + +/* + * This macro defines the body for a RIPEMD-160 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 5 words which + * contains the input and output of the compression function. 
+ */ + +//#define RIPEMD160_ROUND_BODY(in, h) do { \ + uint A1, B1, C1, D1, E1; \ + uint A2, B2, C2, D2, E2; \ + uint tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + E1 = E2 = (h)[4]; \ + \ + ROUND1(A, B, C, D, E, F1, 11, (in)[ 0], 1); \ + ROUND1(E, A, B, C, D, F1, 14, (in)[ 1], 1); \ + ROUND1(D, E, A, B, C, F1, 15, (in)[ 2], 1); \ + ROUND1(C, D, E, A, B, F1, 12, (in)[ 3], 1); \ + ROUND1(B, C, D, E, A, F1, 5, (in)[ 4], 1); \ + ROUND1(A, B, C, D, E, F1, 8, (in)[ 5], 1); \ + ROUND1(E, A, B, C, D, F1, 7, (in)[ 6], 1); \ + ROUND1(D, E, A, B, C, F1, 9, (in)[ 7], 1); \ + ROUND1(C, D, E, A, B, F1, 11, (in)[ 8], 1); \ + ROUND1(B, C, D, E, A, F1, 13, (in)[ 9], 1); \ + ROUND1(A, B, C, D, E, F1, 14, (in)[10], 1); \ + ROUND1(E, A, B, C, D, F1, 15, (in)[11], 1); \ + ROUND1(D, E, A, B, C, F1, 6, (in)[12], 1); \ + ROUND1(C, D, E, A, B, F1, 7, (in)[13], 1); \ + ROUND1(B, C, D, E, A, F1, 9, (in)[14], 1); \ + ROUND1(A, B, C, D, E, F1, 8, (in)[15], 1); \ + \ + ROUND1(E, A, B, C, D, F2, 7, (in)[ 7], 2); \ + ROUND1(D, E, A, B, C, F2, 6, (in)[ 4], 2); \ + ROUND1(C, D, E, A, B, F2, 8, (in)[13], 2); \ + ROUND1(B, C, D, E, A, F2, 13, (in)[ 1], 2); \ + ROUND1(A, B, C, D, E, F2, 11, (in)[10], 2); \ + ROUND1(E, A, B, C, D, F2, 9, (in)[ 6], 2); \ + ROUND1(D, E, A, B, C, F2, 7, (in)[15], 2); \ + ROUND1(C, D, E, A, B, F2, 15, (in)[ 3], 2); \ + ROUND1(B, C, D, E, A, F2, 7, (in)[12], 2); \ + ROUND1(A, B, C, D, E, F2, 12, (in)[ 0], 2); \ + ROUND1(E, A, B, C, D, F2, 15, (in)[ 9], 2); \ + ROUND1(D, E, A, B, C, F2, 9, (in)[ 5], 2); \ + ROUND1(C, D, E, A, B, F2, 11, (in)[ 2], 2); \ + ROUND1(B, C, D, E, A, F2, 7, (in)[14], 2); \ + ROUND1(A, B, C, D, E, F2, 13, (in)[11], 2); \ + ROUND1(E, A, B, C, D, F2, 12, (in)[ 8], 2); \ + \ + ROUND1(D, E, A, B, C, F3, 11, (in)[ 3], 3); \ + ROUND1(C, D, E, A, B, F3, 13, (in)[10], 3); \ + ROUND1(B, C, D, E, A, F3, 6, (in)[14], 3); \ + ROUND1(A, B, C, D, E, F3, 7, (in)[ 4], 3); \ + ROUND1(E, A, B, C, D, F3, 14, (in)[ 
9], 3); \ + ROUND1(D, E, A, B, C, F3, 9, (in)[15], 3); \ + ROUND1(C, D, E, A, B, F3, 13, (in)[ 8], 3); \ + ROUND1(B, C, D, E, A, F3, 15, (in)[ 1], 3); \ + ROUND1(A, B, C, D, E, F3, 14, (in)[ 2], 3); \ + ROUND1(E, A, B, C, D, F3, 8, (in)[ 7], 3); \ + ROUND1(D, E, A, B, C, F3, 13, (in)[ 0], 3); \ + ROUND1(C, D, E, A, B, F3, 6, (in)[ 6], 3); \ + ROUND1(B, C, D, E, A, F3, 5, (in)[13], 3); \ + ROUND1(A, B, C, D, E, F3, 12, (in)[11], 3); \ + ROUND1(E, A, B, C, D, F3, 7, (in)[ 5], 3); \ + ROUND1(D, E, A, B, C, F3, 5, (in)[12], 3); \ + \ + ROUND1(C, D, E, A, B, F4, 11, (in)[ 1], 4); \ + ROUND1(B, C, D, E, A, F4, 12, (in)[ 9], 4); \ + ROUND1(A, B, C, D, E, F4, 14, (in)[11], 4); \ + ROUND1(E, A, B, C, D, F4, 15, (in)[10], 4); \ + ROUND1(D, E, A, B, C, F4, 14, (in)[ 0], 4); \ + ROUND1(C, D, E, A, B, F4, 15, (in)[ 8], 4); \ + ROUND1(B, C, D, E, A, F4, 9, (in)[12], 4); \ + ROUND1(A, B, C, D, E, F4, 8, (in)[ 4], 4); \ + ROUND1(E, A, B, C, D, F4, 9, (in)[13], 4); \ + ROUND1(D, E, A, B, C, F4, 14, (in)[ 3], 4); \ + ROUND1(C, D, E, A, B, F4, 5, (in)[ 7], 4); \ + ROUND1(B, C, D, E, A, F4, 6, (in)[15], 4); \ + ROUND1(A, B, C, D, E, F4, 8, (in)[14], 4); \ + ROUND1(E, A, B, C, D, F4, 6, (in)[ 5], 4); \ + ROUND1(D, E, A, B, C, F4, 5, (in)[ 6], 4); \ + ROUND1(C, D, E, A, B, F4, 12, (in)[ 2], 4); \ + \ + ROUND1(B, C, D, E, A, F5, 9, (in)[ 4], 5); \ + ROUND1(A, B, C, D, E, F5, 15, (in)[ 0], 5); \ + ROUND1(E, A, B, C, D, F5, 5, (in)[ 5], 5); \ + ROUND1(D, E, A, B, C, F5, 11, (in)[ 9], 5); \ + ROUND1(C, D, E, A, B, F5, 6, (in)[ 7], 5); \ + ROUND1(B, C, D, E, A, F5, 8, (in)[12], 5); \ + ROUND1(A, B, C, D, E, F5, 13, (in)[ 2], 5); \ + ROUND1(E, A, B, C, D, F5, 12, (in)[10], 5); \ + ROUND1(D, E, A, B, C, F5, 5, (in)[14], 5); \ + ROUND1(C, D, E, A, B, F5, 12, (in)[ 1], 5); \ + ROUND1(B, C, D, E, A, F5, 13, (in)[ 3], 5); \ + ROUND1(A, B, C, D, E, F5, 14, (in)[ 8], 5); \ + ROUND1(E, A, B, C, D, F5, 11, (in)[11], 5); \ + ROUND1(D, E, A, B, C, F5, 8, (in)[ 6], 5); \ + ROUND1(C, D, E, A, B, F5, 5, 
(in)[15], 5); \ + ROUND1(B, C, D, E, A, F5, 6, (in)[13], 5); \ + \ + ROUND2(A, B, C, D, E, F5, 8, (in)[ 5], 1); \ + ROUND2(E, A, B, C, D, F5, 9, (in)[14], 1); \ + ROUND2(D, E, A, B, C, F5, 9, (in)[ 7], 1); \ + ROUND2(C, D, E, A, B, F5, 11, (in)[ 0], 1); \ + ROUND2(B, C, D, E, A, F5, 13, (in)[ 9], 1); \ + ROUND2(A, B, C, D, E, F5, 15, (in)[ 2], 1); \ + ROUND2(E, A, B, C, D, F5, 15, (in)[11], 1); \ + ROUND2(D, E, A, B, C, F5, 5, (in)[ 4], 1); \ + ROUND2(C, D, E, A, B, F5, 7, (in)[13], 1); \ + ROUND2(B, C, D, E, A, F5, 7, (in)[ 6], 1); \ + ROUND2(A, B, C, D, E, F5, 8, (in)[15], 1); \ + ROUND2(E, A, B, C, D, F5, 11, (in)[ 8], 1); \ + ROUND2(D, E, A, B, C, F5, 14, (in)[ 1], 1); \ + ROUND2(C, D, E, A, B, F5, 14, (in)[10], 1); \ + ROUND2(B, C, D, E, A, F5, 12, (in)[ 3], 1); \ + ROUND2(A, B, C, D, E, F5, 6, (in)[12], 1); \ + \ + ROUND2(E, A, B, C, D, F4, 9, (in)[ 6], 2); \ + ROUND2(D, E, A, B, C, F4, 13, (in)[11], 2); \ + ROUND2(C, D, E, A, B, F4, 15, (in)[ 3], 2); \ + ROUND2(B, C, D, E, A, F4, 7, (in)[ 7], 2); \ + ROUND2(A, B, C, D, E, F4, 12, (in)[ 0], 2); \ + ROUND2(E, A, B, C, D, F4, 8, (in)[13], 2); \ + ROUND2(D, E, A, B, C, F4, 9, (in)[ 5], 2); \ + ROUND2(C, D, E, A, B, F4, 11, (in)[10], 2); \ + ROUND2(B, C, D, E, A, F4, 7, (in)[14], 2); \ + ROUND2(A, B, C, D, E, F4, 7, (in)[15], 2); \ + ROUND2(E, A, B, C, D, F4, 12, (in)[ 8], 2); \ + ROUND2(D, E, A, B, C, F4, 7, (in)[12], 2); \ + ROUND2(C, D, E, A, B, F4, 6, (in)[ 4], 2); \ + ROUND2(B, C, D, E, A, F4, 15, (in)[ 9], 2); \ + ROUND2(A, B, C, D, E, F4, 13, (in)[ 1], 2); \ + ROUND2(E, A, B, C, D, F4, 11, (in)[ 2], 2); \ + \ + ROUND2(D, E, A, B, C, F3, 9, (in)[15], 3); \ + ROUND2(C, D, E, A, B, F3, 7, (in)[ 5], 3); \ + ROUND2(B, C, D, E, A, F3, 15, (in)[ 1], 3); \ + ROUND2(A, B, C, D, E, F3, 11, (in)[ 3], 3); \ + ROUND2(E, A, B, C, D, F3, 8, (in)[ 7], 3); \ + ROUND2(D, E, A, B, C, F3, 6, (in)[14], 3); \ + ROUND2(C, D, E, A, B, F3, 6, (in)[ 6], 3); \ + ROUND2(B, C, D, E, A, F3, 14, (in)[ 9], 3); \ + ROUND2(A, B, C, D, E, 
F3, 12, (in)[11], 3); \ + ROUND2(E, A, B, C, D, F3, 13, (in)[ 8], 3); \ + ROUND2(D, E, A, B, C, F3, 5, (in)[12], 3); \ + ROUND2(C, D, E, A, B, F3, 14, (in)[ 2], 3); \ + ROUND2(B, C, D, E, A, F3, 13, (in)[10], 3); \ + ROUND2(A, B, C, D, E, F3, 13, (in)[ 0], 3); \ + ROUND2(E, A, B, C, D, F3, 7, (in)[ 4], 3); \ + ROUND2(D, E, A, B, C, F3, 5, (in)[13], 3); \ + \ + ROUND2(C, D, E, A, B, F2, 15, (in)[ 8], 4); \ + ROUND2(B, C, D, E, A, F2, 5, (in)[ 6], 4); \ + ROUND2(A, B, C, D, E, F2, 8, (in)[ 4], 4); \ + ROUND2(E, A, B, C, D, F2, 11, (in)[ 1], 4); \ + ROUND2(D, E, A, B, C, F2, 14, (in)[ 3], 4); \ + ROUND2(C, D, E, A, B, F2, 14, (in)[11], 4); \ + ROUND2(B, C, D, E, A, F2, 6, (in)[15], 4); \ + ROUND2(A, B, C, D, E, F2, 14, (in)[ 0], 4); \ + ROUND2(E, A, B, C, D, F2, 6, (in)[ 5], 4); \ + ROUND2(D, E, A, B, C, F2, 9, (in)[12], 4); \ + ROUND2(C, D, E, A, B, F2, 12, (in)[ 2], 4); \ + ROUND2(B, C, D, E, A, F2, 9, (in)[13], 4); \ + ROUND2(A, B, C, D, E, F2, 12, (in)[ 9], 4); \ + ROUND2(E, A, B, C, D, F2, 5, (in)[ 7], 4); \ + ROUND2(D, E, A, B, C, F2, 15, (in)[10], 4); \ + ROUND2(C, D, E, A, B, F2, 8, (in)[14], 4); \ + \ + ROUND2(B, C, D, E, A, F1, 8, (in)[12], 5); \ + ROUND2(A, B, C, D, E, F1, 5, (in)[15], 5); \ + ROUND2(E, A, B, C, D, F1, 12, (in)[10], 5); \ + ROUND2(D, E, A, B, C, F1, 9, (in)[ 4], 5); \ + ROUND2(C, D, E, A, B, F1, 12, (in)[ 1], 5); \ + ROUND2(B, C, D, E, A, F1, 5, (in)[ 5], 5); \ + ROUND2(A, B, C, D, E, F1, 14, (in)[ 8], 5); \ + ROUND2(E, A, B, C, D, F1, 6, (in)[ 7], 5); \ + ROUND2(D, E, A, B, C, F1, 8, (in)[ 6], 5); \ + ROUND2(C, D, E, A, B, F1, 13, (in)[ 2], 5); \ + ROUND2(B, C, D, E, A, F1, 6, (in)[13], 5); \ + ROUND2(A, B, C, D, E, F1, 5, (in)[14], 5); \ + ROUND2(E, A, B, C, D, F1, 15, (in)[ 0], 5); \ + ROUND2(D, E, A, B, C, F1, 13, (in)[ 3], 5); \ + ROUND2(C, D, E, A, B, F1, 11, (in)[ 9], 5); \ + ROUND2(B, C, D, E, A, F1, 11, (in)[11], 5); \ + \ + tmp = (h)[1] + C1 + D2; \ + (h)[1] = (h)[2] + D1 + E2; \ + (h)[2] = (h)[3] + E1 + A2; \ + (h)[3] = (h)[4] + 
A1 + B2; \ + (h)[4] = (h)[0] + B1 + C2; \ + (h)[0] = tmp; \ + } while (0) + +void RIPEMD160_ROUND_BODY(uint *in, uint *h) +{ + uint A1, B1, C1, D1, E1; + uint A2, B2, C2, D2, E2; + uint tmp; + + A1 = A2 = (h)[0]; + B1 = B2 = (h)[1]; + C1 = C2 = (h)[2]; + D1 = D2 = (h)[3]; + E1 = E2 = (h)[4]; + + ROUND1(A, B, C, D, E, F1, 11, (in)[ 0], 1); + ROUND1(E, A, B, C, D, F1, 14, (in)[ 1], 1); + ROUND1(D, E, A, B, C, F1, 15, (in)[ 2], 1); + ROUND1(C, D, E, A, B, F1, 12, (in)[ 3], 1); + ROUND1(B, C, D, E, A, F1, 5, (in)[ 4], 1); + ROUND1(A, B, C, D, E, F1, 8, (in)[ 5], 1); + ROUND1(E, A, B, C, D, F1, 7, (in)[ 6], 1); + ROUND1(D, E, A, B, C, F1, 9, (in)[ 7], 1); + ROUND1(C, D, E, A, B, F1, 11, (in)[ 8], 1); + ROUND1(B, C, D, E, A, F1, 13, (in)[ 9], 1); + ROUND1(A, B, C, D, E, F1, 14, (in)[10], 1); + ROUND1(E, A, B, C, D, F1, 15, (in)[11], 1); + ROUND1(D, E, A, B, C, F1, 6, (in)[12], 1); + ROUND1(C, D, E, A, B, F1, 7, (in)[13], 1); + ROUND1(B, C, D, E, A, F1, 9, (in)[14], 1); + ROUND1(A, B, C, D, E, F1, 8, (in)[15], 1); + + ROUND1(E, A, B, C, D, F2, 7, (in)[ 7], 2); + ROUND1(D, E, A, B, C, F2, 6, (in)[ 4], 2); + ROUND1(C, D, E, A, B, F2, 8, (in)[13], 2); + ROUND1(B, C, D, E, A, F2, 13, (in)[ 1], 2); + ROUND1(A, B, C, D, E, F2, 11, (in)[10], 2); + ROUND1(E, A, B, C, D, F2, 9, (in)[ 6], 2); + ROUND1(D, E, A, B, C, F2, 7, (in)[15], 2); + ROUND1(C, D, E, A, B, F2, 15, (in)[ 3], 2); + ROUND1(B, C, D, E, A, F2, 7, (in)[12], 2); + ROUND1(A, B, C, D, E, F2, 12, (in)[ 0], 2); + ROUND1(E, A, B, C, D, F2, 15, (in)[ 9], 2); + ROUND1(D, E, A, B, C, F2, 9, (in)[ 5], 2); + ROUND1(C, D, E, A, B, F2, 11, (in)[ 2], 2); + ROUND1(B, C, D, E, A, F2, 7, (in)[14], 2); + ROUND1(A, B, C, D, E, F2, 13, (in)[11], 2); + ROUND1(E, A, B, C, D, F2, 12, (in)[ 8], 2); + + ROUND1(D, E, A, B, C, F3, 11, (in)[ 3], 3); + ROUND1(C, D, E, A, B, F3, 13, (in)[10], 3); + ROUND1(B, C, D, E, A, F3, 6, (in)[14], 3); + ROUND1(A, B, C, D, E, F3, 7, (in)[ 4], 3); + ROUND1(E, A, B, C, D, F3, 14, (in)[ 9], 3); + ROUND1(D, E, 
A, B, C, F3, 9, (in)[15], 3); + ROUND1(C, D, E, A, B, F3, 13, (in)[ 8], 3); + ROUND1(B, C, D, E, A, F3, 15, (in)[ 1], 3); + ROUND1(A, B, C, D, E, F3, 14, (in)[ 2], 3); + ROUND1(E, A, B, C, D, F3, 8, (in)[ 7], 3); + ROUND1(D, E, A, B, C, F3, 13, (in)[ 0], 3); + ROUND1(C, D, E, A, B, F3, 6, (in)[ 6], 3); + ROUND1(B, C, D, E, A, F3, 5, (in)[13], 3); + ROUND1(A, B, C, D, E, F3, 12, (in)[11], 3); + ROUND1(E, A, B, C, D, F3, 7, (in)[ 5], 3); + ROUND1(D, E, A, B, C, F3, 5, (in)[12], 3); + + ROUND1(C, D, E, A, B, F4, 11, (in)[ 1], 4); + ROUND1(B, C, D, E, A, F4, 12, (in)[ 9], 4); + ROUND1(A, B, C, D, E, F4, 14, (in)[11], 4); + ROUND1(E, A, B, C, D, F4, 15, (in)[10], 4); + ROUND1(D, E, A, B, C, F4, 14, (in)[ 0], 4); + ROUND1(C, D, E, A, B, F4, 15, (in)[ 8], 4); + ROUND1(B, C, D, E, A, F4, 9, (in)[12], 4); + ROUND1(A, B, C, D, E, F4, 8, (in)[ 4], 4); + ROUND1(E, A, B, C, D, F4, 9, (in)[13], 4); + ROUND1(D, E, A, B, C, F4, 14, (in)[ 3], 4); + ROUND1(C, D, E, A, B, F4, 5, (in)[ 7], 4); + ROUND1(B, C, D, E, A, F4, 6, (in)[15], 4); + ROUND1(A, B, C, D, E, F4, 8, (in)[14], 4); + ROUND1(E, A, B, C, D, F4, 6, (in)[ 5], 4); + ROUND1(D, E, A, B, C, F4, 5, (in)[ 6], 4); + ROUND1(C, D, E, A, B, F4, 12, (in)[ 2], 4); + + ROUND1(B, C, D, E, A, F5, 9, (in)[ 4], 5); + ROUND1(A, B, C, D, E, F5, 15, (in)[ 0], 5); + ROUND1(E, A, B, C, D, F5, 5, (in)[ 5], 5); + ROUND1(D, E, A, B, C, F5, 11, (in)[ 9], 5); + ROUND1(C, D, E, A, B, F5, 6, (in)[ 7], 5); + ROUND1(B, C, D, E, A, F5, 8, (in)[12], 5); + ROUND1(A, B, C, D, E, F5, 13, (in)[ 2], 5); + ROUND1(E, A, B, C, D, F5, 12, (in)[10], 5); + ROUND1(D, E, A, B, C, F5, 5, (in)[14], 5); + ROUND1(C, D, E, A, B, F5, 12, (in)[ 1], 5); + ROUND1(B, C, D, E, A, F5, 13, (in)[ 3], 5); + ROUND1(A, B, C, D, E, F5, 14, (in)[ 8], 5); + ROUND1(E, A, B, C, D, F5, 11, (in)[11], 5); + ROUND1(D, E, A, B, C, F5, 8, (in)[ 6], 5); + ROUND1(C, D, E, A, B, F5, 5, (in)[15], 5); + ROUND1(B, C, D, E, A, F5, 6, (in)[13], 5); + + ROUND2(A, B, C, D, E, F5, 8, (in)[ 5], 1); + 
ROUND2(E, A, B, C, D, F5, 9, (in)[14], 1); + ROUND2(D, E, A, B, C, F5, 9, (in)[ 7], 1); + ROUND2(C, D, E, A, B, F5, 11, (in)[ 0], 1); + ROUND2(B, C, D, E, A, F5, 13, (in)[ 9], 1); + ROUND2(A, B, C, D, E, F5, 15, (in)[ 2], 1); + ROUND2(E, A, B, C, D, F5, 15, (in)[11], 1); + ROUND2(D, E, A, B, C, F5, 5, (in)[ 4], 1); + ROUND2(C, D, E, A, B, F5, 7, (in)[13], 1); + ROUND2(B, C, D, E, A, F5, 7, (in)[ 6], 1); + ROUND2(A, B, C, D, E, F5, 8, (in)[15], 1); + ROUND2(E, A, B, C, D, F5, 11, (in)[ 8], 1); + ROUND2(D, E, A, B, C, F5, 14, (in)[ 1], 1); + ROUND2(C, D, E, A, B, F5, 14, (in)[10], 1); + ROUND2(B, C, D, E, A, F5, 12, (in)[ 3], 1); + ROUND2(A, B, C, D, E, F5, 6, (in)[12], 1); + + ROUND2(E, A, B, C, D, F4, 9, (in)[ 6], 2); + ROUND2(D, E, A, B, C, F4, 13, (in)[11], 2); + ROUND2(C, D, E, A, B, F4, 15, (in)[ 3], 2); + ROUND2(B, C, D, E, A, F4, 7, (in)[ 7], 2); + ROUND2(A, B, C, D, E, F4, 12, (in)[ 0], 2); + ROUND2(E, A, B, C, D, F4, 8, (in)[13], 2); + ROUND2(D, E, A, B, C, F4, 9, (in)[ 5], 2); + ROUND2(C, D, E, A, B, F4, 11, (in)[10], 2); + ROUND2(B, C, D, E, A, F4, 7, (in)[14], 2); + ROUND2(A, B, C, D, E, F4, 7, (in)[15], 2); + ROUND2(E, A, B, C, D, F4, 12, (in)[ 8], 2); + ROUND2(D, E, A, B, C, F4, 7, (in)[12], 2); + ROUND2(C, D, E, A, B, F4, 6, (in)[ 4], 2); + ROUND2(B, C, D, E, A, F4, 15, (in)[ 9], 2); + ROUND2(A, B, C, D, E, F4, 13, (in)[ 1], 2); + ROUND2(E, A, B, C, D, F4, 11, (in)[ 2], 2); + + ROUND2(D, E, A, B, C, F3, 9, (in)[15], 3); + ROUND2(C, D, E, A, B, F3, 7, (in)[ 5], 3); + ROUND2(B, C, D, E, A, F3, 15, (in)[ 1], 3); + ROUND2(A, B, C, D, E, F3, 11, (in)[ 3], 3); + ROUND2(E, A, B, C, D, F3, 8, (in)[ 7], 3); + ROUND2(D, E, A, B, C, F3, 6, (in)[14], 3); + ROUND2(C, D, E, A, B, F3, 6, (in)[ 6], 3); + ROUND2(B, C, D, E, A, F3, 14, (in)[ 9], 3); + ROUND2(A, B, C, D, E, F3, 12, (in)[11], 3); + ROUND2(E, A, B, C, D, F3, 13, (in)[ 8], 3); + ROUND2(D, E, A, B, C, F3, 5, (in)[12], 3); + ROUND2(C, D, E, A, B, F3, 14, (in)[ 2], 3); + ROUND2(B, C, D, E, A, F3, 13, 
(in)[10], 3); + ROUND2(A, B, C, D, E, F3, 13, (in)[ 0], 3); + ROUND2(E, A, B, C, D, F3, 7, (in)[ 4], 3); + ROUND2(D, E, A, B, C, F3, 5, (in)[13], 3); + + ROUND2(C, D, E, A, B, F2, 15, (in)[ 8], 4); + ROUND2(B, C, D, E, A, F2, 5, (in)[ 6], 4); + ROUND2(A, B, C, D, E, F2, 8, (in)[ 4], 4); + ROUND2(E, A, B, C, D, F2, 11, (in)[ 1], 4); + ROUND2(D, E, A, B, C, F2, 14, (in)[ 3], 4); + ROUND2(C, D, E, A, B, F2, 14, (in)[11], 4); + ROUND2(B, C, D, E, A, F2, 6, (in)[15], 4); + ROUND2(A, B, C, D, E, F2, 14, (in)[ 0], 4); + ROUND2(E, A, B, C, D, F2, 6, (in)[ 5], 4); + ROUND2(D, E, A, B, C, F2, 9, (in)[12], 4); + ROUND2(C, D, E, A, B, F2, 12, (in)[ 2], 4); + ROUND2(B, C, D, E, A, F2, 9, (in)[13], 4); + ROUND2(A, B, C, D, E, F2, 12, (in)[ 9], 4); + ROUND2(E, A, B, C, D, F2, 5, (in)[ 7], 4); + ROUND2(D, E, A, B, C, F2, 15, (in)[10], 4); + ROUND2(C, D, E, A, B, F2, 8, (in)[14], 4); + + ROUND2(B, C, D, E, A, F1, 8, (in)[12], 5); + ROUND2(A, B, C, D, E, F1, 5, (in)[15], 5); + ROUND2(E, A, B, C, D, F1, 12, (in)[10], 5); + ROUND2(D, E, A, B, C, F1, 9, (in)[ 4], 5); + ROUND2(C, D, E, A, B, F1, 12, (in)[ 1], 5); + ROUND2(B, C, D, E, A, F1, 5, (in)[ 5], 5); + ROUND2(A, B, C, D, E, F1, 14, (in)[ 8], 5); + ROUND2(E, A, B, C, D, F1, 6, (in)[ 7], 5); + ROUND2(D, E, A, B, C, F1, 8, (in)[ 6], 5); + ROUND2(C, D, E, A, B, F1, 13, (in)[ 2], 5); + ROUND2(B, C, D, E, A, F1, 6, (in)[13], 5); + ROUND2(A, B, C, D, E, F1, 5, (in)[14], 5); + ROUND2(E, A, B, C, D, F1, 15, (in)[ 0], 5); + ROUND2(D, E, A, B, C, F1, 13, (in)[ 3], 5); + ROUND2(C, D, E, A, B, F1, 11, (in)[ 9], 5); + ROUND2(B, C, D, E, A, F1, 11, (in)[11], 5); + + tmp = (h)[1] + C1 + D2; + (h)[1] = (h)[2] + D1 + E2; + (h)[2] = (h)[3] + E1 + A2; + (h)[3] = (h)[4] + A1 + B2; + (h)[4] = (h)[0] + B1 + C2; + (h)[0] = tmp; +} diff --git a/kernel/sha256.cl b/kernel/sha256.cl new file mode 100644 index 000000000..354695aef --- /dev/null +++ b/kernel/sha256.cl @@ -0,0 +1,149 @@ +#define ROL32(x, y) rotate(x, y ## U) +#define SHR(x, y) (x >> y) 
+#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) + +#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) +#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) + +#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) +#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) + +#define P(a,b,c,d,e,f,g,h,x,K) \ +{ \ + temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ + d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ +} + +#define F0(y, x, z) bitselect(z, y, z ^ x) +#define F1(x, y, z) bitselect(z, y, x) + +#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) +#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) +#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) +#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) +#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) +#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) +#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) +#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) +#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) +#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) +#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) +#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) +#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) +#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) +#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) +#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) + +#define RD14 (S1(W12) + W7 + S0(W15) + W14) +#define RD15 (S1(W13) + W8 + S0(W0) + W15) + + +inline uint8 sha256_round(uint16 data, uint8 buf) +{ + uint temp1; + uint8 res; + uint W0 = (data.s0); + uint W1 = (data.s1); + uint W2 = (data.s2); + uint W3 = (data.s3); + uint W4 = (data.s4); + uint W5 = (data.s5); + uint W6 = (data.s6); + uint W7 = (data.s7); + uint W8 = (data.s8); + uint W9 = (data.s9); + uint W10 = (data.sA); + uint W11 = (data.sB); + uint W12 = (data.sC); + uint W13 = (data.sD); + uint W14 = (data.sE); + uint W15 = (data.sF); + + uint v0 = buf.s0; + uint v1 = buf.s1; + uint v2 = buf.s2; + uint v3 = buf.s3; + uint v4 = buf.s4; + uint v5 = buf.s5; + uint v6 = buf.s6; + uint v7 = buf.s7; + + P(v0, v1, v2, v3, 
v4, v5, v6, v7, W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 
0x766A0ABB); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); + + res.s0 = (v0 + buf.s0); + res.s1 = (v1 + buf.s1); + res.s2 = (v2 + buf.s2); + res.s3 = (v3 + buf.s3); + res.s4 = (v4 + buf.s4); + res.s5 = (v5 + buf.s5); + res.s6 = (v6 + buf.s6); + res.s7 = (v7 + buf.s7); + return (res); +} + diff --git a/kernel/wolf-sha512.cl b/kernel/wolf-sha512.cl new file mode 100644 index 000000000..62953b05d --- /dev/null +++ b/kernel/wolf-sha512.cl @@ -0,0 +1,108 @@ +static const __constant ulong K512[80] = +{ + 0x428A2F98D728AE22UL, 0x7137449123EF65CDUL, + 0xB5C0FBCFEC4D3B2FUL, 0xE9B5DBA58189DBBCUL, + 0x3956C25BF348B538UL, 0x59F111F1B605D019UL, + 
0x923F82A4AF194F9BUL, 0xAB1C5ED5DA6D8118UL, + 0xD807AA98A3030242UL, 0x12835B0145706FBEUL, + 0x243185BE4EE4B28CUL, 0x550C7DC3D5FFB4E2UL, + 0x72BE5D74F27B896FUL, 0x80DEB1FE3B1696B1UL, + 0x9BDC06A725C71235UL, 0xC19BF174CF692694UL, + 0xE49B69C19EF14AD2UL, 0xEFBE4786384F25E3UL, + 0x0FC19DC68B8CD5B5UL, 0x240CA1CC77AC9C65UL, + 0x2DE92C6F592B0275UL, 0x4A7484AA6EA6E483UL, + 0x5CB0A9DCBD41FBD4UL, 0x76F988DA831153B5UL, + 0x983E5152EE66DFABUL, 0xA831C66D2DB43210UL, + 0xB00327C898FB213FUL, 0xBF597FC7BEEF0EE4UL, + 0xC6E00BF33DA88FC2UL, 0xD5A79147930AA725UL, + 0x06CA6351E003826FUL, 0x142929670A0E6E70UL, + 0x27B70A8546D22FFCUL, 0x2E1B21385C26C926UL, + 0x4D2C6DFC5AC42AEDUL, 0x53380D139D95B3DFUL, + 0x650A73548BAF63DEUL, 0x766A0ABB3C77B2A8UL, + 0x81C2C92E47EDAEE6UL, 0x92722C851482353BUL, + 0xA2BFE8A14CF10364UL, 0xA81A664BBC423001UL, + 0xC24B8B70D0F89791UL, 0xC76C51A30654BE30UL, + 0xD192E819D6EF5218UL, 0xD69906245565A910UL, + 0xF40E35855771202AUL, 0x106AA07032BBD1B8UL, + 0x19A4C116B8D2D0C8UL, 0x1E376C085141AB53UL, + 0x2748774CDF8EEB99UL, 0x34B0BCB5E19B48A8UL, + 0x391C0CB3C5C95A63UL, 0x4ED8AA4AE3418ACBUL, + 0x5B9CCA4F7763E373UL, 0x682E6FF3D6B2B8A3UL, + 0x748F82EE5DEFB2FCUL, 0x78A5636F43172F60UL, + 0x84C87814A1F0AB72UL, 0x8CC702081A6439ECUL, + 0x90BEFFFA23631E28UL, 0xA4506CEBDE82BDE9UL, + 0xBEF9A3F7B2C67915UL, 0xC67178F2E372532BUL, + 0xCA273ECEEA26619CUL, 0xD186B8C721C0C207UL, + 0xEADA7DD6CDE0EB1EUL, 0xF57D4F7FEE6ED178UL, + 0x06F067AA72176FBAUL, 0x0A637DC5A2C898A6UL, + 0x113F9804BEF90DAEUL, 0x1B710B35131C471BUL, + 0x28DB77F523047D84UL, 0x32CAAB7B40C72493UL, + 0x3C9EBE0A15C9BEBCUL, 0x431D67C49C100D4CUL, + 0x4CC5D4BECB3E42B6UL, 0x597F299CFC657E2AUL, + 0x5FCB6FAB3AD6FAECUL, 0x6C44198C4A475817UL +}; + +static const __constant ulong SHA512_INIT[8] = +{ + 0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, + 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, + 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, + 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL +}; + +#define ROTR64(x, y) rotate((x), 64UL - (y)) + +ulong 
FAST_ROTR64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, y))); } +ulong FAST_ROTR64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); } + +/* +#define BSG5_0(x) (FAST_ROTR64_LO(x, 28) ^ FAST_ROTR64_HI(x, 34) ^ FAST_ROTR64_HI(x, 39)) +#define BSG5_1(x) (FAST_ROTR64_LO(x, 14) ^ FAST_ROTR64_LO(x, 18) ^ ROTR64(x, 41)) +#define SSG5_0(x) (FAST_ROTR64_LO(x, 1) ^ FAST_ROTR64_LO(x, 8) ^ ((x) >> 7)) +#define SSG5_1(x) (FAST_ROTR64_LO(x, 19) ^ FAST_ROTR64_HI(x, 61) ^ ((x) >> 6)) +*/ + +#define BSG5_0(x) (FAST_ROTR64_LO(as_uint2(x), 28) ^ FAST_ROTR64_HI(as_uint2(x), 34) ^ FAST_ROTR64_HI(as_uint2(x), 39)) +#define BSG5_1(x) (FAST_ROTR64_LO(as_uint2(x), 14) ^ FAST_ROTR64_LO(as_uint2(x), 18) ^ FAST_ROTR64_HI(as_uint2(x), 41)) +#define SSG5_0(x) (FAST_ROTR64_LO(as_uint2(x), 1) ^ FAST_ROTR64_LO(as_uint2(x), 8) ^ ((x) >> 7)) +#define SSG5_1(x) (FAST_ROTR64_LO(as_uint2(x), 19) ^ FAST_ROTR64_HI(as_uint2(x), 61) ^ ((x) >> 6)) + +#define CH(X, Y, Z) bitselect(Z, Y, X) +#define MAJ(X, Y, Z) CH((X ^ Z), Y, Z) + +void SHA2_512_STEP2(const ulong *W, uint ord, ulong *r, int i) +{ + ulong T1; + int x = 8 - ord; + + ulong a = r[x & 7], b = r[(x + 1) & 7], c = r[(x + 2) & 7], d = r[(x + 3) & 7]; + ulong e = r[(x + 4) & 7], f = r[(x + 5) & 7], g = r[(x + 6) & 7], h = r[(x + 7) & 7]; + + T1 = h + BSG5_1(e) + CH(e, f, g) + W[i] + K512[i]; + r[(3 + x) & 7] = d + T1; + r[(7 + x) & 7] = T1 + BSG5_0(a) + MAJ(a, b, c); +} + +void SHA512Block(ulong *data, ulong *buf) +{ + ulong W[80], r[8]; + + for(int i = 0; i < 8; ++i) r[i] = buf[i]; + + for(int i = 0; i < 16; ++i) W[i] = data[i]; + + #pragma unroll 4 + for(int i = 16; i < 80; ++i) W[i] = SSG5_1(W[i - 2]) + W[i - 7] + SSG5_0(W[i - 15]) + W[i - 16]; + + #pragma unroll 1 + for(int i = 0; i < 80; i += 8) + { + #pragma unroll + for(int j = 0; j < 8; ++j) + { + SHA2_512_STEP2(W, j, r, i + j); + } + } + + for(int i = 0; i < 8; ++i) buf[i] += r[i]; +} diff --git a/miner.h b/miner.h index 
cb28f7c2c..af9cdf90f 100644 --- a/miner.h +++ b/miner.h @@ -716,6 +716,16 @@ static inline void flip80(void *dest_p, const void *src_p) dest[i] = swab32(src[i]); } +static inline void flip112(void *dest_p, const void *src_p) +{ + uint32_t *dest = (uint32_t *)dest_p; + const uint32_t *src = (uint32_t *)src_p; + int i; + + for (i = 0; i < 28; i++) + dest[i] = swab32(src[i]); +} + static inline void flip128(void *dest_p, const void *src_p) { uint32_t *dest = (uint32_t *)dest_p; diff --git a/ocl.c b/ocl.c index c34f191b3..1dd74ee46 100644 --- a/ocl.c +++ b/ocl.c @@ -761,6 +761,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg size_t readbufsize = 128; if (algorithm->type == ALGO_CRE) readbufsize = 168; else if (algorithm->type == ALGO_DECRED) readbufsize = 192; + else if (algorithm->type == ALGO_LBRY) readbufsize = 112; if (algorithm->rw_buffer_size < 0) { // calc buffer size for neoscrypt diff --git a/sgminer.c b/sgminer.c index 6cb7944b9..896f7abce 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5627,6 +5627,9 @@ static void *stratum_sthread(void *userdata) else if (pool->algorithm.type == ALGO_DECRED) { nonce = *((uint32_t *)(work->data + 140)); } + else if (pool->algorithm.type == ALGO_LBRY) { + nonce = *((uint32_t *)(work->data + 108)); + } else { nonce = *((uint32_t *)(work->data + 76)); } @@ -7149,6 +7152,7 @@ static void rebuild_nonce(struct work *work, uint32_t nonce) uint32_t nonce_pos = 76; if (work->pool->algorithm.type == ALGO_CRE) nonce_pos = 140; else if (work->pool->algorithm.type == ALGO_DECRED) nonce_pos = 140; + else if (work->pool->algorithm.type == ALGO_LBRY) nonce_pos = 108; uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos); diff --git a/sph/Makefile.am b/sph/Makefile.am index bc2f4b238..2c2a69502 100644 --- a/sph/Makefile.am +++ b/sph/Makefile.am @@ -1,3 +1,3 @@ noinst_LIBRARIES = libsph.a -libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c 
sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c +libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c ripemd.c diff --git a/sph/ripemd.c b/sph/ripemd.c new file mode 100644 index 000000000..e242ac254 --- /dev/null +++ b/sph/ripemd.c @@ -0,0 +1,833 @@ +/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * RIPEMD-160 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include <stddef.h> +#include <string.h> + +#include "sph_ripemd.h" + +/* + * Round functions for RIPEMD (original).
+ */ +#define F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define G(x, y, z) (((x) & (y)) | (((x) | (y)) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +static const sph_u32 oIV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), + SPH_C32(0x98BADCFE), SPH_C32(0x10325476) +}; + +/* + * Round functions for RIPEMD-128 and RIPEMD-160. + */ +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +static const sph_u32 IV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE), + SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0) +}; + +#define ROTL SPH_ROTL32 + +/* ===================================================================== */ +/* + * RIPEMD (original hash, deprecated). + */ + +#define FF1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) \ + + (X) + SPH_C32(0x5A827999)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x6ED9EBA1)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define FF2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) \ + + (X) + SPH_C32(0x50A28BE6)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x5C4DD124)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define RIPEMD_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + FF1(A1, B1, C1, D1, in( 0), 11); \ + 
FF1(D1, A1, B1, C1, in( 1), 14); \ + FF1(C1, D1, A1, B1, in( 2), 15); \ + FF1(B1, C1, D1, A1, in( 3), 12); \ + FF1(A1, B1, C1, D1, in( 4), 5); \ + FF1(D1, A1, B1, C1, in( 5), 8); \ + FF1(C1, D1, A1, B1, in( 6), 7); \ + FF1(B1, C1, D1, A1, in( 7), 9); \ + FF1(A1, B1, C1, D1, in( 8), 11); \ + FF1(D1, A1, B1, C1, in( 9), 13); \ + FF1(C1, D1, A1, B1, in(10), 14); \ + FF1(B1, C1, D1, A1, in(11), 15); \ + FF1(A1, B1, C1, D1, in(12), 6); \ + FF1(D1, A1, B1, C1, in(13), 7); \ + FF1(C1, D1, A1, B1, in(14), 9); \ + FF1(B1, C1, D1, A1, in(15), 8); \ + \ + GG1(A1, B1, C1, D1, in( 7), 7); \ + GG1(D1, A1, B1, C1, in( 4), 6); \ + GG1(C1, D1, A1, B1, in(13), 8); \ + GG1(B1, C1, D1, A1, in( 1), 13); \ + GG1(A1, B1, C1, D1, in(10), 11); \ + GG1(D1, A1, B1, C1, in( 6), 9); \ + GG1(C1, D1, A1, B1, in(15), 7); \ + GG1(B1, C1, D1, A1, in( 3), 15); \ + GG1(A1, B1, C1, D1, in(12), 7); \ + GG1(D1, A1, B1, C1, in( 0), 12); \ + GG1(C1, D1, A1, B1, in( 9), 15); \ + GG1(B1, C1, D1, A1, in( 5), 9); \ + GG1(A1, B1, C1, D1, in(14), 7); \ + GG1(D1, A1, B1, C1, in( 2), 11); \ + GG1(C1, D1, A1, B1, in(11), 13); \ + GG1(B1, C1, D1, A1, in( 8), 12); \ + \ + HH1(A1, B1, C1, D1, in( 3), 11); \ + HH1(D1, A1, B1, C1, in(10), 13); \ + HH1(C1, D1, A1, B1, in( 2), 14); \ + HH1(B1, C1, D1, A1, in( 4), 7); \ + HH1(A1, B1, C1, D1, in( 9), 14); \ + HH1(D1, A1, B1, C1, in(15), 9); \ + HH1(C1, D1, A1, B1, in( 8), 13); \ + HH1(B1, C1, D1, A1, in( 1), 15); \ + HH1(A1, B1, C1, D1, in(14), 6); \ + HH1(D1, A1, B1, C1, in( 7), 8); \ + HH1(C1, D1, A1, B1, in( 0), 13); \ + HH1(B1, C1, D1, A1, in( 6), 6); \ + HH1(A1, B1, C1, D1, in(11), 12); \ + HH1(D1, A1, B1, C1, in(13), 5); \ + HH1(C1, D1, A1, B1, in( 5), 7); \ + HH1(B1, C1, D1, A1, in(12), 5); \ + \ + FF2(A2, B2, C2, D2, in( 0), 11); \ + FF2(D2, A2, B2, C2, in( 1), 14); \ + FF2(C2, D2, A2, B2, in( 2), 15); \ + FF2(B2, C2, D2, A2, in( 3), 12); \ + FF2(A2, B2, C2, D2, in( 4), 5); \ + FF2(D2, A2, B2, C2, in( 5), 8); \ + FF2(C2, D2, A2, B2, in( 6), 7); \ + FF2(B2, C2, D2, 
A2, in( 7), 9); \ + FF2(A2, B2, C2, D2, in( 8), 11); \ + FF2(D2, A2, B2, C2, in( 9), 13); \ + FF2(C2, D2, A2, B2, in(10), 14); \ + FF2(B2, C2, D2, A2, in(11), 15); \ + FF2(A2, B2, C2, D2, in(12), 6); \ + FF2(D2, A2, B2, C2, in(13), 7); \ + FF2(C2, D2, A2, B2, in(14), 9); \ + FF2(B2, C2, D2, A2, in(15), 8); \ + \ + GG2(A2, B2, C2, D2, in( 7), 7); \ + GG2(D2, A2, B2, C2, in( 4), 6); \ + GG2(C2, D2, A2, B2, in(13), 8); \ + GG2(B2, C2, D2, A2, in( 1), 13); \ + GG2(A2, B2, C2, D2, in(10), 11); \ + GG2(D2, A2, B2, C2, in( 6), 9); \ + GG2(C2, D2, A2, B2, in(15), 7); \ + GG2(B2, C2, D2, A2, in( 3), 15); \ + GG2(A2, B2, C2, D2, in(12), 7); \ + GG2(D2, A2, B2, C2, in( 0), 12); \ + GG2(C2, D2, A2, B2, in( 9), 15); \ + GG2(B2, C2, D2, A2, in( 5), 9); \ + GG2(A2, B2, C2, D2, in(14), 7); \ + GG2(D2, A2, B2, C2, in( 2), 11); \ + GG2(C2, D2, A2, B2, in(11), 13); \ + GG2(B2, C2, D2, A2, in( 8), 12); \ + \ + HH2(A2, B2, C2, D2, in( 3), 11); \ + HH2(D2, A2, B2, C2, in(10), 13); \ + HH2(C2, D2, A2, B2, in( 2), 14); \ + HH2(B2, C2, D2, A2, in( 4), 7); \ + HH2(A2, B2, C2, D2, in( 9), 14); \ + HH2(D2, A2, B2, C2, in(15), 9); \ + HH2(C2, D2, A2, B2, in( 8), 13); \ + HH2(B2, C2, D2, A2, in( 1), 15); \ + HH2(A2, B2, C2, D2, in(14), 6); \ + HH2(D2, A2, B2, C2, in( 7), 8); \ + HH2(C2, D2, A2, B2, in( 0), 13); \ + HH2(B2, C2, D2, A2, in( 6), 6); \ + HH2(A2, B2, C2, D2, in(11), 12); \ + HH2(D2, A2, B2, C2, in(13), 5); \ + HH2(C2, D2, A2, B2, in( 5), 7); \ + HH2(B2, C2, D2, A2, in(12), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + A2); \ + (h)[2] = SPH_T32((h)[3] + A1 + B2); \ + (h)[3] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD. The data must be aligned for 32-bit access. 
+ */ +static void +ripemd_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD_IN(x) X_var[x] + +#endif + RIPEMD_ROUND_BODY(RIPEMD_IN, r); +#undef RIPEMD_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd_init(void *cc) +{ + sph_ripemd_context *sc; + + sc = cc; + memcpy(sc->val, oIV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd_round +#define HASH ripemd +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd_close(void *cc, void *dst) +{ + ripemd_close(cc, dst, 4); + sph_ripemd_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]) +{ +#define RIPEMD_IN(x) msg[x] + RIPEMD_ROUND_BODY(RIPEMD_IN, val); +#undef RIPEMD_IN +} + +/* ===================================================================== */ +/* + * RIPEMD-128. + */ + +/* + * Round constants for RIPEMD-128. + */ +#define sK11 SPH_C32(0x00000000) +#define sK12 SPH_C32(0x5A827999) +#define sK13 SPH_C32(0x6ED9EBA1) +#define sK14 SPH_C32(0x8F1BBCDC) + +#define sK21 SPH_C32(0x50A28BE6) +#define sK22 SPH_C32(0x5C4DD124) +#define sK23 SPH_C32(0x6D703EF3) +#define sK24 SPH_C32(0x00000000) + +#define sRR(a, b, c, d, f, s, r, k) do { \ + a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \ + } while (0) + +#define sROUND1(a, b, c, d, f, s, r, k) \ + sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k) + +#define sROUND2(a, b, c, d, f, s, r, k) \ + sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k) + +/* + * This macro defines the body for a RIPEMD-128 compression function + * implementation. 
The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 4 words which + * contains the input and output of the compression function. + */ + +#define RIPEMD128_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + sROUND1(A, B, C, D, F1, 11, in( 0), 1); \ + sROUND1(D, A, B, C, F1, 14, in( 1), 1); \ + sROUND1(C, D, A, B, F1, 15, in( 2), 1); \ + sROUND1(B, C, D, A, F1, 12, in( 3), 1); \ + sROUND1(A, B, C, D, F1, 5, in( 4), 1); \ + sROUND1(D, A, B, C, F1, 8, in( 5), 1); \ + sROUND1(C, D, A, B, F1, 7, in( 6), 1); \ + sROUND1(B, C, D, A, F1, 9, in( 7), 1); \ + sROUND1(A, B, C, D, F1, 11, in( 8), 1); \ + sROUND1(D, A, B, C, F1, 13, in( 9), 1); \ + sROUND1(C, D, A, B, F1, 14, in(10), 1); \ + sROUND1(B, C, D, A, F1, 15, in(11), 1); \ + sROUND1(A, B, C, D, F1, 6, in(12), 1); \ + sROUND1(D, A, B, C, F1, 7, in(13), 1); \ + sROUND1(C, D, A, B, F1, 9, in(14), 1); \ + sROUND1(B, C, D, A, F1, 8, in(15), 1); \ + \ + sROUND1(A, B, C, D, F2, 7, in( 7), 2); \ + sROUND1(D, A, B, C, F2, 6, in( 4), 2); \ + sROUND1(C, D, A, B, F2, 8, in(13), 2); \ + sROUND1(B, C, D, A, F2, 13, in( 1), 2); \ + sROUND1(A, B, C, D, F2, 11, in(10), 2); \ + sROUND1(D, A, B, C, F2, 9, in( 6), 2); \ + sROUND1(C, D, A, B, F2, 7, in(15), 2); \ + sROUND1(B, C, D, A, F2, 15, in( 3), 2); \ + sROUND1(A, B, C, D, F2, 7, in(12), 2); \ + sROUND1(D, A, B, C, F2, 12, in( 0), 2); \ + sROUND1(C, D, A, B, F2, 15, in( 9), 2); \ + sROUND1(B, C, D, A, F2, 9, in( 5), 2); \ + sROUND1(A, B, C, D, F2, 11, in( 2), 2); \ + sROUND1(D, A, B, C, F2, 7, in(14), 2); \ + sROUND1(C, D, A, B, F2, 13, in(11), 2); \ + sROUND1(B, C, D, A, F2, 12, in( 8), 2); \ + \ + sROUND1(A, B, C, D, F3, 11, in( 3), 3); 
\ + sROUND1(D, A, B, C, F3, 13, in(10), 3); \ + sROUND1(C, D, A, B, F3, 6, in(14), 3); \ + sROUND1(B, C, D, A, F3, 7, in( 4), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 9), 3); \ + sROUND1(D, A, B, C, F3, 9, in(15), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 8), 3); \ + sROUND1(B, C, D, A, F3, 15, in( 1), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 2), 3); \ + sROUND1(D, A, B, C, F3, 8, in( 7), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 0), 3); \ + sROUND1(B, C, D, A, F3, 6, in( 6), 3); \ + sROUND1(A, B, C, D, F3, 5, in(13), 3); \ + sROUND1(D, A, B, C, F3, 12, in(11), 3); \ + sROUND1(C, D, A, B, F3, 7, in( 5), 3); \ + sROUND1(B, C, D, A, F3, 5, in(12), 3); \ + \ + sROUND1(A, B, C, D, F4, 11, in( 1), 4); \ + sROUND1(D, A, B, C, F4, 12, in( 9), 4); \ + sROUND1(C, D, A, B, F4, 14, in(11), 4); \ + sROUND1(B, C, D, A, F4, 15, in(10), 4); \ + sROUND1(A, B, C, D, F4, 14, in( 0), 4); \ + sROUND1(D, A, B, C, F4, 15, in( 8), 4); \ + sROUND1(C, D, A, B, F4, 9, in(12), 4); \ + sROUND1(B, C, D, A, F4, 8, in( 4), 4); \ + sROUND1(A, B, C, D, F4, 9, in(13), 4); \ + sROUND1(D, A, B, C, F4, 14, in( 3), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 7), 4); \ + sROUND1(B, C, D, A, F4, 6, in(15), 4); \ + sROUND1(A, B, C, D, F4, 8, in(14), 4); \ + sROUND1(D, A, B, C, F4, 6, in( 5), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 6), 4); \ + sROUND1(B, C, D, A, F4, 12, in( 2), 4); \ + \ + sROUND2(A, B, C, D, F4, 8, in( 5), 1); \ + sROUND2(D, A, B, C, F4, 9, in(14), 1); \ + sROUND2(C, D, A, B, F4, 9, in( 7), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 0), 1); \ + sROUND2(A, B, C, D, F4, 13, in( 9), 1); \ + sROUND2(D, A, B, C, F4, 15, in( 2), 1); \ + sROUND2(C, D, A, B, F4, 15, in(11), 1); \ + sROUND2(B, C, D, A, F4, 5, in( 4), 1); \ + sROUND2(A, B, C, D, F4, 7, in(13), 1); \ + sROUND2(D, A, B, C, F4, 7, in( 6), 1); \ + sROUND2(C, D, A, B, F4, 8, in(15), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 8), 1); \ + sROUND2(A, B, C, D, F4, 14, in( 1), 1); \ + sROUND2(D, A, B, C, F4, 14, in(10), 1); \ + sROUND2(C, D, A, B, F4, 12, 
in( 3), 1); \ + sROUND2(B, C, D, A, F4, 6, in(12), 1); \ + \ + sROUND2(A, B, C, D, F3, 9, in( 6), 2); \ + sROUND2(D, A, B, C, F3, 13, in(11), 2); \ + sROUND2(C, D, A, B, F3, 15, in( 3), 2); \ + sROUND2(B, C, D, A, F3, 7, in( 7), 2); \ + sROUND2(A, B, C, D, F3, 12, in( 0), 2); \ + sROUND2(D, A, B, C, F3, 8, in(13), 2); \ + sROUND2(C, D, A, B, F3, 9, in( 5), 2); \ + sROUND2(B, C, D, A, F3, 11, in(10), 2); \ + sROUND2(A, B, C, D, F3, 7, in(14), 2); \ + sROUND2(D, A, B, C, F3, 7, in(15), 2); \ + sROUND2(C, D, A, B, F3, 12, in( 8), 2); \ + sROUND2(B, C, D, A, F3, 7, in(12), 2); \ + sROUND2(A, B, C, D, F3, 6, in( 4), 2); \ + sROUND2(D, A, B, C, F3, 15, in( 9), 2); \ + sROUND2(C, D, A, B, F3, 13, in( 1), 2); \ + sROUND2(B, C, D, A, F3, 11, in( 2), 2); \ + \ + sROUND2(A, B, C, D, F2, 9, in(15), 3); \ + sROUND2(D, A, B, C, F2, 7, in( 5), 3); \ + sROUND2(C, D, A, B, F2, 15, in( 1), 3); \ + sROUND2(B, C, D, A, F2, 11, in( 3), 3); \ + sROUND2(A, B, C, D, F2, 8, in( 7), 3); \ + sROUND2(D, A, B, C, F2, 6, in(14), 3); \ + sROUND2(C, D, A, B, F2, 6, in( 6), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 9), 3); \ + sROUND2(A, B, C, D, F2, 12, in(11), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 8), 3); \ + sROUND2(C, D, A, B, F2, 5, in(12), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 2), 3); \ + sROUND2(A, B, C, D, F2, 13, in(10), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 0), 3); \ + sROUND2(C, D, A, B, F2, 7, in( 4), 3); \ + sROUND2(B, C, D, A, F2, 5, in(13), 3); \ + \ + sROUND2(A, B, C, D, F1, 15, in( 8), 4); \ + sROUND2(D, A, B, C, F1, 5, in( 6), 4); \ + sROUND2(C, D, A, B, F1, 8, in( 4), 4); \ + sROUND2(B, C, D, A, F1, 11, in( 1), 4); \ + sROUND2(A, B, C, D, F1, 14, in( 3), 4); \ + sROUND2(D, A, B, C, F1, 14, in(11), 4); \ + sROUND2(C, D, A, B, F1, 6, in(15), 4); \ + sROUND2(B, C, D, A, F1, 14, in( 0), 4); \ + sROUND2(A, B, C, D, F1, 6, in( 5), 4); \ + sROUND2(D, A, B, C, F1, 9, in(12), 4); \ + sROUND2(C, D, A, B, F1, 12, in( 2), 4); \ + sROUND2(B, C, D, A, F1, 9, in(13), 4); \ + sROUND2(A, B, 
C, D, F1, 12, in( 9), 4); \ + sROUND2(D, A, B, C, F1, 5, in( 7), 4); \ + sROUND2(C, D, A, B, F1, 15, in(10), 4); \ + sROUND2(B, C, D, A, F1, 8, in(14), 4); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + A2); \ + (h)[2] = SPH_T32((h)[3] + A1 + B2); \ + (h)[3] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD-128. The data must be aligned for 32-bit access. + */ +static void +ripemd128_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD128_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD128_IN(x) X_var[x] + +#endif + RIPEMD128_ROUND_BODY(RIPEMD128_IN, r); +#undef RIPEMD128_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd128_init(void *cc) +{ + sph_ripemd128_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd128_round +#define HASH ripemd128 +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd128_close(void *cc, void *dst) +{ + ripemd128_close(cc, dst, 4); + sph_ripemd128_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]) +{ +#define RIPEMD128_IN(x) msg[x] + RIPEMD128_ROUND_BODY(RIPEMD128_IN, val); +#undef RIPEMD128_IN +} + +/* ===================================================================== */ +/* + * RIPEMD-160. + */ + +/* + * Round constants for RIPEMD-160. 
+ */ +#define K11 SPH_C32(0x00000000) +#define K12 SPH_C32(0x5A827999) +#define K13 SPH_C32(0x6ED9EBA1) +#define K14 SPH_C32(0x8F1BBCDC) +#define K15 SPH_C32(0xA953FD4E) + +#define K21 SPH_C32(0x50A28BE6) +#define K22 SPH_C32(0x5C4DD124) +#define K23 SPH_C32(0x6D703EF3) +#define K24 SPH_C32(0x7A6D76E9) +#define K25 SPH_C32(0x00000000) + +#define RR(a, b, c, d, e, f, s, r, k) do { \ + a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \ + c = ROTL(c, 10); \ + } while (0) + +#define ROUND1(a, b, c, d, e, f, s, r, k) \ + RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k) + +#define ROUND2(a, b, c, d, e, f, s, r, k) \ + RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k) + +/* + * This macro defines the body for a RIPEMD-160 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 5 words which + * contains the input and output of the compression function. 
+ */ + +#define RIPEMD160_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1, E1; \ + sph_u32 A2, B2, C2, D2, E2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + E1 = E2 = (h)[4]; \ + \ + ROUND1(A, B, C, D, E, F1, 11, in( 0), 1); \ + ROUND1(E, A, B, C, D, F1, 14, in( 1), 1); \ + ROUND1(D, E, A, B, C, F1, 15, in( 2), 1); \ + ROUND1(C, D, E, A, B, F1, 12, in( 3), 1); \ + ROUND1(B, C, D, E, A, F1, 5, in( 4), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in( 5), 1); \ + ROUND1(E, A, B, C, D, F1, 7, in( 6), 1); \ + ROUND1(D, E, A, B, C, F1, 9, in( 7), 1); \ + ROUND1(C, D, E, A, B, F1, 11, in( 8), 1); \ + ROUND1(B, C, D, E, A, F1, 13, in( 9), 1); \ + ROUND1(A, B, C, D, E, F1, 14, in(10), 1); \ + ROUND1(E, A, B, C, D, F1, 15, in(11), 1); \ + ROUND1(D, E, A, B, C, F1, 6, in(12), 1); \ + ROUND1(C, D, E, A, B, F1, 7, in(13), 1); \ + ROUND1(B, C, D, E, A, F1, 9, in(14), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in(15), 1); \ + \ + ROUND1(E, A, B, C, D, F2, 7, in( 7), 2); \ + ROUND1(D, E, A, B, C, F2, 6, in( 4), 2); \ + ROUND1(C, D, E, A, B, F2, 8, in(13), 2); \ + ROUND1(B, C, D, E, A, F2, 13, in( 1), 2); \ + ROUND1(A, B, C, D, E, F2, 11, in(10), 2); \ + ROUND1(E, A, B, C, D, F2, 9, in( 6), 2); \ + ROUND1(D, E, A, B, C, F2, 7, in(15), 2); \ + ROUND1(C, D, E, A, B, F2, 15, in( 3), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(12), 2); \ + ROUND1(A, B, C, D, E, F2, 12, in( 0), 2); \ + ROUND1(E, A, B, C, D, F2, 15, in( 9), 2); \ + ROUND1(D, E, A, B, C, F2, 9, in( 5), 2); \ + ROUND1(C, D, E, A, B, F2, 11, in( 2), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(14), 2); \ + ROUND1(A, B, C, D, E, F2, 13, in(11), 2); \ + ROUND1(E, A, B, C, D, F2, 12, in( 8), 2); \ + \ + ROUND1(D, E, A, B, C, F3, 11, in( 3), 3); \ + ROUND1(C, D, E, A, B, F3, 13, in(10), 3); \ + ROUND1(B, C, D, E, A, F3, 6, in(14), 3); \ + ROUND1(A, B, C, D, E, F3, 7, in( 4), 3); \ + ROUND1(E, A, B, C, D, F3, 14, in( 9), 3); \ + ROUND1(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND1(C, 
D, E, A, B, F3, 13, in( 8), 3); \ + ROUND1(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND1(A, B, C, D, E, F3, 14, in( 2), 3); \ + ROUND1(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND1(D, E, A, B, C, F3, 13, in( 0), 3); \ + ROUND1(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND1(B, C, D, E, A, F3, 5, in(13), 3); \ + ROUND1(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND1(E, A, B, C, D, F3, 7, in( 5), 3); \ + ROUND1(D, E, A, B, C, F3, 5, in(12), 3); \ + \ + ROUND1(C, D, E, A, B, F4, 11, in( 1), 4); \ + ROUND1(B, C, D, E, A, F4, 12, in( 9), 4); \ + ROUND1(A, B, C, D, E, F4, 14, in(11), 4); \ + ROUND1(E, A, B, C, D, F4, 15, in(10), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 0), 4); \ + ROUND1(C, D, E, A, B, F4, 15, in( 8), 4); \ + ROUND1(B, C, D, E, A, F4, 9, in(12), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in( 4), 4); \ + ROUND1(E, A, B, C, D, F4, 9, in(13), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 3), 4); \ + ROUND1(C, D, E, A, B, F4, 5, in( 7), 4); \ + ROUND1(B, C, D, E, A, F4, 6, in(15), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in(14), 4); \ + ROUND1(E, A, B, C, D, F4, 6, in( 5), 4); \ + ROUND1(D, E, A, B, C, F4, 5, in( 6), 4); \ + ROUND1(C, D, E, A, B, F4, 12, in( 2), 4); \ + \ + ROUND1(B, C, D, E, A, F5, 9, in( 4), 5); \ + ROUND1(A, B, C, D, E, F5, 15, in( 0), 5); \ + ROUND1(E, A, B, C, D, F5, 5, in( 5), 5); \ + ROUND1(D, E, A, B, C, F5, 11, in( 9), 5); \ + ROUND1(C, D, E, A, B, F5, 6, in( 7), 5); \ + ROUND1(B, C, D, E, A, F5, 8, in(12), 5); \ + ROUND1(A, B, C, D, E, F5, 13, in( 2), 5); \ + ROUND1(E, A, B, C, D, F5, 12, in(10), 5); \ + ROUND1(D, E, A, B, C, F5, 5, in(14), 5); \ + ROUND1(C, D, E, A, B, F5, 12, in( 1), 5); \ + ROUND1(B, C, D, E, A, F5, 13, in( 3), 5); \ + ROUND1(A, B, C, D, E, F5, 14, in( 8), 5); \ + ROUND1(E, A, B, C, D, F5, 11, in(11), 5); \ + ROUND1(D, E, A, B, C, F5, 8, in( 6), 5); \ + ROUND1(C, D, E, A, B, F5, 5, in(15), 5); \ + ROUND1(B, C, D, E, A, F5, 6, in(13), 5); \ + \ + ROUND2(A, B, C, D, E, F5, 8, in( 5), 1); \ + ROUND2(E, A, B, C, D, F5, 9, in(14), 1); 
\ + ROUND2(D, E, A, B, C, F5, 9, in( 7), 1); \ + ROUND2(C, D, E, A, B, F5, 11, in( 0), 1); \ + ROUND2(B, C, D, E, A, F5, 13, in( 9), 1); \ + ROUND2(A, B, C, D, E, F5, 15, in( 2), 1); \ + ROUND2(E, A, B, C, D, F5, 15, in(11), 1); \ + ROUND2(D, E, A, B, C, F5, 5, in( 4), 1); \ + ROUND2(C, D, E, A, B, F5, 7, in(13), 1); \ + ROUND2(B, C, D, E, A, F5, 7, in( 6), 1); \ + ROUND2(A, B, C, D, E, F5, 8, in(15), 1); \ + ROUND2(E, A, B, C, D, F5, 11, in( 8), 1); \ + ROUND2(D, E, A, B, C, F5, 14, in( 1), 1); \ + ROUND2(C, D, E, A, B, F5, 14, in(10), 1); \ + ROUND2(B, C, D, E, A, F5, 12, in( 3), 1); \ + ROUND2(A, B, C, D, E, F5, 6, in(12), 1); \ + \ + ROUND2(E, A, B, C, D, F4, 9, in( 6), 2); \ + ROUND2(D, E, A, B, C, F4, 13, in(11), 2); \ + ROUND2(C, D, E, A, B, F4, 15, in( 3), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in( 7), 2); \ + ROUND2(A, B, C, D, E, F4, 12, in( 0), 2); \ + ROUND2(E, A, B, C, D, F4, 8, in(13), 2); \ + ROUND2(D, E, A, B, C, F4, 9, in( 5), 2); \ + ROUND2(C, D, E, A, B, F4, 11, in(10), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in(14), 2); \ + ROUND2(A, B, C, D, E, F4, 7, in(15), 2); \ + ROUND2(E, A, B, C, D, F4, 12, in( 8), 2); \ + ROUND2(D, E, A, B, C, F4, 7, in(12), 2); \ + ROUND2(C, D, E, A, B, F4, 6, in( 4), 2); \ + ROUND2(B, C, D, E, A, F4, 15, in( 9), 2); \ + ROUND2(A, B, C, D, E, F4, 13, in( 1), 2); \ + ROUND2(E, A, B, C, D, F4, 11, in( 2), 2); \ + \ + ROUND2(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND2(C, D, E, A, B, F3, 7, in( 5), 3); \ + ROUND2(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND2(A, B, C, D, E, F3, 11, in( 3), 3); \ + ROUND2(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND2(D, E, A, B, C, F3, 6, in(14), 3); \ + ROUND2(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND2(B, C, D, E, A, F3, 14, in( 9), 3); \ + ROUND2(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND2(E, A, B, C, D, F3, 13, in( 8), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(12), 3); \ + ROUND2(C, D, E, A, B, F3, 14, in( 2), 3); \ + ROUND2(B, C, D, E, A, F3, 13, in(10), 3); \ + ROUND2(A, B, C, D, E, F3, 13, 
in( 0), 3); \ + ROUND2(E, A, B, C, D, F3, 7, in( 4), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(13), 3); \ + \ + ROUND2(C, D, E, A, B, F2, 15, in( 8), 4); \ + ROUND2(B, C, D, E, A, F2, 5, in( 6), 4); \ + ROUND2(A, B, C, D, E, F2, 8, in( 4), 4); \ + ROUND2(E, A, B, C, D, F2, 11, in( 1), 4); \ + ROUND2(D, E, A, B, C, F2, 14, in( 3), 4); \ + ROUND2(C, D, E, A, B, F2, 14, in(11), 4); \ + ROUND2(B, C, D, E, A, F2, 6, in(15), 4); \ + ROUND2(A, B, C, D, E, F2, 14, in( 0), 4); \ + ROUND2(E, A, B, C, D, F2, 6, in( 5), 4); \ + ROUND2(D, E, A, B, C, F2, 9, in(12), 4); \ + ROUND2(C, D, E, A, B, F2, 12, in( 2), 4); \ + ROUND2(B, C, D, E, A, F2, 9, in(13), 4); \ + ROUND2(A, B, C, D, E, F2, 12, in( 9), 4); \ + ROUND2(E, A, B, C, D, F2, 5, in( 7), 4); \ + ROUND2(D, E, A, B, C, F2, 15, in(10), 4); \ + ROUND2(C, D, E, A, B, F2, 8, in(14), 4); \ + \ + ROUND2(B, C, D, E, A, F1, 8, in(12), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(15), 5); \ + ROUND2(E, A, B, C, D, F1, 12, in(10), 5); \ + ROUND2(D, E, A, B, C, F1, 9, in( 4), 5); \ + ROUND2(C, D, E, A, B, F1, 12, in( 1), 5); \ + ROUND2(B, C, D, E, A, F1, 5, in( 5), 5); \ + ROUND2(A, B, C, D, E, F1, 14, in( 8), 5); \ + ROUND2(E, A, B, C, D, F1, 6, in( 7), 5); \ + ROUND2(D, E, A, B, C, F1, 8, in( 6), 5); \ + ROUND2(C, D, E, A, B, F1, 13, in( 2), 5); \ + ROUND2(B, C, D, E, A, F1, 6, in(13), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(14), 5); \ + ROUND2(E, A, B, C, D, F1, 15, in( 0), 5); \ + ROUND2(D, E, A, B, C, F1, 13, in( 3), 5); \ + ROUND2(C, D, E, A, B, F1, 11, in( 9), 5); \ + ROUND2(B, C, D, E, A, F1, 11, in(11), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + E2); \ + (h)[2] = SPH_T32((h)[3] + E1 + A2); \ + (h)[3] = SPH_T32((h)[4] + A1 + B2); \ + (h)[4] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD-160. The data must be aligned for 32-bit access. 
+ */ +static void +ripemd160_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD160_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD160_IN(x) X_var[x] + +#endif + RIPEMD160_ROUND_BODY(RIPEMD160_IN, r); +#undef RIPEMD160_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_init(void *cc) +{ + sph_ripemd160_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd160_round +#define HASH ripemd160 +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd160_close(void *cc, void *dst) +{ + ripemd160_close(cc, dst, 5); + sph_ripemd160_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]) +{ +#define RIPEMD160_IN(x) msg[x] + RIPEMD160_ROUND_BODY(RIPEMD160_IN, val); +#undef RIPEMD160_IN +} diff --git a/sph/sph_ripemd.h b/sph/sph_ripemd.h new file mode 100644 index 000000000..256776830 --- /dev/null +++ b/sph/sph_ripemd.h @@ -0,0 +1,273 @@ +/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * RIPEMD, RIPEMD-128 and RIPEMD-160 interface. + * + * RIPEMD was first described in: Research and Development in Advanced + * Communication Technologies in Europe, "RIPE Integrity Primitives: + * Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE, + * June 1992. + * + * A new, strengthened version, dubbed RIPEMD-160, was published in: H. + * Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened + * version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039, + * Springer (1996), pp. 71--82. + * + * This article describes both RIPEMD-160, with a 160-bit output, and a + * reduced version called RIPEMD-128, which has a 128-bit output. 
RIPEMD-128 + * was meant as a "drop-in" replacement for any hash function with 128-bit + * output, especially the original RIPEMD. + * + * @warning Collisions, and an efficient method to build other collisions, + * have been published for the original RIPEMD, which is thus considered as + * cryptographically broken. It is also very rarely encountered, and there + * seems to exist no free description or implementation of RIPEMD (except + * the sphlib code, of course). As of January 2007, RIPEMD-128 and RIPEMD-160 + * seem as secure as their output length allows. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + * ===========================(LICENSE END)============================= + * + * @file sph_ripemd.h + * @author Thomas Pornin + */ + +#ifndef SPH_RIPEMD_H__ +#define SPH_RIPEMD_H__ + +#include <stddef.h> +#include "sph_types.h" + +/** + * Output size (in bits) for RIPEMD. + */ +#define SPH_SIZE_ripemd 128 + +/** + * Output size (in bits) for RIPEMD-128. + */ +#define SPH_SIZE_ripemd128 128 + +/** + * Output size (in bits) for RIPEMD-160. + */ +#define SPH_SIZE_ripemd160 160 + +/** + * This structure is a context for RIPEMD computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[4]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd_context; + +/** + * Initialize a RIPEMD context. This process performs no memory allocation. + * + * @param cc the RIPEMD context (pointer to + * a sph_ripemd_context) + */ +void sph_ripemd_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RIPEMD context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (16 bytes). The context is automatically + * reinitialized.
+ * + * @param cc the RIPEMD context + * @param dst the destination buffer + */ +void sph_ripemd_close(void *cc, void *dst); + +/** + * Apply the RIPEMD compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 4 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 128-bit input and output + */ +void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]); + +/* ===================================================================== */ + +/** + * This structure is a context for RIPEMD-128 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD-128 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD-128 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[4]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd128_context; + +/** + * Initialize a RIPEMD-128 context. This process performs no memory allocation. + * + * @param cc the RIPEMD-128 context (pointer to + * a sph_ripemd128_context) + */ +void sph_ripemd128_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RIPEMD-128 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd128(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD-128 computation and output the result into the + * provided buffer.
The destination buffer must be wide enough to + * accomodate the result (16 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD-128 context + * @param dst the destination buffer + */ +void sph_ripemd128_close(void *cc, void *dst); + +/** + * Apply the RIPEMD-128 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 128-bit input and output + */ +void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]); + +/* ===================================================================== */ + +/** + * This structure is a context for RIPEMD-160 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD-160 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD-160 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[5]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd160_context; + +/** + * Initialize a RIPEMD-160 context. This process performs no memory allocation. + * + * @param cc the RIPEMD-160 context (pointer to + * a sph_ripemd160_context) + */ +void sph_ripemd160_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). 
+ * + * @param cc the RIPEMD-160 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd160(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD-160 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (20 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD-160 context + * @param dst the destination buffer + */ +void sph_ripemd160_close(void *cc, void *dst); + +/** + * Apply the RIPEMD-160 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 160-bit input and output + */ +void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]); + +#endif diff --git a/util.c b/util.c index 8bf6e22eb..b965b54c5 100644 --- a/util.c +++ b/util.c @@ -1505,34 +1505,42 @@ static char *blank_merkel = "000000000000000000000000000000000000000000000000000 static bool parse_notify(struct pool *pool, json_t *val) { char *job_id, *prev_hash, *coinbase1, *coinbase2, *bbversion, *nbit, - *ntime, *header; + *ntime, *header, *trie; size_t cb1_len, cb2_len, alloc_len, header_len; unsigned char *cb1, *cb2; - bool clean, ret = false; - int merkles, i; + bool clean, ret = false, has_trie = false; + int merkles, i = 0; json_t *arr; - arr = json_array_get(val, 4); + has_trie = json_array_size(val) == 10; + + job_id = json_array_string(val, i++); + prev_hash = json_array_string(val, i++); + if (has_trie) { + trie = json_array_string(val, i++); + } + coinbase1 = json_array_string(val, i++); + coinbase2 = json_array_string(val, i++); + + arr = json_array_get(val, i++); if (!arr || 
!json_is_array(arr)) goto out; merkles = json_array_size(arr); - job_id = json_array_string(val, 0); - prev_hash = json_array_string(val, 1); - coinbase1 = json_array_string(val, 2); - coinbase2 = json_array_string(val, 3); - bbversion = json_array_string(val, 5); - nbit = json_array_string(val, 6); - ntime = json_array_string(val, 7); - clean = json_is_true(json_array_get(val, 8)); + bbversion = json_array_string(val, i++); + nbit = json_array_string(val, i++); + ntime = json_array_string(val, i++); + clean = json_is_true(json_array_get(val, i)); - if (!job_id || !prev_hash || !coinbase1 || !coinbase2 || !bbversion || !nbit || !ntime) { + if (!job_id || !prev_hash || !coinbase1 || !coinbase2 || !bbversion || !nbit || !ntime || (has_trie && !trie)) { /* Annoying but we must not leak memory */ if (job_id) free(job_id); if (prev_hash) free(prev_hash); + if (trie) + free(trie); if (coinbase1) free(coinbase1); if (coinbase2) @@ -1589,10 +1597,11 @@ static bool parse_notify(struct pool *pool, json_t *val) pool->merkle_offset /= 2; header = (char *)alloca(257); snprintf(header, 257, - "%s%s%s%s%s%s", + "%s%s%s%s%s%s%s", pool->swork.bbversion, pool->swork.prev_hash, blank_merkel, + has_trie ? 
trie : "", pool->swork.ntime, pool->swork.nbit, "00000000" /* nonce */ From 0316101b8e8a28aed62136464d77417366e8654f Mon Sep 17 00:00:00 2001 From: elbandi Date: Tue, 19 Jul 2016 20:29:23 +0200 Subject: [PATCH 51/63] Initialize trie variable --- util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util.c b/util.c index b965b54c5..c525b3d29 100644 --- a/util.c +++ b/util.c @@ -1505,7 +1505,7 @@ static char *blank_merkel = "000000000000000000000000000000000000000000000000000 static bool parse_notify(struct pool *pool, json_t *val) { char *job_id, *prev_hash, *coinbase1, *coinbase2, *bbversion, *nbit, - *ntime, *header, *trie; + *ntime, *header, *trie = NULL; size_t cb1_len, cb2_len, alloc_len, header_len; unsigned char *cb1, *cb2; bool clean, ret = false, has_trie = false; From 91e1d324c541fc2abe02db94b15122f6fdaba90c Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 10 Jul 2016 15:13:54 +0000 Subject: [PATCH 52/63] Add Sia algo support --- Makefile.am | 1 + algorithm.c | 21 +++++ algorithm.h | 1 + algorithm/sia.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++ algorithm/sia.h | 8 ++ kernel/sia.cl | 120 +++++++++++++++++++++++++ sgminer.c | 18 +++- 7 files changed, 398 insertions(+), 3 deletions(-) create mode 100644 algorithm/sia.c create mode 100644 algorithm/sia.h create mode 100644 kernel/sia.cl diff --git a/Makefile.am b/Makefile.am index 137a5723e..38f1bfed1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -75,6 +75,7 @@ sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h sgminer_SOURCES += algorithm/lyra2rev2.c algorithm/lyra2rev2.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h +sgminer_SOURCES += algorithm/sia.c algorithm/sia.h sgminer_SOURCES += algorithm/credits.c algorithm/credits.h sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h 
algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h diff --git a/algorithm.c b/algorithm.c index a4637ba0a..ca9ba551a 100644 --- a/algorithm.c +++ b/algorithm.c @@ -39,6 +39,7 @@ #include "algorithm/credits.h" #include "algorithm/blake256.h" #include "algorithm/blakecoin.h" +#include "algorithm/sia.h" #include "algorithm/decred.h" #include "algorithm/lbry.h" @@ -72,6 +73,7 @@ const char *algorithm_type_str[] = { "Yescrypt-multi", "Blakecoin", "Blake", + "Sia", "Decred", "Vanilla", "Lbry" @@ -976,6 +978,24 @@ static cl_int queue_blake_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un return status; } +static cl_int queue_sia_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel = &clState->kernel; @@ -1129,6 +1149,7 @@ static algorithm_settings_t algos[] = { { "blake256r8", ALGO_BLAKECOIN, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, sha256, NULL }, { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, blake256_midstate, blake256_prepare_work, queue_blake_kernel, gen_hash, NULL }, + { "sia", ALGO_SIA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 128, 0, sia_regenhash, NULL, NULL, queue_sia_kernel, NULL, NULL 
}, { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL }, { "lbry", ALGO_LBRY, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 2, 4 * 8 * 4194304, 0, lbry_regenhash, NULL, NULL, queue_lbry_kernel, gen_hash, NULL }, diff --git a/algorithm.h b/algorithm.h index c02da8a66..560562957 100644 --- a/algorithm.h +++ b/algorithm.h @@ -36,6 +36,7 @@ typedef enum { ALGO_YESCRYPT_MULTI, ALGO_BLAKECOIN, ALGO_BLAKE, + ALGO_SIA, ALGO_DECRED, ALGO_VANILLA, ALGO_LBRY diff --git a/algorithm/sia.c b/algorithm/sia.c new file mode 100644 index 000000000..c04ead43f --- /dev/null +++ b/algorithm/sia.c @@ -0,0 +1,232 @@ +/*- + * Copyright 2009 Colin Percival, 2014 savale + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + +// Cyclic right rotation. + +#ifndef ROTR64 +#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y)))) +#endif + +// Little-endian byte access. + +#define B2B_GET64(p) \ + (((uint64_t) ((uint8_t *) (p))[0]) ^ \ + (((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \ + (((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \ + (((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \ + (((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \ + (((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \ + (((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \ + (((uint64_t) ((uint8_t *) (p))[7]) << 56)) + +// G Mixing function. + +#define B2B_G(a, b, c, d, x, y) { \ + v[a] = v[a] + v[b] + x; \ + v[d] = ROTR64(v[d] ^ v[a], 32); \ + v[c] = v[c] + v[d]; \ + v[b] = ROTR64(v[b] ^ v[c], 24); \ + v[a] = v[a] + v[b] + y; \ + v[d] = ROTR64(v[d] ^ v[a], 16); \ + v[c] = v[c] + v[d]; \ + v[b] = ROTR64(v[b] ^ v[c], 63); } + +// Initialization Vector. 
+ +static const uint64_t blake2b_iv[8] = { + 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, + 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, + 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, + 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179 +}; + +// state context +typedef struct { + uint8_t b[128]; // input buffer + uint64_t h[8]; // chained state + uint64_t t[2]; // total number of bytes + size_t c; // pointer for b[] + size_t outlen; // digest size +} blake2b_ctx; + +void blake2b_update(blake2b_ctx *ctx, // context + const void *in, size_t inlen); // data to be hashed + +// Compression function. "last" flag indicates last block. + +static void blake2b_compress(blake2b_ctx *ctx, int last) +{ + const uint8_t sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } + }; + int i; + uint64_t v[16], m[16]; + + for (i = 0; i < 8; i++) { // init work variables + v[i] = ctx->h[i]; + v[i + 8] = blake2b_iv[i]; + } + + v[12] ^= ctx->t[0]; // low 64 bits of offset + v[13] ^= ctx->t[1]; // high 64 bits + if (last) // last block flag set ? 
+ v[14] = ~v[14]; + + for (i = 0; i < 16; i++) // get little-endian words + m[i] = B2B_GET64(&ctx->b[8 * i]); + + for (i = 0; i < 12; i++) { // twelve rounds + B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]); + B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]); + B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]); + B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]); + B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]); + B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]); + B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]); + B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]); + } + + for( i = 0; i < 8; ++i ) + ctx->h[i] ^= v[i] ^ v[i + 8]; +} + +// Initialize the hashing context "ctx" with optional key "key". +// 1 <= outlen <= 64 gives the digest size in bytes. +// Secret key (also <= 64 bytes) is optional (keylen = 0). + +int blake2b_init(blake2b_ctx *ctx, size_t outlen, + const void *key, size_t keylen) // (keylen=0: no key) +{ + size_t i; + + if (outlen == 0 || outlen > 64 || keylen > 64) + return -1; // illegal parameters + + for (i = 0; i < 8; i++) // state, "param block" + ctx->h[i] = blake2b_iv[i]; + ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen; + + ctx->t[0] = 0; // input count low word + ctx->t[1] = 0; // input count high word + ctx->c = 0; // pointer within buffer + ctx->outlen = outlen; + + for (i = keylen; i < 128; i++) // zero input block + ctx->b[i] = 0; + if (keylen > 0) { + blake2b_update(ctx, key, keylen); + ctx->c = 128; // at the end + } + + return 0; +} + +// Add "inlen" bytes from "in" into the hash. + +void blake2b_update(blake2b_ctx *ctx, + const void *in, size_t inlen) // data bytes +{ + size_t i; + + for (i = 0; i < inlen; i++) { + if (ctx->c == 128) { // buffer full ? + ctx->t[0] += ctx->c; // add counters + if (ctx->t[0] < ctx->c) // carry overflow ? 
+ ctx->t[1]++; // high word + blake2b_compress(ctx, 0); // compress (not last) + ctx->c = 0; // counter to zero + } + ctx->b[ctx->c++] = ((const uint8_t *) in)[i]; + } +} + +// Generate the message digest (size given in init). +// Result placed in "out". + +void blake2b_final(blake2b_ctx *ctx, void *out) +{ + size_t i; + + ctx->t[0] += ctx->c; // mark last block offset + if (ctx->t[0] < ctx->c) // carry overflow + ctx->t[1]++; // high word + + while (ctx->c < 128) // fill up with zeros + ctx->b[ctx->c++] = 0; + blake2b_compress(ctx, 1); // final block flag = 1 + + // little endian convert and store + for (i = 0; i < ctx->outlen; i++) { + ((uint8_t *) out)[i] = + (ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF; + } +} + +#ifdef __APPLE_CC__ +static +#endif +void siaHash(void *state, const void *input) +{ + blake2b_ctx ctx; + blake2b_init(&ctx, 32, NULL, 0); + blake2b_update(&ctx, input, 80); + blake2b_final(&ctx, state); +} + +void sia_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t hash[16]; + char *scratchbuf; + uint32_t *nonce = (uint32_t *)(work->data + 32); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 20); + data[8] = htobe32(*nonce); + siaHash(hash, data); + swab256(ohash, hash); +} diff --git a/algorithm/sia.h b/algorithm/sia.h new file mode 100644 index 000000000..413a76c79 --- /dev/null +++ b/algorithm/sia.h @@ -0,0 +1,8 @@ +#ifndef SIAH_H +#define SIAH_H + +#include "miner.h" + +extern void sia_regenhash(struct work *work); + +#endif /* SIAH_H */ \ No newline at end of file diff --git a/kernel/sia.cl b/kernel/sia.cl new file mode 100644 index 000000000..79a1354ac --- /dev/null +++ b/kernel/sia.cl @@ -0,0 +1,120 @@ + +#if __ENDIAN_LITTLE__ + #define SPH_LITTLE_ENDIAN 1 +#else + #define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ + typedef unsigned long long sph_u64; + typedef long long sph_s64; +#else +
typedef unsigned long sph_u64; + typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); + #define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC64LE(x) (*(const __global sph_u64 *) (x)); + #define DEC32LE(x) (*(const __global sph_u32 *) (x)); +#endif + +inline static uint2 ror64(const uint2 x, const uint y) +{ + return (uint2)(((x).x>>y)^((x).y<<(32-y)),((x).y>>y)^((x).x<<(32-y))); +} +inline static uint2 ror64_2(const uint2 x, const uint y) +{ + return (uint2)(((x).y>>(y-32))^((x).x<<(64-y)),((x).x>>(y-32))^((x).y<<(64-y))); +} +__constant static const uchar blake2b_sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } }; + +__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) { + sph_u32 gid = get_global_id(0); + + ulong m[16]; + m[0] = DEC64LE(block + 0); + m[1] = DEC64LE(block + 8); + m[2] = DEC64LE(block + 16); + m[3] = DEC64LE(block + 24); + m[4] = DEC64LE(block + 32); + m[4] &= 0xFFFFFFFF00000000; + 
m[4] ^= (gid); + m[5] = DEC64LE(block + 40); + m[6] = DEC64LE(block + 48); + m[7] = DEC64LE(block + 56); + m[8] = DEC64LE(block + 64); + m[9] = DEC64LE(block + 72); + m[10] = m[11] = m[12] = m[13] = m[14] = m[15] = 0; + + ulong v[16] = { 0x6a09e667f2bdc928, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade68281, 0x9b05688c2b3e6c1f, 0xe07c265404be4294, 0x5be0cd19137e2179 }; + +#define G(r,i,a,b,c,d) \ + a = a + b + m[ blake2b_sigma[r][2*i] ]; \ + ((uint2*)&d)[0] = ((uint2*)&d)[0].yx ^ ((uint2*)&a)[0].yx; \ + c = c + d; \ + ((uint2*)&b)[0] = ror64( ((uint2*)&b)[0] ^ ((uint2*)&c)[0], 24U); \ + a = a + b + m[ blake2b_sigma[r][2*i+1] ]; \ + ((uint2*)&d)[0] = ror64( ((uint2*)&d)[0] ^ ((uint2*)&a)[0], 16U); \ + c = c + d; \ + ((uint2*)&b)[0] = ror64_2( ((uint2*)&b)[0] ^ ((uint2*)&c)[0], 63U); + +#define ROUND(r) \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + +#undef G +#undef ROUND + + bool result = (SWAP8(0x6a09e667f2bdc928 ^ v[0] ^ v[8]) <= target); + if (result) + output[output[0xFF]++] = SWAP4(gid); +} diff --git a/sgminer.c b/sgminer.c index 896f7abce..3ebc8ee8a 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5630,6 +5630,9 @@ static void *stratum_sthread(void *userdata) else if (pool->algorithm.type == ALGO_LBRY) { nonce = *((uint32_t *)(work->data + 108)); } + else if (pool->algorithm.type == ALGO_SIA) { + nonce = *((uint32_t *)(work->data + 32)); + } else { nonce = *((uint32_t 
*)(work->data + 76)); } @@ -6116,7 +6119,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) cg_wlock(&pool->data_lock); nonce2le = htole64(pool->nonce2); - if (pool->algorithm.type != ALGO_DECRED) { + if (pool->algorithm.type != ALGO_DECRED && pool->algorithm.type != ALGO_SIA) { /* Update coinbase. Always use an LE encoded nonce2 to fill in values * from left to right and prevent overflow errors with small n2sizes */ memcpy(pool->coinbase + pool->nonce2_offset, &nonce2le, pool->n2size); @@ -6127,7 +6130,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) /* Downgrade to a read lock to read off the pool variables */ cg_dwlock(&pool->data_lock); - if (pool->algorithm.type != ALGO_DECRED) { + if (pool->algorithm.type != ALGO_DECRED && pool->algorithm.type != ALGO_SIA) { /* Generate merkle root */ pool->algorithm.gen_hash(pool->coinbase, pool->swork.cb_len, merkle_root); memcpy(merkle_sha, merkle_root, 32); @@ -6181,6 +6184,14 @@ static void gen_stratum_work(struct pool *pool, struct work *work) memcpy(work->data + 144, pool->nonce1bin, nonce2_offset); memcpy(work->data + 144 + nonce2_offset, &nonce2le, pool->n2size); } + else if (pool->algorithm.type == ALGO_SIA) { + size_t nonce2_offset = MIN(pool->n1_len, 4); + swab256(work->data, pool->header_bin + 4); // prevhash + memcpy(work->data + 32 + 4, pool->nonce1bin, nonce2_offset); + memcpy(work->data + 32 + 4 + nonce2_offset, &nonce2le, pool->n2size); + memcpy(work->data + 32 + 8, pool->header_bin + 68, 4); // timestamp + flip32(work->data + 32 + 8 + 8, pool->coinbase); // merkleroot + } else { data32 = (uint32_t *)merkle_sha; swap32 = (uint32_t *)merkle_root; @@ -7153,6 +7164,7 @@ static void rebuild_nonce(struct work *work, uint32_t nonce) if (work->pool->algorithm.type == ALGO_CRE) nonce_pos = 140; else if (work->pool->algorithm.type == ALGO_DECRED) nonce_pos = 140; else if (work->pool->algorithm.type == ALGO_LBRY) nonce_pos = 108; + else if (work->pool->algorithm.type == 
ALGO_SIA) nonce_pos = 32; uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos); @@ -7189,7 +7201,7 @@ static void update_work_stats(struct thr_info *thr, struct work *work) test_diff *= work->pool->algorithm.share_diff_multiplier; - if (unlikely(work->share_diff >= test_diff)) { + if (unlikely(work->pool->algorithm.type != ALGO_SIA && work->share_diff >= test_diff)) { work->block = true; work->pool->solved++; found_blocks++; From 29b2de377f5e2da09d73717e7853f5c1aa29b6a3 Mon Sep 17 00:00:00 2001 From: elbandi Date: Wed, 20 Jul 2016 15:53:51 +0200 Subject: [PATCH 53/63] Sia pools sends the block target too --- sgminer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sgminer.c b/sgminer.c index 3ebc8ee8a..3f8446f4e 100644 --- a/sgminer.c +++ b/sgminer.c @@ -7201,7 +7201,7 @@ static void update_work_stats(struct thr_info *thr, struct work *work) test_diff *= work->pool->algorithm.share_diff_multiplier; - if (unlikely(work->pool->algorithm.type != ALGO_SIA && work->share_diff >= test_diff)) { + if (unlikely(work->share_diff >= test_diff)) { work->block = true; work->pool->solved++; found_blocks++; From 522bd7672639961dafb2493d45001a9041439fa9 Mon Sep 17 00:00:00 2001 From: elbandi Date: Wed, 20 Jul 2016 23:37:12 +0200 Subject: [PATCH 54/63] Version bump to 5.5.0 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1c8b6c09b..c1c67d1b7 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [5]) -m4_define([v_min], [4]) +m4_define([v_min], [5]) m4_define([v_mic], [0]) m4_define([v_rev], [nicehash]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## From f32e6dcdb314c0047c7678c98d9651fd21d4783d Mon Sep 17 00:00:00 2001 From: Ivan Date: Wed, 5 Oct 2016 05:12:56 +0200 Subject: [PATCH 55/63] add sibcoin --- 
Makefile.am | 1 + algorithm.c | 53 ++ algorithm.h | 3 +- algorithm/sibcoin.c | 228 ++++++++ algorithm/sibcoin.h | 10 + kernel/gost-mod.cl | 1021 +++++++++++++++++++++++++++++++++++ kernel/gost.cl | 1064 +++++++++++++++++++++++++++++++++++++ kernel/sibcoin-mod.cl | 1169 +++++++++++++++++++++++++++++++++++++++++ kernel/sibcoin.cl | 759 ++++++++++++++++++++++++++ sph/Makefile.am | 2 +- sph/gost.c | 1104 ++++++++++++++++++++++++++++++++++++++ sph/sph_gost.h | 185 +++++++ 12 files changed, 5597 insertions(+), 2 deletions(-) create mode 100644 algorithm/sibcoin.c create mode 100644 algorithm/sibcoin.h create mode 100644 kernel/gost-mod.cl create mode 100644 kernel/gost.cl create mode 100644 kernel/sibcoin-mod.cl create mode 100644 kernel/sibcoin.cl create mode 100644 sph/gost.c create mode 100644 sph/sph_gost.h diff --git a/Makefile.am b/Makefile.am index 38f1bfed1..da0ba7859 100644 --- a/Makefile.am +++ b/Makefile.am @@ -61,6 +61,7 @@ sgminer_SOURCES += algorithm/fuguecoin.c algorithm/fuguecoin.h sgminer_SOURCES += algorithm/inkcoin.c algorithm/inkcoin.h sgminer_SOURCES += algorithm/animecoin.c algorithm/animecoin.h sgminer_SOURCES += algorithm/groestlcoin.c algorithm/groestlcoin.h +sgminer_SOURCES += algorithm/sibcoin.c algorithm/sibcoin.h sgminer_SOURCES += algorithm/sifcoin.c algorithm/sifcoin.h sgminer_SOURCES += algorithm/twecoin.c algorithm/twecoin.h sgminer_SOURCES += algorithm/marucoin.c algorithm/marucoin.h diff --git a/algorithm.c b/algorithm.c index ca9ba551a..6d3a2b946 100644 --- a/algorithm.c +++ b/algorithm.c @@ -42,6 +42,7 @@ #include "algorithm/sia.h" #include "algorithm/decred.h" #include "algorithm/lbry.h" +#include "algorithm/sibcoin.h" #include "compat.h" @@ -422,6 +423,54 @@ static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_b return status; } + +static cl_int queue_sibcoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_ulong 
le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + // bmw - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + // groestl - search2 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // skein - search3 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // jh - search4 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // keccak - search5 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // gost - search6 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // luffa - search7 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // cubehash - search8 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // shavite - search9 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // simd - search10 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // echo - search11 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + + static cl_int queue_bitblock_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel; @@ -1110,6 +1159,7 @@ static algorithm_settings_t algos[] = { #define A_DARK(a, b) \ { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } A_DARK("darkcoin", darkcoin_regenhash), + A_DARK("sibcoin", sibcoin_regenhash), A_DARK("inkcoin", inkcoin_regenhash), A_DARK("myriadcoin-groestl", myriadcoin_groestl_regenhash), #undef A_DARK @@ -1119,6 +1169,8 @@ static algorithm_settings_t algos[] = { { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 
0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, NULL, NULL, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, + { "sibcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 11, 8 * 16 * 4194304, 0, sibcoin_regenhash, NULL, NULL, queue_sibcoin_mod_kernel, gen_hash, append_x11_compiler_options }, + { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x13_compiler_options }, { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, NULL, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options }, { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, NULL, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options }, @@ -1216,6 +1268,7 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa ALGO_ALIAS_NF("adaptive-n-scrypt", "ckolivas", 11); ALGO_ALIAS("x11mod", "darkcoin-mod"); ALGO_ALIAS("x11", "darkcoin-mod"); + ALGO_ALIAS("x11-gost", "sibcoin-mod"); ALGO_ALIAS("x13mod", "marucoin-mod"); ALGO_ALIAS("x13", "marucoin-mod"); ALGO_ALIAS("x13old", "marucoin-modold"); diff --git a/algorithm.h b/algorithm.h index 560562957..2830e674d 100644 --- a/algorithm.h +++ b/algorithm.h @@ -39,7 +39,8 @@ typedef enum { ALGO_SIA, ALGO_DECRED, ALGO_VANILLA, - ALGO_LBRY + ALGO_LBRY, + ALGO_SIBCOIN } algorithm_type_t; extern const char *algorithm_type_str[]; diff --git a/algorithm/sibcoin.c b/algorithm/sibcoin.c new file mode 100644 index 000000000..5c3cbeee6 --- /dev/null +++ b/algorithm/sibcoin.c @@ -0,0 +1,228 @@ +/*- + * Copyright 2009 Colin Percival, 2011 ArtForz + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. 
+ */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + + +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" +#include "sph/sph_skein.h" +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" +#include "sph/sph_gost.h" + +/* One context per hash stage, kept in a single struct so a whole set can be copied with one memcpy. NOTE(review): the intent was to initialise once outside the hashing loop, but xhash() calls init_Xhash_contexts() on every invocation - confirm whether that is deliberate. */ +typedef struct { + sph_blake512_context blake1; + sph_bmw512_context bmw1; + sph_groestl512_context groestl1; + sph_skein512_context skein1; + sph_jh512_context jh1; + sph_keccak512_context keccak1; + sph_gost512_context gost1; + sph_luffa512_context luffa1; + sph_cubehash512_context cubehash1; + sph_shavite512_context shavite1; + sph_simd512_context simd1; + sph_echo512_context echo1; +} Xhash_context_holder; + +static Xhash_context_holder base_contexts; + +/* Run the matching sph_*512_init routine on every member of base_contexts. */ +static void init_Xhash_contexts() +{ + sph_blake512_init(&base_contexts.blake1); + sph_bmw512_init(&base_contexts.bmw1); + sph_groestl512_init(&base_contexts.groestl1); + sph_skein512_init(&base_contexts.skein1); + sph_jh512_init(&base_contexts.jh1); + sph_keccak512_init(&base_contexts.keccak1); + sph_gost512_init(&base_contexts.gost1); + sph_luffa512_init(&base_contexts.luffa1); + sph_cubehash512_init(&base_contexts.cubehash1); + sph_shavite512_init(&base_contexts.shavite1); + sph_simd512_init(&base_contexts.simd1); + sph_echo512_init(&base_contexts.echo1); +} + +/* + * Convert len 32-bit words from src with htobe32 and store the results in + * dst. Here len counts uint32_t elements, not bytes.
+ */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + +/* Hash the 80 bytes at input through blake, bmw, groestl, skein, jh, keccak, gost, luffa, cubehash, shavite, simd and echo in turn (each later stage consumes the previous 64-byte digest), then copy the first 32 bytes of the echo digest to state. */ +static inline void xhash(void *state, const void *input) +{ + init_Xhash_contexts(); + + Xhash_context_holder ctx; + + uint32_t hashA[16], hashB[16]; + //blake-bmw-groestl-skein-jh-keccak-gost-luffa-cubehash-shavite-simd-echo + memcpy(&ctx, &base_contexts, sizeof(base_contexts)); + + sph_blake512 (&ctx.blake1, input, 80); + sph_blake512_close (&ctx.blake1, hashA); + + sph_bmw512 (&ctx.bmw1, hashA, 64); + sph_bmw512_close(&ctx.bmw1, hashB); + + sph_groestl512 (&ctx.groestl1, hashB, 64); + sph_groestl512_close(&ctx.groestl1, hashA); + + sph_skein512 (&ctx.skein1, hashA, 64); + sph_skein512_close(&ctx.skein1, hashB); + + sph_jh512 (&ctx.jh1, hashB, 64); + sph_jh512_close(&ctx.jh1, hashA); + + sph_keccak512 (&ctx.keccak1, hashA, 64); + sph_keccak512_close(&ctx.keccak1, hashB); + + sph_gost512 (&ctx.gost1, hashB, 64); + sph_gost512_close(&ctx.gost1, hashA); + + sph_luffa512 (&ctx.luffa1, hashA, 64); + sph_luffa512_close (&ctx.luffa1, hashB); + + sph_cubehash512 (&ctx.cubehash1, hashB, 64); + sph_cubehash512_close(&ctx.cubehash1, hashA); + + sph_shavite512 (&ctx.shavite1, hashA, 64); + sph_shavite512_close(&ctx.shavite1, hashB); + + sph_simd512 (&ctx.simd1, hashB, 64); + sph_simd512_close(&ctx.simd1, hashA); + + sph_echo512 (&ctx.echo1, hashA, 64); + sph_echo512_close(&ctx.echo1, hashB); + + memcpy(state, hashB, 32); + +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code. Returns -1 if the last hash word exceeds diff1targ, 0 if it exceeds the target word read from ptarget, and 1 otherwise. */ +int sibcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + xhash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash
%08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} +/* Recompute work->hash: the first 19 words of work->data and the nonce word at byte offset 76 are converted with htobe32 and passed through xhash. */ +void sibcoin_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + xhash(ohash, data); +} +/* Scan nonces upward from n + 1, hashing each candidate. Returns true, storing the nonce in pdata word 19 and *last_nonce, when the last hash word is <= the target word; returns false with *last_nonce set once n reaches max_nonce or thr->work_restart is set. */ +bool scanhash_sibcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + xhash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} + + + + diff --git a/algorithm/sibcoin.h b/algorithm/sibcoin.h new file mode 100644 index 000000000..07ebcb512 --- /dev/null +++ b/algorithm/sibcoin.h @@ -0,0 +1,10 @@ +#ifndef SIBCOIN_H +#define SIBCOIN_H + +#include "miner.h" + +extern int sibcoin_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void sibcoin_regenhash(struct work *work); + +#endif /* SIBCOIN_H */ diff --git a/kernel/gost-mod.cl b/kernel/gost-mod.cl new file mode 100644 index 000000000..3e8477916 --- /dev/null +++ b/kernel/gost-mod.cl @@ -0,0 +1,1021 @@ +/* + * GOST R 34.11-2012
implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Ivan + */ + +#ifdef __cplusplus +extern "C"{ +#endif + +//-------------------------------------------------------------------------------------------- +// +// stribog implementation +// +//-------------------------------------------------------------------------------------------- + +#define GOST_XOR(A, B, C) do {\ + C[0] = A[0] ^ B[0];\ + C[1] = A[1] ^ B[1];\ + C[2] = A[2] ^ B[2];\ + C[3] = A[3] ^ B[3];\ + C[4] = A[4] ^ B[4];\ + C[5] = A[5] ^ B[5];\ + C[6] = A[6] ^ B[6];\ + C[7] = A[7] ^ B[7];\ +} while (0) + +// Tables for function F + __constant const sph_u64 T[8][256] = { + { + 0xE6F87E5C5B711FD0,0x258377800924FA16,0xC849E07E852EA4A8,0x5B4686A18F06C16A, + 0x0B32E9A2D77B416E,0xABDA37A467815C66,0xF61796A81A686676,0xF5DC0B706391954B, + 0x4862F38DB7E64BF1,0xFF5C629A68BD85C5,0xCB827DA6FCD75795,0x66D36DAF69B9F089, + 0x356C9F74483D83B0,0x7CBCECB1238C99A1,0x36A702AC31C4708D,0x9EB6A8D02FBCDFD6, + 0x8B19FA51E5B3AE37,0x9CCFB5408A127D0B,0xBC0C78B508208F5A,0xE533E3842288ECED, + 0xCEC2C7D377C15FD2,0xEC7817B6505D0F5E,0xB94CC2C08336871D,0x8C205DB4CB0B04AD, + 0x763C855B28A0892F,0x588D1B79F6FF3257,0x3FECF69E4311933E,0x0FC0D39F803A18C9, + 0xEE010A26F5F3AD83,0x10EFE8F4411979A6,0x5DCDA10C7DE93A10,0x4A1BEE1D1248E92C, + 0x53BFF2DB21847339,0xB4F50CCFA6A23D09,0x5FB4BC9CD84798CD,0xE88A2D8B071C56F9, + 0x7F7771695A756A9C,0xC5F02E71A0BA1EBC,0xA663F9AB4215E672,0x2EB19E22DE5FBB78, + 0x0DB9CE0F2594BA14,0x82520E6397664D84,0x2F031E6A0208EA98,0x5C7F2144A1BE6BF0, + 0x7A37CB1CD16362DB,0x83E08E2B4B311C64,0xCF70479BAB960E32,0x856BA986B9DEE71E, + 0xB5478C877AF56CE9,0xB8FE42885F61D6FD,0x1BDD0156966238C8,0x622157923EF8A92E, + 0xFC97FF42114476F8,0x9D7D350856452CEB,0x4C90C9B0E0A71256,0x2308502DFBCB016C, + 0x2D7A03FAA7A64845,0xF46E8B38BFC6C4AB,0xBDBEF8FDD477DEBA,0x3AAC4CEBC8079B79, + 0xF09CB105E8879D0C,0x27FA6A10AC8A58CB,0x8960E7C1401D0CEA,0x1A6F811E4A356928, + 
0x90C4FB0773D196FF,0x43501A2F609D0A9F,0xF7A516E0C63F3796,0x1CE4A6B3B8DA9252, + 0x1324752C38E08A9B,0xA5A864733BEC154F,0x2BF124575549B33F,0xD766DB15440DC5C7, + 0xA7D179E39E42B792,0xDADF151A61997FD3,0x86A0345EC0271423,0x38D5517B6DA939A4, + 0x6518F077104003B4,0x02791D90A5AEA2DD,0x88D267899C4A5D0A,0x930F66DF0A2865C2, + 0x4EE9D4204509B08B,0x325538916685292A,0x412907BFC533A842,0xB27E2B62544DC673, + 0x6C5304456295E007,0x5AF406E95351908A,0x1F2F3B6BC123616F,0xC37B09DC5255E5C6, + 0x3967D133B1FE6844,0x298839C7F0E711E2,0x409B87F71964F9A2,0xE938ADC3DB4B0719, + 0x0C0B4E47F9C3EBF4,0x5534D576D36B8843,0x4610A05AEB8B02D8,0x20C3CDF58232F251, + 0x6DE1840DBEC2B1E7,0xA0E8DE06B0FA1D08,0x7B854B540D34333B,0x42E29A67BCCA5B7F, + 0xD8A6088AC437DD0E,0xC63BB3A9D943ED81,0x21714DBD5E65A3B1,0x6761EDE7B5EEA169, + 0x2431F7C8D573ABF6,0xD51FC685E1A3671A,0x5E063CD40410C92D,0x283AB98F2CB04002, + 0x8FEBC06CB2F2F790,0x17D64F116FA1D33C,0xE07359F1A99EE4AA,0x784ED68C74CDC006, + 0x6E2A19D5C73B42DA,0x8712B4161C7045C3,0x371582E4ED93216D,0xACE390414939F6FC, + 0x7EC5F12186223B7C,0xC0B094042BAC16FB,0xF9D745379A527EBF,0x737C3F2EA3B68168, + 0x33E7B8D9BAD278CA,0xA9A32A34C22FFEBB,0xE48163CCFEDFBD0D,0x8E5940246EA5A670, + 0x51C6EF4B842AD1E4,0x22BAD065279C508C,0xD91488C218608CEE,0x319EA5491F7CDA17, + 0xD394E128134C9C60,0x094BF43272D5E3B3,0x9BF612A5A4AAD791,0xCCBBDA43D26FFD0F, + 0x34DE1F3C946AD250,0x4F5B5468995EE16B,0xDF9FAF6FEA8F7794,0x2648EA5870DD092B, + 0xBFC7E56D71D97C67,0xDDE6B2FF4F21D549,0x3C276B463AE86003,0x91767B4FAF86C71F, + 0x68A13E7835D4B9A0,0xB68C115F030C9FD4,0x141DD2C916582001,0x983D8F7DDD5324AC, + 0x64AA703FCC175254,0xC2C989948E02B426,0x3E5E76D69F46C2DE,0x50746F03587D8004, + 0x45DB3D829272F1E5,0x60584A029B560BF3,0xFBAE58A73FFCDC62,0xA15A5E4E6CAD4CE8, + 0x4BA96E55CE1FB8CC,0x08F9747AAE82B253,0xC102144CF7FB471B,0x9F042898F3EB8E36, + 0x068B27ADF2EFFB7A,0xEDCA97FE8C0A5EBE,0x778E0513F4F7D8CF,0x302C2501C32B8BF7, + 0x8D92DDFC175C554D,0xF865C57F46052F5F,0xEAF3301BA2B2F424,0xAA68B7ECBBD60D86, + 
0x998F0F350104754C,0x0000000000000000,0xF12E314D34D0CCEC,0x710522BE061823B5, + 0xAF280D9930C005C1,0x97FD5CE25D693C65,0x19A41CC633CC9A15,0x95844172F8C79EB8, + 0xDC5432B7937684A9,0x9436C13A2490CF58,0x802B13F332C8EF59,0xC442AE397CED4F5C, + 0xFA1CD8EFE3AB8D82,0xF2E5AC954D293FD1,0x6AD823E8907A1B7D,0x4D2249F83CF043B6, + 0x03CB9DD879F9F33D,0xDE2D2F2736D82674,0x2A43A41F891EE2DF,0x6F98999D1B6C133A, + 0xD4AD46CD3DF436FA,0xBB35DF50269825C0,0x964FDCAA813E6D85,0xEB41B0537EE5A5C4, + 0x0540BA758B160847,0xA41AE43BE7BB44AF,0xE3B8C429D0671797,0x819993BBEE9FBEB9, + 0xAE9A8DD1EC975421,0xF3572CDD917E6E31,0x6393D7DAE2AFF8CE,0x47A2201237DC5338, + 0xA32343DEC903EE35,0x79FC56C4A89A91E6,0x01B28048DC5751E0,0x1296F564E4B7DB7B, + 0x75F7188351597A12,0xDB6D9552BDCE2E33,0x1E9DBB231D74308F,0x520D7293FDD322D9, + 0xE20A44610C304677,0xFEEEE2D2B4EAD425,0xCA30FDEE20800675,0x61EACA4A47015A13, + 0xE74AFE1487264E30,0x2CC883B27BF119A5,0x1664CF59B3F682DC,0xA811AA7C1E78AF5B, + 0x1D5626FB648DC3B2,0xB73E9117DF5BCE34,0xD05F7CF06AB56F5D,0xFD257F0ACD132718, + 0x574DC8E676C52A9E,0x0739A7E52EB8AA9A,0x5486553E0F3CD9A3,0x56FF48AEAA927B7E, + 0xBE756525AD8E2D87,0x7D0E6CF9FFDBC841,0x3B1ECCA31450CA99,0x6913BE30E983E840, + 0xAD511009956EA71C,0xB1B5B6BA2DB4354E,0x4469BDCA4E25A005,0x15AF5281CA0F71E1, + 0x744598CB8D0E2BF2,0x593F9B312AA863B7,0xEFB38A6E29A4FC63,0x6B6AA3A04C2D4A9D, + 0x3D95EB0EE6BF31E3,0xA291C3961554BFD5,0x18169C8EEF9BCBF5,0x115D68BC9D4E2846, + 0xBA875F18FACF7420,0xD1EDFCB8B6E23EBD,0xB00736F2F1E364AE,0x84D929CE6589B6FE, + 0x70B7A2F6DA4F7255,0x0E7253D75C6D4929,0x04F23A3D574159A7,0x0A8069EA0B2C108E, + 0x49D073C56BB11A11,0x8AAB7A1939E4FFD7,0xCD095A0B0E38ACEF,0xC9FB60365979F548, + 0x92BDE697D67F3422,0xC78933E10514BC61,0xE1C1D9B975C9B54A,0xD2266160CF1BCD80, + 0x9A4492ED78FD8671,0xB3CCAB2A881A9793,0x72CEBF667FE1D088,0xD6D45B5D985A9427 + }, + { + 0xC811A8058C3F55DE,0x65F5B43196B50619,0xF74F96B1D6706E43,0x859D1E8BCB43D336, + 0x5AAB8A85CCFA3D84,0xF9C7BF99C295FCFD,0xA21FD5A1DE4B630F,0xCDB3EF763B8B456D, + 
0x803F59F87CF7C385,0xB27C73BE5F31913C,0x98E3AC6633B04821,0xBF61674C26B8F818, + 0x0FFBC995C4C130C8,0xAAA0862010761A98,0x6057F342210116AA,0xF63C760C0654CC35, + 0x2DDB45CC667D9042,0xBCF45A964BD40382,0x68E8A0C3EF3C6F3D,0xA7BD92D269FF73BC, + 0x290AE20201ED2287,0xB7DE34CDE885818F,0xD901EEA7DD61059B,0xD6FA273219A03553, + 0xD56F1AE874CCCEC9,0xEA31245C2E83F554,0x7034555DA07BE499,0xCE26D2AC56E7BEF7, + 0xFD161857A5054E38,0x6A0E7DA4527436D1,0x5BD86A381CDE9FF2,0xCAF7756231770C32, + 0xB09AAED9E279C8D0,0x5DEF1091C60674DB,0x111046A2515E5045,0x23536CE4729802FC, + 0xC50CBCF7F5B63CFA,0x73A16887CD171F03,0x7D2941AFD9F28DBD,0x3F5E3EB45A4F3B9D, + 0x84EEFE361B677140,0x3DB8E3D3E7076271,0x1A3A28F9F20FD248,0x7EBC7C75B49E7627, + 0x74E5F293C7EB565C,0x18DCF59E4F478BA4,0x0C6EF44FA9ADCB52,0xC699812D98DAC760, + 0x788B06DC6E469D0E,0xFC65F8EA7521EC4E,0x30A5F7219E8E0B55,0x2BEC3F65BCA57B6B, + 0xDDD04969BAF1B75E,0x99904CDBE394EA57,0x14B201D1E6EA40F6,0xBBB0C08241284ADD, + 0x50F20463BF8F1DFF,0xE8D7F93B93CBACB8,0x4D8CB68E477C86E8,0xC1DD1B3992268E3F, + 0x7C5AA11209D62FCB,0x2F3D98ABDB35C9AE,0x671369562BFD5FF5,0x15C1E16C36CEE280, + 0x1D7EB2EDF8F39B17,0xDA94D37DB00DFE01,0x877BC3EC760B8ADA,0xCB8495DFE153AE44, + 0x05A24773B7B410B3,0x12857B783C32ABDF,0x8EB770D06812513B,0x536739B9D2E3E665, + 0x584D57E271B26468,0xD789C78FC9849725,0xA935BBFA7D1AE102,0x8B1537A3DFA64188, + 0xD0CD5D9BC378DE7A,0x4AC82C9A4D80CFB7,0x42777F1B83BDB620,0x72D2883A1D33BD75, + 0x5E7A2D4BAB6A8F41,0xF4DAAB6BBB1C95D9,0x905CFFE7FD8D31B6,0x83AA6422119B381F, + 0xC0AEFB8442022C49,0xA0F908C663033AE3,0xA428AF0804938826,0xADE41C341A8A53C7, + 0xAE7121EE77E6A85D,0xC47F5C4A25929E8C,0xB538E9AA55CDD863,0x06377AA9DAD8EB29, + 0xA18AE87BB3279895,0x6EDFDA6A35E48414,0x6B7D9D19825094A7,0xD41CFA55A4E86CBF, + 0xE5CAEDC9EA42C59C,0xA36C351C0E6FC179,0x5181E4DE6FABBF89,0xFFF0C530184D17D4, + 0x9D41EB1584045892,0x1C0D525028D73961,0xF178EC180CA8856A,0x9A0571018EF811CD, + 0x4091A27C3EF5EFCC,0x19AF15239F6329D2,0x347450EFF91EB990,0xE11B4A078DD27759, + 
0xB9561DE5FC601331,0x912F1F5A2DA993C0,0x1654DCB65BA2191A,0x3E2DDE098A6B99EB, + 0x8A66D71E0F82E3FE,0x8C51ADB7D55A08D7,0x4533E50F8941FF7F,0x02E6DD67BD4859EC, + 0xE068AABA5DF6D52F,0xC24826E3FF4A75A5,0x6C39070D88ACDDF8,0x6486548C4691A46F, + 0xD1BEBD26135C7C0C,0xB30F93038F15334A,0x82D9849FC1BF9A69,0x9C320BA85420FAE4, + 0xFA528243AFF90767,0x9ED4D6CFE968A308,0xB825FD582C44B147,0x9B7691BC5EDCB3BB, + 0xC7EA619048FE6516,0x1063A61F817AF233,0x47D538683409A693,0x63C2CE984C6DED30, + 0x2A9FDFD86C81D91D,0x7B1E3B06032A6694,0x666089EBFBD9FD83,0x0A598EE67375207B, + 0x07449A140AFC495F,0x2CA8A571B6593234,0x1F986F8A45BBC2FB,0x381AA4A050B372C2, + 0x5423A3ADD81FAF3A,0x17273C0B8B86BB6C,0xFE83258DC869B5A2,0x287902BFD1C980F1, + 0xF5A94BD66B3837AF,0x88800A79B2CABA12,0x55504310083B0D4C,0xDF36940E07B9EEB2, + 0x04D1A7CE6790B2C5,0x612413FFF125B4DC,0x26F12B97C52C124F,0x86082351A62F28AC, + 0xEF93632F9937E5E7,0x3507B052293A1BE6,0xE72C30AE570A9C70,0xD3586041AE1425E0, + 0xDE4574B3D79D4CC4,0x92BA228040C5685A,0xF00B0CA5DC8C271C,0xBE1287F1F69C5A6E, + 0xF39E317FB1E0DC86,0x495D114020EC342D,0x699B407E3F18CD4B,0xDCA3A9D46AD51528, + 0x0D1D14F279896924,0x0000000000000000,0x593EB75FA196C61E,0x2E4E78160B116BD8, + 0x6D4AE7B058887F8E,0xE65FD013872E3E06,0x7A6DDBBBD30EC4E2,0xAC97FC89CAAEF1B1, + 0x09CCB33C1E19DBE1,0x89F3EAC462EE1864,0x7770CF49AA87ADC6,0x56C57ECA6557F6D6, + 0x03953DDA6D6CFB9A,0x36928D884456E07C,0x1EEB8F37959F608D,0x31D6179C4EAAA923, + 0x6FAC3AD7E5C02662,0x43049FA653991456,0xABD3669DC052B8EE,0xAF02C153A7C20A2B, + 0x3CCB036E3723C007,0x93C9C23D90E1CA2C,0xC33BC65E2F6ED7D3,0x4CFF56339758249E, + 0xB1E94E64325D6AA6,0x37E16D359472420A,0x79F8E661BE623F78,0x5214D90402C74413, + 0x482EF1FDF0C8965B,0x13F69BC5EC1609A9,0x0E88292814E592BE,0x4E198B542A107D72, + 0xCCC00FCBEBAFE71B,0x1B49C844222B703E,0x2564164DA840E9D5,0x20C6513E1FF4F966, + 0xBAC3203F910CE8AB,0xF2EDD1C261C47EF0,0x814CB945ACD361F3,0x95FEB8944A392105, + 0x5C9CF02C1622D6AD,0x971865F3F77178E9,0xBD87BA2B9BF0A1F4,0x444005B259655D09, + 
0xED75BE48247FBC0B,0x7596122E17CFF42A,0xB44B091785E97A15,0x966B854E2755DA9F, + 0xEEE0839249134791,0x32432A4623C652B9,0xA8465B47AD3E4374,0xF8B45F2412B15E8B, + 0x2417F6F078644BA3,0xFB2162FE7FDDA511,0x4BBBCC279DA46DC1,0x0173E0BDD024A276, + 0x22208C59A2BCA08A,0x8FC4906DB836F34D,0xE4B90D743A6667EA,0x7147B5E0705F46EF, + 0x2782CB2A1508B039,0xEC065EF5F45B1E7D,0x21B5B183CFD05B10,0xDBE733C060295C77, + 0x9FA73672394C017E,0xCF55321186C31C81,0xD8720E1A0D45A7ED,0x3B8F997A3DDF8958, + 0x3AFC79C7EDFB2B2E,0xE9A4198643EF0ECE,0x5F09CDF67B4E2D37,0x4F6A6BE9FA34DF04, + 0xB6ADD47038A123F9,0x8D224D0A057EAAA1,0xC96248B85C1BF7A8,0xE3FD9760309A2EB5, + 0x0B2A6E5BA351820D,0xEB42C4E1FEA75722,0x948D58299A1D8373,0x7FCF9CC864BAD451, + 0xA55B4FB5D4B72A50,0x08BF5381CE3D7997,0x46A6D8D5E42D04E5,0xD22B80FC7E308796, + 0x57B69E77B57354A0,0x3969441D8097D0B4,0x3330CAFBF3E2F0CF,0xE28E77DDE0BE8CC3, + 0x62B12E259C494F46,0xA6CE726FB9DBD1CA,0x41E242C1EED14DBA,0x76032FF47AA30FB0 + }, + { + 0x45B268A93ACDE4CC,0xAF7F0BE884549D08,0x048354B3C1468263,0x925435C2C80EFED2, + 0xEE4E37F27FDFFBA7,0x167A33920C60F14D,0xFB123B52EA03E584,0x4A0CAB53FDBB9007, + 0x9DEAF6380F788A19,0xCB48EC558F0CB32A,0xB59DC4B2D6FEF7E0,0xDCDBCA22F4F3ECB6, + 0x11DF5813549A9C40,0xE33FDEDF568ACED3,0xA0C1C8124322E9C3,0x07A56B8158FA6D0D, + 0x77279579B1E1F3DD,0xD9B18B74422AC004,0xB8EC2D9FFFABC294,0xF4ACF8A82D75914F, + 0x7BBF69B1EF2B6878,0xC4F62FAF487AC7E1,0x76CE809CC67E5D0C,0x6711D88F92E4C14C, + 0x627B99D9243DEDFE,0x234AA5C3DFB68B51,0x909B1F15262DBF6D,0x4F66EA054B62BCB5, + 0x1AE2CF5A52AA6AE8,0xBEA053FBD0CE0148,0xED6808C0E66314C9,0x43FE16CD15A82710, + 0xCD049231A06970F6,0xE7BC8A6C97CC4CB0,0x337CE835FCB3B9C0,0x65DEF2587CC780F3, + 0x52214EDE4132BB50,0x95F15E4390F493DF,0x870839625DD2E0F1,0x41313C1AFB8B66AF, + 0x91720AF051B211BC,0x477D427ED4EEA573,0x2E3B4CEEF6E3BE25,0x82627834EB0BCC43, + 0x9C03E3DD78E724C8,0x2877328AD9867DF9,0x14B51945E243B0F2,0x574B0F88F7EB97E2, + 0x88B6FA989AA4943A,0x19C4F068CB168586,0x50EE6409AF11FAEF,0x7DF317D5C04EABA4, + 
0x7A567C5498B4C6A9,0xB6BBFB804F42188E,0x3CC22BCF3BC5CD0B,0xD04336EAAA397713, + 0xF02FAC1BEC33132C,0x2506DBA7F0D3488D,0xD7E65D6BF2C31A1E,0x5EB9B2161FF820F5, + 0x842E0650C46E0F9F,0x716BEB1D9E843001,0xA933758CAB315ED4,0x3FE414FDA2792265, + 0x27C9F1701EF00932,0x73A4C1CA70A771BE,0x94184BA6E76B3D0E,0x40D829FF8C14C87E, + 0x0FBEC3FAC77674CB,0x3616A9634A6A9572,0x8F139119C25EF937,0xF545ED4D5AEA3F9E, + 0xE802499650BA387B,0x6437E7BD0B582E22,0xE6559F89E053E261,0x80AD52E305288DFC, + 0x6DC55A23E34B9935,0xDE14E0F51AD0AD09,0xC6390578A659865E,0x96D7617109487CB1, + 0xE2D6CB3A21156002,0x01E915E5779FAED1,0xADB0213F6A77DCB7,0x9880B76EB9A1A6AB, + 0x5D9F8D248644CF9B,0xFD5E4536C5662658,0xF1C6B9FE9BACBDFD,0xEACD6341BE9979C4, + 0xEFA7221708405576,0x510771ECD88E543E,0xC2BA51CB671F043D,0x0AD482AC71AF5879, + 0xFE787A045CDAC936,0xB238AF338E049AED,0xBD866CC94972EE26,0x615DA6EBBD810290, + 0x3295FDD08B2C1711,0xF834046073BF0AEA,0xF3099329758FFC42,0x1CAEB13E7DCFA934, + 0xBA2307481188832B,0x24EFCE42874CE65C,0x0E57D61FB0E9DA1A,0xB3D1BAD6F99B343C, + 0xC0757B1C893C4582,0x2B510DB8403A9297,0x5C7698C1F1DB614A,0x3E0D0118D5E68CB4, + 0xD60F488E855CB4CF,0xAE961E0DF3CB33D9,0x3A8E55AB14A00ED7,0x42170328623789C1, + 0x838B6DD19C946292,0x895FEF7DED3B3AEB,0xCFCBB8E64E4A3149,0x064C7E642F65C3DC, + 0x3D2B3E2A4C5A63DA,0x5BD3F340A9210C47,0xB474D157A1615931,0xAC5934DA1DE87266, + 0x6EE365117AF7765B,0xC86ED36716B05C44,0x9BA6885C201D49C5,0xB905387A88346C45, + 0x131072C4BAB9DDFF,0xBF49461EA751AF99,0xD52977BC1CE05BA1,0xB0F785E46027DB52, + 0x546D30BA6E57788C,0x305AD707650F56AE,0xC987C682612FF295,0xA5AB8944F5FBC571, + 0x7ED528E759F244CA,0x8DDCBBCE2C7DB888,0xAA154ABE328DB1BA,0x1E619BE993ECE88B, + 0x09F2BD9EE813B717,0x7401AA4B285D1CB3,0x21858F143195CAEE,0x48C381841398D1B8, + 0xFCB750D3B2F98889,0x39A86A998D1CE1B9,0x1F888E0CE473465A,0x7899568376978716, + 0x02CF2AD7EE2341BF,0x85C713B5B3F1A14E,0xFF916FE12B4567E7,0x7C1A0230B7D10575, + 0x0C98FCC85ECA9BA5,0xA3E7F720DA9E06AD,0x6A6031A2BBB1F438,0x973E74947ED7D260, + 
0x2CF4663918C0FF9A,0x5F50A7F368678E24,0x34D983B4A449D4CD,0x68AF1B755592B587, + 0x7F3C3D022E6DEA1B,0xABFC5F5B45121F6B,0x0D71E92D29553574,0xDFFDF5106D4F03D8, + 0x081BA87B9F8C19C6,0xDB7EA1A3AC0981BB,0xBBCA12AD66172DFA,0x79704366010829C7, + 0x179326777BFF5F9C,0x0000000000000000,0xEB2476A4C906D715,0x724DD42F0738DF6F, + 0xB752EE6538DDB65F,0x37FFBC863DF53BA3,0x8EFA84FCB5C157E6,0xE9EB5C73272596AA, + 0x1B0BDABF2535C439,0x86E12C872A4D4E20,0x9969A28BCE3E087A,0xFAFB2EB79D9C4B55, + 0x056A4156B6D92CB2,0x5A3AE6A5DEBEA296,0x22A3B026A8292580,0x53C85B3B36AD1581, + 0xB11E900117B87583,0xC51F3A4A3FE56930,0xE019E1EDCF3621BD,0xEC811D2591FCBA18, + 0x445B7D4C4D524A1D,0xA8DA6069DCAEF005,0x58F5CC72309DE329,0xD4C062596B7FF570, + 0xCE22AD0339D59F98,0x591CD99747024DF8,0x8B90C5AA03187B54,0xF663D27FC356D0F0, + 0xD8589E9135B56ED5,0x35309651D3D67A1C,0x12F96721CD26732E,0xD28C1C3D441A36AC, + 0x492A946164077F69,0x2D1D73DC6F5F514B,0x6F0A70F40D68D88A,0x60B4B30ECA1EAC41, + 0xD36509D83385987D,0x0B3D97490630F6A8,0x9ECCC90A96C46577,0xA20EE2C5AD01A87C, + 0xE49AB55E0E70A3DE,0xA4429CA182646BA0,0xDA97B446DB962F6A,0xCCED87D4D7F6DE27, + 0x2AB8185D37A53C46,0x9F25DCEFE15BCBA6,0xC19C6EF9FEA3EB53,0xA764A3931BD884CE, + 0x2FD2590B817C10F4,0x56A21A6D80743933,0xE573A0BB79EF0D0F,0x155C0CA095DC1E23, + 0x6C2C4FC694D437E4,0x10364DF623053291,0xDD32DFC7836C4267,0x03263F3299BCEF6E, + 0x66F8CD6AE57B6F9D,0x8C35AE2B5BE21659,0x31B3C2E21290F87F,0x93BD2027BF915003, + 0x69460E90220D1B56,0x299E276FAE19D328,0x63928C3C53A2432F,0x7082FEF8E91B9ED0, + 0xBC6F792C3EED40F7,0x4C40D537D2DE53DB,0x75E8BFAE5FC2B262,0x4DA9C0D2A541FD0A, + 0x4E8FFFE03CFD1264,0x2620E495696FA7E3,0xE1F0F408B8A98F6C,0xD1AA230FDDA6D9C2, + 0xC7D0109DD1C6288F,0x8A79D04F7487D585,0x4694579BA3710BA2,0x38417F7CFA834F68, + 0x1D47A4DB0A5007E5,0x206C9AF1460A643F,0xA128DDF734BD4712,0x8144470672B7232D, + 0xF2E086CC02105293,0x182DE58DBC892B57,0xCAA1F9B0F8931DFB,0x6B892447CC2E5AE9, + 0xF9DD11850420A43B,0x4BE5BEB68A243ED6,0x5584255F19C8D65D,0x3B67404E633FA006, + 
0xA68DB6766C472A1F,0xF78AC79AB4C97E21,0xC353442E1080AAEC,0x9A4F9DB95782E714 + }, + { + 0x05BA7BC82C9B3220,0x31A54665F8B65E4F,0xB1B651F77547F4D4,0x8BFA0D857BA46682, + 0x85A96C5AA16A98BB,0x990FAEF908EB79C9,0xA15E37A247F4A62D,0x76857DCD5D27741E, + 0xF8C50B800A1820BC,0xBE65DCB201F7A2B4,0x666D1B986F9426E7,0x4CC921BF53C4E648, + 0x95410A0F93D9CA42,0x20CDCCAA647BA4EF,0x429A4060890A1871,0x0C4EA4F69B32B38B, + 0xCCDA362DDE354CD3,0x96DC23BC7C5B2FA9,0xC309BB68AA851AB3,0xD26131A73648E013, + 0x021DC52941FC4DB2,0xCD5ADAB7704BE48A,0xA77965D984ED71E6,0x32386FD61734BBA4, + 0xE82D6DD538AB7245,0x5C2147EA6177B4B1,0x5DA1AB70CF091CE8,0xAC907FCE72B8BDFF, + 0x57C85DFD972278A8,0xA4E44C6A6B6F940D,0x3851995B4F1FDFE4,0x62578CCAED71BC9E, + 0xD9882BB0C01D2C0A,0x917B9D5D113C503B,0xA2C31E11A87643C6,0xE463C923A399C1CE, + 0xF71686C57EA876DC,0x87B4A973E096D509,0xAF0D567D9D3A5814,0xB40C2A3F59DCC6F4, + 0x3602F88495D121DD,0xD3E1DD3D9836484A,0xF945E71AA46688E5,0x7518547EB2A591F5, + 0x9366587450C01D89,0x9EA81018658C065B,0x4F54080CBC4603A3,0x2D0384C65137BF3D, + 0xDC325078EC861E2A,0xEA30A8FC79573FF7,0x214D2030CA050CB6,0x65F0322B8016C30C, + 0x69BE96DD1B247087,0xDB95EE9981E161B8,0xD1FC1814D9CA05F8,0x820ED2BBCC0DE729, + 0x63D76050430F14C7,0x3BCCB0E8A09D3A0F,0x8E40764D573F54A2,0x39D175C1E16177BD, + 0x12F5A37C734F1F4B,0xAB37C12F1FDFC26D,0x5648B167395CD0F1,0x6C04ED1537BF42A7, + 0xED97161D14304065,0x7D6C67DAAB72B807,0xEC17FA87BA4EE83C,0xDFAF79CB0304FBC1, + 0x733F060571BC463E,0x78D61C1287E98A27,0xD07CF48E77B4ADA1,0xB9C262536C90DD26, + 0xE2449B5860801605,0x8FC09AD7F941FCFB,0xFAD8CEA94BE46D0E,0xA343F28B0608EB9F, + 0x9B126BD04917347B,0x9A92874AE7699C22,0x1B017C42C4E69EE0,0x3A4C5C720EE39256, + 0x4B6E9F5E3EA399DA,0x6BA353F45AD83D35,0xE7FEE0904C1B2425,0x22D009832587E95D, + 0x842980C00F1430E2,0xC6B3C0A0861E2893,0x087433A419D729F2,0x341F3DADD42D6C6F, + 0xEE0A3FAEFBB2A58E,0x4AEE73C490DD3183,0xAAB72DB5B1A16A34,0xA92A04065E238FDF, + 0x7B4B35A1686B6FCC,0x6A23BF6EF4A6956C,0x191CB96B851AD352,0x55D598D4D6DE351A, + 
0xC9604DE5F2AE7EF3,0x1CA6C2A3A981E172,0xDE2F9551AD7A5398,0x3025AAFF56C8F616, + 0x15521D9D1E2860D9,0x506FE31CFA45073A,0x189C55F12B647B0B,0x0180EC9AAE7EA859, + 0x7CEC8B40050C105E,0x2350E5198BF94104,0xEF8AD33455CC0DD7,0x07A7BEE16D677F92, + 0xE5E325B90DE76997,0x5A061591A26E637A,0xB611EF1618208B46,0x09F4DF3EB7A981AB, + 0x1EBB078AE87DACC0,0xB791038CB65E231F,0x0FD38D4574B05660,0x67EDF702C1EA8EBE, + 0xBA5F4BE0831238CD,0xE3C477C2CEFEBE5C,0x0DCE486C354C1BD2,0x8C5DB36416C31910, + 0x26EA9ED1A7627324,0x039D29B3EF82E5EB,0x9F28FC82CBF2AE02,0xA8AAE89CF05D2786, + 0x431AACFA2774B028,0xCF471F9E31B7A938,0x581BD0B8E3922EC8,0xBC78199B400BEF06, + 0x90FB71C7BF42F862,0x1F3BEB1046030499,0x683E7A47B55AD8DE,0x988F4263A695D190, + 0xD808C72A6E638453,0x0627527BC319D7CB,0xEBB04466D72997AE,0xE67E0C0AE2658C7C, + 0x14D2F107B056C880,0x7122C32C30400B8C,0x8A7AE11FD5DACEDB,0xA0DEDB38E98A0E74, + 0xAD109354DCC615A6,0x0BE91A17F655CC19,0x8DDD5FFEB8BDB149,0xBFE53028AF890AED, + 0xD65BA6F5B4AD7A6A,0x7956F0882997227E,0x10E8665532B352F9,0x0E5361DFDACEFE39, + 0xCEC7F3049FC90161,0xFF62B561677F5F2E,0x975CCF26D22587F0,0x51EF0F86543BAF63, + 0x2F1E41EF10CBF28F,0x52722635BBB94A88,0xAE8DBAE73344F04D,0x410769D36688FD9A, + 0xB3AB94DE34BBB966,0x801317928DF1AA9B,0xA564A0F0C5113C54,0xF131D4BEBDB1A117, + 0x7F71A2F3EA8EF5B5,0x40878549C8F655C3,0x7EF14E6944F05DEC,0xD44663DCF55137D8, + 0xF2ACFD0D523344FC,0x0000000000000000,0x5FBC6E598EF5515A,0x16CF342EF1AA8532, + 0xB036BD6DDB395C8D,0x13754FE6DD31B712,0xBBDFA77A2D6C9094,0x89E7C8AC3A582B30, + 0x3C6B0E09CDFA459D,0xC4AE0589C7E26521,0x49735A777F5FD468,0xCAFD64561D2C9B18, + 0xDA1502032F9FC9E1,0x8867243694268369,0x3782141E3BAF8984,0x9CB5D53124704BE9, + 0xD7DB4A6F1AD3D233,0xA6F989432A93D9BF,0x9D3539AB8A0EE3B0,0x53F2CAAF15C7E2D1, + 0x6E19283C76430F15,0x3DEBE2936384EDC4,0x5E3C82C3208BF903,0x33B8834CB94A13FD, + 0x6470DEB12E686B55,0x359FD1377A53C436,0x61CAA57902F35975,0x043A975282E59A79, + 0xFD7F70482683129C,0xC52EE913699CCD78,0x28B9FF0E7DAC8D1D,0x5455744E78A09D43, + 
0xCB7D88CCB3523341,0x44BD121B4A13CFBA,0x4D49CD25FDBA4E11,0x3E76CB208C06082F, + 0x3FF627BA2278A076,0xC28957F204FBB2EA,0x453DFE81E46D67E3,0x94C1E6953DA7621B, + 0x2C83685CFF491764,0xF32C1197FC4DECA5,0x2B24D6BD922E68F6,0xB22B78449AC5113F, + 0x48F3B6EDD1217C31,0x2E9EAD75BEB55AD6,0x174FD8B45FD42D6B,0x4ED4E4961238ABFA, + 0x92E6B4EEFEBEB5D0,0x46A0D7320BEF8208,0x47203BA8A5912A51,0x24F75BF8E69E3E96, + 0xF0B1382413CF094E,0xFEE259FBC901F777,0x276A724B091CDB7D,0xBDF8F501EE75475F, + 0x599B3C224DEC8691,0x6D84018F99C1EAFE,0x7498B8E41CDB39AC,0xE0595E71217C5BB7, + 0x2AA43A273C50C0AF,0xF50B43EC3F543B6E,0x838E3E2162734F70,0xC09492DB4507FF58, + 0x72BFEA9FDFC2EE67,0x11688ACF9CCDFAA0,0x1A8190D86A9836B9,0x7ACBD93BC615C795, + 0xC7332C3A286080CA,0x863445E94EE87D50,0xF6966A5FD0D6DE85,0xE9AD814F96D5DA1C, + 0x70A22FB69E3EA3D5,0x0A69F68D582B6440,0xB8428EC9C2EE757F,0x604A49E3AC8DF12C, + 0x5B86F90B0C10CB23,0xE1D9B2EB8F02F3EE,0x29391394D3D22544,0xC8E0A17F5CD0D6AA, + 0xB58CC6A5F7A26EAD,0x8193FB08238F02C2,0xD5C68F465B2F9F81,0xFCFF9CD288FDBAC5, + 0x77059157F359DC47,0x1D262E3907FF492B,0xFB582233E59AC557,0xDDB2BCE242F8B673, + 0x2577B76248E096CF,0x6F99C4A6D83DA74C,0xC1147E41EB795701,0xF48BAF76912A9337 + }, + { + 0x3EF29D249B2C0A19,0xE9E16322B6F8622F,0x5536994047757F7A,0x9F4D56D5A47B0B33, + 0x822567466AA1174C,0xB8F5057DEB082FB2,0xCC48C10BF4475F53,0x373088D4275DEC3A, + 0x968F4325180AED10,0x173D232CF7016151,0xAE4ED09F946FCC13,0xFD4B4741C4539873, + 0x1B5B3F0DD9933765,0x2FFCB0967B644052,0xE02376D20A89840C,0xA3AE3A70329B18D7, + 0x419CBD2335DE8526,0xFAFEBF115B7C3199,0x0397074F85AA9B0D,0xC58AD4FB4836B970, + 0xBEC60BE3FC4104A8,0x1EFF36DC4B708772,0x131FDC33ED8453B6,0x0844E33E341764D3, + 0x0FF11B6EAB38CD39,0x64351F0A7761B85A,0x3B5694F509CFBA0E,0x30857084B87245D0, + 0x47AFB3BD2297AE3C,0xF2BA5C2F6F6B554A,0x74BDC4761F4F70E1,0xCFDFC64471EDC45E, + 0xE610784C1DC0AF16,0x7ACA29D63C113F28,0x2DED411776A859AF,0xAC5F211E99A3D5EE, + 0xD484F949A87EF33B,0x3CE36CA596E013E4,0xD120F0983A9D432C,0x6BC40464DC597563, + 
0x69D5F5E5D1956C9E,0x9AE95F043698BB24,0xC9ECC8DA66A4EF44,0xD69508C8A5B2EAC6, + 0xC40C2235C0503B80,0x38C193BA8C652103,0x1CEEC75D46BC9E8F,0xD331011937515AD1, + 0xD8E2E56886ECA50F,0xB137108D5779C991,0x709F3B6905CA4206,0x4FEB50831680CAEF, + 0xEC456AF3241BD238,0x58D673AFE181ABBE,0x242F54E7CAD9BF8C,0x0211F1810DCC19FD, + 0x90BC4DBB0F43C60A,0x9518446A9DA0761D,0xA1BFCBF13F57012A,0x2BDE4F8961E172B5, + 0x27B853A84F732481,0xB0B1E643DF1F4B61,0x18CC38425C39AC68,0xD2B7F7D7BF37D821, + 0x3103864A3014C720,0x14AA246372ABFA5C,0x6E600DB54EBAC574,0x394765740403A3F3, + 0x09C215F0BC71E623,0x2A58B947E987F045,0x7B4CDF18B477BDD8,0x9709B5EB906C6FE0, + 0x73083C268060D90B,0xFEDC400E41F9037E,0x284948C6E44BE9B8,0x728ECAE808065BFB, + 0x06330E9E17492B1A,0x5950856169E7294E,0xBAE4F4FCE6C4364F,0xCA7BCF95E30E7449, + 0x7D7FD186A33E96C2,0x52836110D85AD690,0x4DFAA1021B4CD312,0x913ABB75872544FA, + 0xDD46ECB9140F1518,0x3D659A6B1E869114,0xC23F2CABD719109A,0xD713FE062DD46836, + 0xD0A60656B2FBC1DC,0x221C5A79DD909496,0xEFD26DBCA1B14935,0x0E77EDA0235E4FC9, + 0xCBFD395B6B68F6B9,0x0DE0EAEFA6F4D4C4,0x0422FF1F1A8532E7,0xF969B85EDED6AA94, + 0x7F6E2007AEF28F3F,0x3AD0623B81A938FE,0x6624EE8B7AADA1A7,0xB682E8DDC856607B, + 0xA78CC56F281E2A30,0xC79B257A45FAA08D,0x5B4174E0642B30B3,0x5F638BFF7EAE0254, + 0x4BC9AF9C0C05F808,0xCE59308AF98B46AE,0x8FC58DA9CC55C388,0x803496C7676D0EB1, + 0xF33CAAE1E70DD7BA,0xBB6202326EA2B4BF,0xD5020F87201871CB,0x9D5CA754A9B712CE, + 0x841669D87DE83C56,0x8A6184785EB6739F,0x420BBA6CB0741E2B,0xF12D5B60EAC1CE47, + 0x76AC35F71283691C,0x2C6BB7D9FECEDB5F,0xFCCDB18F4C351A83,0x1F79C012C3160582, + 0xF0ABADAE62A74CB7,0xE1A5801C82EF06FC,0x67A21845F2CB2357,0x5114665F5DF04D9D, + 0xBF40FD2D74278658,0xA0393D3FB73183DA,0x05A409D192E3B017,0xA9FB28CF0B4065F9, + 0x25A9A22942BF3D7C,0xDB75E22703463E02,0xB326E10C5AB5D06C,0xE7968E8295A62DE6, + 0xB973F3B3636EAD42,0xDF571D3819C30CE5,0xEE549B7229D7CBC5,0x12992AFD65E2D146, + 0xF8EF4E9056B02864,0xB7041E134030E28B,0xC02EDD2ADAD50967,0x932B4AF48AE95D07, + 
0x6FE6FB7BC6DC4784,0x239AACB755F61666,0x401A4BEDBDB807D6,0x485EA8D389AF6305, + 0xA41BC220ADB4B13D,0x753B32B89729F211,0x997E584BB3322029,0x1D683193CEDA1C7F, + 0xFF5AB6C0C99F818E,0x16BBD5E27F67E3A1,0xA59D34EE25D233CD,0x98F8AE853B54A2D9, + 0x6DF70AFACB105E79,0x795D2E99B9BBA425,0x8E437B6744334178,0x0186F6CE886682F0, + 0xEBF092A3BB347BD2,0xBCD7FA62F18D1D55,0xADD9D7D011C5571E,0x0BD3E471B1BDFFDE, + 0xAA6C2F808EEAFEF4,0x5EE57D31F6C880A4,0xF50FA47FF044FCA0,0x1ADDC9C351F5B595, + 0xEA76646D3352F922,0x0000000000000000,0x85909F16F58EBEA6,0x46294573AAF12CCC, + 0x0A5512BF39DB7D2E,0x78DBD85731DD26D5,0x29CFBE086C2D6B48,0x218B5D36583A0F9B, + 0x152CD2ADFACD78AC,0x83A39188E2C795BC,0xC3B9DA655F7F926A,0x9ECBA01B2C1D89C3, + 0x07B5F8509F2FA9EA,0x7EE8D6C926940DCF,0x36B67E1AAF3B6ECA,0x86079859702425AB, + 0xFB7849DFD31AB369,0x4C7C57CC932A51E2,0xD96413A60E8A27FF,0x263EA566C715A671, + 0x6C71FC344376DC89,0x4A4F595284637AF8,0xDAF314E98B20BCF2,0x572768C14AB96687, + 0x1088DB7C682EC8BB,0x887075F9537A6A62,0x2E7A4658F302C2A2,0x619116DBE582084D, + 0xA87DDE018326E709,0xDCC01A779C6997E8,0xEDC39C3DAC7D50C8,0xA60A33A1A078A8C0, + 0xC1A82BE452B38B97,0x3F746BEA134A88E9,0xA228CCBEBAFD9A27,0xABEAD94E068C7C04, + 0xF48952B178227E50,0x5CF48CB0FB049959,0x6017E0156DE48ABD,0x4438B4F2A73D3531, + 0x8C528AE649FF5885,0xB515EF924DFCFB76,0x0C661C212E925634,0xB493195CC59A7986, + 0x9CDA519A21D1903E,0x32948105B5BE5C2D,0x194ACE8CD45F2E98,0x438D4CA238129CDB, + 0x9B6FA9CABEFE39D4,0x81B26009EF0B8C41,0xDED1EBF691A58E15,0x4E6DA64D9EE6481F, + 0x54B06F8ECF13FD8A,0x49D85E1D01C9E1F5,0xAFC826511C094EE3,0xF698A33075EE67AD, + 0x5AC7822EEC4DB243,0x8DD47C28C199DA75,0x89F68337DB1CE892,0xCDCE37C57C21DDA3, + 0x530597DE503C5460,0x6A42F2AA543FF793,0x5D727A7E73621BA9,0xE232875307459DF1, + 0x56A19E0FC2DFE477,0xC61DD3B4CD9C227D,0xE5877F03986A341B,0x949EB2A415C6F4ED, + 0x6206119460289340,0x6380E75AE84E11B0,0x8BE772B6D6D0F16F,0x50929091D596CF6D, + 0xE86795EC3E9EE0DF,0x7CF927482B581432,0xC86A3E14EEC26DB4,0x7119CDA78DACC0F6, + 
0xE40189CD100CB6EB,0x92ADBC3A028FDFF7,0xB2A017C2D2D3529C,0x200DABF8D05C8D6B, + 0x34A78F9BA2F77737,0xE3B4719D8F231F01,0x45BE423C2F5BB7C1,0xF71E55FEFD88E55D, + 0x6853032B59F3EE6E,0x65B3E9C4FF073AAA,0x772AC3399AE5EBEC,0x87816E97F842A75B, + 0x110E2DB2E0484A4B,0x331277CB3DD8DEDD,0xBD510CAC79EB9FA5,0x352179552A91F5C7 + }, + { + 0x8AB0A96846E06A6D,0x43C7E80B4BF0B33A,0x08C9B3546B161EE5,0x39F1C235EBA990BE, + 0xC1BEF2376606C7B2,0x2C209233614569AA,0xEB01523B6FC3289A,0x946953AB935ACEDD, + 0x272838F63E13340E,0x8B0455ECA12BA052,0x77A1B2C4978FF8A2,0xA55122CA13E54086, + 0x2276135862D3F1CD,0xDB8DDFDE08B76CFE,0x5D1E12C89E4A178A,0x0E56816B03969867, + 0xEE5F79953303ED59,0xAFED748BAB78D71D,0x6D929F2DF93E53EE,0xF5D8A8F8BA798C2A, + 0xF619B1698E39CF6B,0x95DDAF2F749104E2,0xEC2A9C80E0886427,0xCE5C8FD8825B95EA, + 0xC4E0D9993AC60271,0x4699C3A5173076F9,0x3D1B151F50A29F42,0x9ED505EA2BC75946, + 0x34665ACFDC7F4B98,0x61B1FB53292342F7,0xC721C0080E864130,0x8693CD1696FD7B74, + 0x872731927136B14B,0xD3446C8A63A1721B,0x669A35E8A6680E4A,0xCAB658F239509A16, + 0xA4E5DE4EF42E8AB9,0x37A7435EE83F08D9,0x134E6239E26C7F96,0x82791A3C2DF67488, + 0x3F6EF00A8329163C,0x8E5A7E42FDEB6591,0x5CAAEE4C7981DDB5,0x19F234785AF1E80D, + 0x255DDDE3ED98BD70,0x50898A32A99CCCAC,0x28CA4519DA4E6656,0xAE59880F4CB31D22, + 0x0D9798FA37D6DB26,0x32F968F0B4FFCD1A,0xA00F09644F258545,0xFA3AD5175E24DE72, + 0xF46C547C5DB24615,0x713E80FBFF0F7E20,0x7843CF2B73D2AAFA,0xBD17EA36AEDF62B4, + 0xFD111BACD16F92CF,0x4ABAA7DBC72D67E0,0xB3416B5DAD49FAD3,0xBCA316B24914A88B, + 0x15D150068AECF914,0xE27C1DEBE31EFC40,0x4FE48C759BEDA223,0x7EDCFD141B522C78, + 0x4E5070F17C26681C,0xE696CAC15815F3BC,0x35D2A64B3BB481A7,0x800CFF29FE7DFDF6, + 0x1ED9FAC3D5BAA4B0,0x6C2663A91EF599D1,0x03C1199134404341,0xF7AD4DED69F20554, + 0xCD9D9649B61BD6AB,0xC8C3BDE7EADB1368,0xD131899FB02AFB65,0x1D18E352E1FAE7F1, + 0xDA39235AEF7CA6C1,0xA1BBF5E0A8EE4F7A,0x91377805CF9A0B1E,0x3138716180BF8E5B, + 0xD9F83ACBDB3CE580,0x0275E515D38B897E,0x472D3F21F0FBBCC6,0x2D946EB7868EA395, + 
0xBA3C248D21942E09,0xE7223645BFDE3983,0xFF64FEB902E41BB1,0xC97741630D10D957, + 0xC3CB1722B58D4ECC,0xA27AEC719CAE0C3B,0x99FECB51A48C15FB,0x1465AC826D27332B, + 0xE1BD047AD75EBF01,0x79F733AF941960C5,0x672EC96C41A3C475,0xC27FEBA6524684F3, + 0x64EFD0FD75E38734,0xED9E60040743AE18,0xFB8E2993B9EF144D,0x38453EB10C625A81, + 0x6978480742355C12,0x48CF42CE14A6EE9E,0x1CAC1FD606312DCE,0x7B82D6BA4792E9BB, + 0x9D141C7B1F871A07,0x5616B80DC11C4A2E,0xB849C198F21FA777,0x7CA91801C8D9A506, + 0xB1348E487EC273AD,0x41B20D1E987B3A44,0x7460AB55A3CFBBE3,0x84E628034576F20A, + 0x1B87D16D897A6173,0x0FE27DEFE45D5258,0x83CDE6B8CA3DBEB7,0x0C23647ED01D1119, + 0x7A362A3EA0592384,0xB61F40F3F1893F10,0x75D457D1440471DC,0x4558DA34237035B8, + 0xDCA6116587FC2043,0x8D9B67D3C9AB26D0,0x2B0B5C88EE0E2517,0x6FE77A382AB5DA90, + 0x269CC472D9D8FE31,0x63C41E46FAA8CB89,0xB7ABBC771642F52F,0x7D1DE4852F126F39, + 0xA8C6BA3024339BA0,0x600507D7CEE888C8,0x8FEE82C61A20AFAE,0x57A2448926D78011, + 0xFCA5E72836A458F0,0x072BCEBB8F4B4CBD,0x497BBE4AF36D24A1,0x3CAFE99BB769557D, + 0x12FA9EBD05A7B5A9,0xE8C04BAA5B836BDB,0x4273148FAC3B7905,0x908384812851C121, + 0xE557D3506C55B0FD,0x72FF996ACB4F3D61,0x3EDA0C8E64E2DC03,0xF0868356E6B949E9, + 0x04EAD72ABB0B0FFC,0x17A4B5135967706A,0xE3C8E16F04D5367F,0xF84F30028DAF570C, + 0x1846C8FCBD3A2232,0x5B8120F7F6CA9108,0xD46FA231ECEA3EA6,0x334D947453340725, + 0x58403966C28AD249,0xBED6F3A79A9F21F5,0x68CCB483A5FE962D,0xD085751B57E1315A, + 0xFED0023DE52FD18E,0x4B0E5B5F20E6ADDF,0x1A332DE96EB1AB4C,0xA3CE10F57B65C604, + 0x108F7BA8D62C3CD7,0xAB07A3A11073D8E1,0x6B0DAD1291BED56C,0xF2F366433532C097, + 0x2E557726B2CEE0D4,0x0000000000000000,0xCB02A476DE9B5029,0xE4E32FD48B9E7AC2, + 0x734B65EE2C84F75E,0x6E5386BCCD7E10AF,0x01B4FC84E7CBCA3F,0xCFE8735C65905FD5, + 0x3613BFDA0FF4C2E6,0x113B872C31E7F6E8,0x2FE18BA255052AEB,0xE974B72EBC48A1E4, + 0x0ABC5641B89D979B,0xB46AA5E62202B66E,0x44EC26B0C4BBFF87,0xA6903B5B27A503C7, + 0x7F680190FC99E647,0x97A84A3AA71A8D9C,0xDD12EDE16037EA7C,0xC554251DDD0DC84E, + 
0x88C54C7D956BE313,0x4D91696048662B5D,0xB08072CC9909B992,0xB5DE5962C5C97C51, + 0x81B803AD19B637C9,0xB2F597D94A8230EC,0x0B08AAC55F565DA4,0xF1327FD2017283D6, + 0xAD98919E78F35E63,0x6AB9519676751F53,0x24E921670A53774F,0xB9FD3D1C15D46D48, + 0x92F66194FBDA485F,0x5A35DC7311015B37,0xDED3F4705477A93D,0xC00A0EB381CD0D8D, + 0xBB88D809C65FE436,0x16104997BEACBA55,0x21B70AC95693B28C,0x59F4C5E225411876, + 0xD5DB5EB50B21F499,0x55D7A19CF55C096F,0xA97246B4C3F8519F,0x8552D487A2BD3835, + 0x54635D181297C350,0x23C2EFDC85183BF2,0x9F61F96ECC0C9379,0x534893A39DDC8FED, + 0x5EDF0B59AA0A54CB,0xAC2C6D1A9F38945C,0xD7AEBBA0D8AA7DE7,0x2ABFA00C09C5EF28, + 0xD84CC64F3CF72FBF,0x2003F64DB15878B3,0xA724C7DFC06EC9F8,0x069F323F68808682, + 0xCC296ACD51D01C94,0x055E2BAE5CC0C5C3,0x6270E2C21D6301B6,0x3B842720382219C0, + 0xD2F0900E846AB824,0x52FC6F277A1745D2,0xC6953C8CE94D8B0F,0xE009F8FE3095753E, + 0x655B2C7992284D0B,0x984A37D54347DFC4,0xEAB5AEBF8808E2A5,0x9A3FD2C090CC56BA, + 0x9CA0E0FFF84CD038,0x4C2595E4AFADE162,0xDF6708F4B3BC6302,0xBF620F237D54EBCA, + 0x93429D101C118260,0x097D4FD08CDDD4DA,0x8C2F9B572E60ECEF,0x708A7C7F18C4B41F, + 0x3A30DBA4DFE9D3FF,0x4006F19A7FB0F07B,0x5F6BF7DD4DC19EF4,0x1F6D064732716E8F, + 0xF9FBCC866A649D33,0x308C8DE567744464,0x8971B0F972A0292C,0xD61A47243F61B7D8, + 0xEFEB8511D4C82766,0x961CB6BE40D147A3,0xAAB35F25F7B812DE,0x76154E407044329D, + 0x513D76B64E570693,0xF3479AC7D2F90AA8,0x9B8B2E4477079C85,0x297EB99D3D85AC69 + }, + { + 0x7E37E62DFC7D40C3,0x776F25A4EE939E5B,0xE045C850DD8FB5AD,0x86ED5BA711FF1952, + 0xE91D0BD9CF616B35,0x37E0AB256E408FFB,0x9607F6C031025A7A,0x0B02F5E116D23C9D, + 0xF3D8486BFB50650C,0x621CFF27C40875F5,0x7D40CB71FA5FD34A,0x6DAA6616DAA29062, + 0x9F5F354923EC84E2,0xEC847C3DC507C3B3,0x025A3668043CE205,0xA8BF9E6C4DAC0B19, + 0xFA808BE2E9BEBB94,0xB5B99C5277C74FA3,0x78D9BC95F0397BCC,0xE332E50CDBAD2624, + 0xC74FCE129332797E,0x1729ECEB2EA709AB,0xC2D6B9F69954D1F8,0x5D898CBFBAB8551A, + 0x859A76FB17DD8ADB,0x1BE85886362F7FB5,0xF6413F8FF136CD8A,0xD3110FA5BBB7E35C, + 
0x0A2FEED514CC4D11,0xE83010EDCD7F1AB9,0xA1E75DE55F42D581,0xEEDE4A55C13B21B6, + 0xF2F5535FF94E1480,0x0CC1B46D1888761E,0xBCE15FDB6529913B,0x2D25E8975A7181C2, + 0x71817F1CE2D7A554,0x2E52C5CB5C53124B,0xF9F7A6BEEF9C281D,0x9E722E7D21F2F56E, + 0xCE170D9B81DCA7E6,0x0E9B82051CB4941B,0x1E712F623C49D733,0x21E45CFA42F9F7DC, + 0xCB8E7A7F8BBA0F60,0x8E98831A010FB646,0x474CCF0D8E895B23,0xA99285584FB27A95, + 0x8CC2B57205335443,0x42D5B8E984EFF3A5,0x012D1B34021E718C,0x57A6626AAE74180B, + 0xFF19FC06E3D81312,0x35BA9D4D6A7C6DFE,0xC9D44C178F86ED65,0x506523E6A02E5288, + 0x03772D5C06229389,0x8B01F4FE0B691EC0,0xF8DABD8AED825991,0x4C4E3AEC985B67BE, + 0xB10DF0827FBF96A9,0x6A69279AD4F8DAE1,0xE78689DCD3D5FF2E,0x812E1A2B1FA553D1, + 0xFBAD90D6EBA0CA18,0x1AC543B234310E39,0x1604F7DF2CB97827,0xA6241C6951189F02, + 0x753513CCEAAF7C5E,0x64F2A59FC84C4EFA,0x247D2B1E489F5F5A,0xDB64D718AB474C48, + 0x79F4A7A1F2270A40,0x1573DA832A9BEBAE,0x3497867968621C72,0x514838D2A2302304, + 0xF0AF6537FD72F685,0x1D06023E3A6B44BA,0x678588C3CE6EDD73,0x66A893F7CC70ACFF, + 0xD4D24E29B5EDA9DF,0x3856321470EA6A6C,0x07C3418C0E5A4A83,0x2BCBB22F5635BACD, + 0x04B46CD00878D90A,0x06EE5AB80C443B0F,0x3B211F4876C8F9E5,0x0958C38912EEDE98, + 0xD14B39CDBF8B0159,0x397B292072F41BE0,0x87C0409313E168DE,0xAD26E98847CAA39F, + 0x4E140C849C6785BB,0xD5FF551DB7F3D853,0xA0CA46D15D5CA40D,0xCD6020C787FE346F, + 0x84B76DCF15C3FB57,0xDEFDA0FCA121E4CE,0x4B8D7B6096012D3D,0x9AC642AD298A2C64, + 0x0875D8BD10F0AF14,0xB357C6EA7B8374AC,0x4D6321D89A451632,0xEDA96709C719B23F, + 0xF76C24BBF328BC06,0xC662D526912C08F2,0x3CE25EC47892B366,0xB978283F6F4F39BD, + 0xC08C8F9E9D6833FD,0x4F3917B09E79F437,0x593DE06FB2C08C10,0xD6887841B1D14BDA, + 0x19B26EEE32139DB0,0xB494876675D93E2F,0x825937771987C058,0x90E9AC783D466175, + 0xF1827E03FF6C8709,0x945DC0A8353EB87F,0x4516F9658AB5B926,0x3F9573987EB020EF, + 0xB855330B6D514831,0x2AE6A91B542BCB41,0x6331E413C6160479,0x408F8E8180D311A0, + 0xEFF35161C325503A,0xD06622F9BD9570D5,0x8876D9A20D4B8D49,0xA5533135573A0C8B, + 
0xE168D364DF91C421,0xF41B09E7F50A2F8F,0x12B09B0F24C1A12D,0xDA49CC2CA9593DC4, + 0x1F5C34563E57A6BF,0x54D14F36A8568B82,0xAF7CDFE043F6419A,0xEA6A2685C943F8BC, + 0xE5DCBFB4D7E91D2B,0xB27ADDDE799D0520,0x6B443CAED6E6AB6D,0x7BAE91C9F61BE845, + 0x3EB868AC7CAE5163,0x11C7B65322E332A4,0xD23C1491B9A992D0,0x8FB5982E0311C7CA, + 0x70AC6428E0C9D4D8,0x895BC2960F55FCC5,0x76423E90EC8DEFD7,0x6FF0507EDE9E7267, + 0x3DCF45F07A8CC2EA,0x4AA06054941F5CB1,0x5810FB5BB0DEFD9C,0x5EFEA1E3BC9AC693, + 0x6EDD4B4ADC8003EB,0x741808F8E8B10DD2,0x145EC1B728859A22,0x28BC9F7350172944, + 0x270A06424EBDCCD3,0x972AEDF4331C2BF6,0x059977E40A66A886,0x2550302A4A812ED6, + 0xDD8A8DA0A7037747,0xC515F87A970E9B7B,0x3023EAA9601AC578,0xB7E3AA3A73FBADA6, + 0x0FB699311EAAE597,0x0000000000000000,0x310EF19D6204B4F4,0x229371A644DB6455, + 0x0DECAF591A960792,0x5CA4978BB8A62496,0x1C2B190A38753536,0x41A295B582CD602C, + 0x3279DCC16426277D,0xC1A194AA9F764271,0x139D803B26DFD0A1,0xAE51C4D441E83016, + 0xD813FA44AD65DFC1,0xAC0BF2BC45D4D213,0x23BE6A9246C515D9,0x49D74D08923DCF38, + 0x9D05032127D066E7,0x2F7FDEFF5E4D63C7,0xA47E2A0155247D07,0x99B16FF12FA8BFED, + 0x4661D4398C972AAF,0xDFD0BBC8A33F9542,0xDCA79694A51D06CB,0xB020EBB67DA1E725, + 0xBA0F0563696DAA34,0xE4F1A480D5F76CA7,0xC438E34E9510EAF7,0x939E81243B64F2FC, + 0x8DEFAE46072D25CF,0x2C08F3A3586FF04E,0xD7A56375B3CF3A56,0x20C947CE40E78650, + 0x43F8A3DD86F18229,0x568B795EAC6A6987,0x8003011F1DBB225D,0xF53612D3F7145E03, + 0x189F75DA300DEC3C,0x9570DB9C3720C9F3,0xBB221E576B73DBB8,0x72F65240E4F536DD, + 0x443BE25188ABC8AA,0xE21FFE38D9B357A8,0xFD43CA6EE7E4F117,0xCAA3614B89A47EEC, + 0xFE34E732E1C6629E,0x83742C431B99B1D4,0xCF3A16AF83C2D66A,0xAAE5A8044990E91C, + 0x26271D764CA3BD5F,0x91C4B74C3F5810F9,0x7C6DD045F841A2C6,0x7F1AFD19FE63314F, + 0xC8F957238D989CE9,0xA709075D5306EE8E,0x55FC5402AA48FA0E,0x48FA563C9023BEB4, + 0x65DFBEABCA523F76,0x6C877D22D8BCE1EE,0xCC4D3BF385E045E3,0xBEBB69B36115733E, + 0x10EAAD6720FD4328,0xB6CEB10E71E5DC2A,0xBDCC44EF6737E0B7,0x523F158EA412B08D, + 
0x989C74C52DB6CE61,0x9BEB59992B945DE8,0x8A2CEFCA09776F4C,0xA3BD6B8D5B7E3784, + 0xEB473DB1CB5D8930,0xC3FBA2C29B4AA074,0x9C28181525CE176B,0x683311F2D0C438E4, + 0x5FD3BAD7BE84B71F,0xFC6ED15AE5FA809B,0x36CDB0116C5EFE77,0x29918447520958C8, + 0xA29070B959604608,0x53120EBAA60CC101,0x3A0C047C74D68869,0x691E0AC6D2DA4968, + 0x73DB4974E6EB4751,0x7A838AFDF40599C9,0x5A4ACD33B4E21F99,0x6046C94FC03497F0, + 0xE6AB92E8D1CB8EA2,0x3354C7F5663856F1,0xD93EE170AF7BAE4D,0x616BD27BC22AE67C, + 0x92B39A10397A8370,0xABC8B3304B8E9890,0xBF967287630B02B2,0x5B67D607B6FC6E15 + }, + { + 0xD031C397CE553FE6,0x16BA5B01B006B525,0xA89BADE6296E70C8,0x6A1F525D77D3435B, + 0x6E103570573DFA0B,0x660EFB2A17FC95AB,0x76327A9E97634BF6,0x4BAD9D6462458BF5, + 0xF1830CAEDBC3F748,0xC5C8F542669131FF,0x95044A1CDC48B0CB,0x892962DF3CF8B866, + 0xB0B9E208E930C135,0xA14FB3F0611A767C,0x8D2605F21C160136,0xD6B71922FECC549E, + 0x37089438A5907D8B,0x0B5DA38E5803D49C,0x5A5BCC9CEA6F3CBC,0xEDAE246D3B73FFE5, + 0xD2B87E0FDE22EDCE,0x5E54ABB1CA8185EC,0x1DE7F88FE80561B9,0xAD5E1A870135A08C, + 0x2F2ADBD665CECC76,0x5780B5A782F58358,0x3EDC8A2EEDE47B3F,0xC9D95C3506BEE70F, + 0x83BE111D6C4E05EE,0xA603B90959367410,0x103C81B4809FDE5D,0x2C69B6027D0C774A, + 0x399080D7D5C87953,0x09D41E16487406B4,0xCDD63B1826505E5F,0xF99DC2F49B0298E8, + 0x9CD0540A943CB67F,0xBCA84B7F891F17C5,0x723D1DB3B78DF2A6,0x78AA6E71E73B4F2E, + 0x1433E699A071670D,0x84F21BE454620782,0x98DF3327B4D20F2F,0xF049DCE2D3769E5C, + 0xDB6C60199656EB7A,0x648746B2078B4783,0x32CD23598DCBADCF,0x1EA4955BF0C7DA85, + 0xE9A143401B9D46B5,0xFD92A5D9BBEC21B8,0xC8138C790E0B8E1B,0x2EE00B9A6D7BA562, + 0xF85712B893B7F1FC,0xEB28FED80BEA949D,0x564A65EB8A40EA4C,0x6C9988E8474A2823, + 0x4535898B121D8F2D,0xABD8C03231ACCBF4,0xBA2E91CAB9867CBD,0x7960BE3DEF8E263A, + 0x0C11A977602FD6F0,0xCB50E1AD16C93527,0xEAE22E94035FFD89,0x2866D12F5DE2CE1A, + 0xFF1B1841AB9BF390,0x9F9339DE8CFE0D43,0x964727C8C48A0BF7,0x524502C6AAAE531C, + 0x9B9C5EF3AC10B413,0x4FA2FA4942AB32A5,0x3F165A62E551122B,0xC74148DA76E6E3D7, + 
0x924840E5E464B2A7,0xD372AE43D69784DA,0x233B72A105E11A86,0xA48A04914941A638, + 0xB4B68525C9DE7865,0xDDEABAACA6CF8002,0x0A9773C250B6BD88,0xC284FFBB5EBD3393, + 0x8BA0DF472C8F6A4E,0x2AEF6CB74D951C32,0x427983722A318D41,0x73F7CDFFBF389BB2, + 0x074C0AF9382C026C,0x8A6A0F0B243A035A,0x6FDAE53C5F88931F,0xC68B98967E538AC3, + 0x44FF59C71AA8E639,0xE2FCE0CE439E9229,0xA20CDE2479D8CD40,0x19E89FA2C8EBD8E9, + 0xF446BBCFF398270C,0x43B3533E2284E455,0xD82F0DCD8E945046,0x51066F12B26CE820, + 0xE73957AF6BC5426D,0x081ECE5A40C16FA0,0x3B193D4FC5BFAB7B,0x7FE66488DF174D42, + 0x0E9814EF705804D8,0x8137AC857C39D7C6,0xB1733244E185A821,0x695C3F896F11F867, + 0xF6CF0657E3EFF524,0x1AABF276D02963D5,0x2DA3664E75B91E5E,0x0289BD981077D228, + 0x90C1FD7DF413608F,0x3C5537B6FD93A917,0xAA12107E3919A2E0,0x0686DAB530996B78, + 0xDAA6B0559EE3826E,0xC34E2FF756085A87,0x6D5358A44FFF4137,0xFC587595B35948AC, + 0x7CA5095CC7D5F67E,0xFB147F6C8B754AC0,0xBFEB26AB91DDACF9,0x6896EFC567A49173, + 0xCA9A31E11E7C5C33,0xBBE44186B13315A9,0x0DDB793B689ABFE4,0x70B4A02BA7FA208E, + 0xE47A3A7B7307F951,0x8CECD5BE14A36822,0xEEED49B923B144D9,0x17708B4DB8B3DC31, + 0x6088219F2765FED3,0xB3FA8FDCF1F27A09,0x910B2D31FCA6099B,0x0F52C4A378ED6DCC, + 0x50CCBF5EBAD98134,0x6BD582117F662A4F,0x94CE9A50D4FDD9DF,0x2B25BCFB45207526, + 0x67C42B661F49FCBF,0x492420FC723259DD,0x03436DD418C2BB3C,0x1F6E4517F872B391, + 0xA08563BC69AF1F68,0xD43EA4BAEEBB86B6,0x01CAD04C08B56914,0xAC94CACB0980C998, + 0x54C3D8739A373864,0x26FEC5C02DBACAC2,0xDEA9D778BE0D3B3E,0x040F672D20EEB950, + 0xE5B0EA377BB29045,0xF30AB136CBB42560,0x62019C0737122CFB,0xE86B930C13282FA1, + 0xCC1CEB542EE5374B,0x538FD28AA21B3A08,0x1B61223AD89C0AC1,0x36C24474AD25149F, + 0x7A23D3E9F74C9D06,0xBE21F6E79968C5ED,0xCF5F868036278C77,0xF705D61BEB5A9C30, + 0x4D2B47D152DCE08D,0x5F9E7BFDC234ECF8,0x247778583DCD18EA,0x867BA67C4415D5AA, + 0x4CE1979D5A698999,0x0000000000000000,0xEC64F42133C696F1,0xB57C5569C16B1171, + 0xC1C7926F467F88AF,0x654D96FE0F3E2E97,0x15F936D5A8C40E19,0xB8A72C52A9F1AE95, + 
0xA9517DAA21DB19DC,0x58D27104FA18EE94,0x5918A148F2AD8780,0x5CDD1629DAF657C4, + 0x8274C15164FB6CFA,0xD1FB13DBC6E056F2,0x7D6FD910CF609F6A,0xB63F38BDD9A9AA4D, + 0x3D9FE7FAF526C003,0x74BBC706871499DE,0xDF630734B6B8522A,0x3AD3ED03CD0AC26F, + 0xFADEAF2083C023D4,0xC00D42234ECAE1BB,0x8538CBA85CD76E96,0xC402250E6E2458EB, + 0x47BC3413026A5D05,0xAFD7A71F114272A4,0x978DF784CC3F62E3,0xB96DFC1EA144C781, + 0x21B2CF391596C8AE,0x318E4E8D950916F3,0xCE9556CC3E92E563,0x385A509BDD7D1047, + 0x358129A0B5E7AFA3,0xE6F387E363702B79,0xE0755D5653E94001,0x7BE903A5FFF9F412, + 0x12B53C2C90E80C75,0x3307F315857EC4DB,0x8FAFB86A0C61D31E,0xD9E5DD8186213952, + 0x77F8AAD29FD622E2,0x25BDA814357871FE,0x7571174A8FA1F0CA,0x137FEC60985D6561, + 0x30449EC19DBC7FE7,0xA540D4DD41F4CF2C,0xDC206AE0AE7AE916,0x5B911CD0E2DA55A8, + 0xB2305F90F947131D,0x344BF9ECBD52C6B7,0x5D17C665D2433ED0,0x18224FEEC05EB1FD, + 0x9E59E992844B6457,0x9A568EBFA4A5DD07,0xA3C60E68716DA454,0x7E2CB4C4D7A22456, + 0x87B176304CA0BCBE,0x413AEEA632F3367D,0x9915E36BBC67663B,0x40F03EEA3A465F69, + 0x1C2D28C3E0B008AD,0x4E682A054A1E5BB1,0x05C5B761285BD044,0xE1BF8D1A5B5C2915, + 0xF2C0617AC3014C74,0xB7F5E8F1D11CC359,0x63CB4C4B3FA745EF,0x9D1A84469C89DF6B, + 0xE33630824B2BFB3D,0xD5F474F6E60EEFA2,0xF58C6B83FB2D4E18,0x4676E45F0ADF3411, + 0x20781F751D23A1BA,0xBD629B3381AA7ED1,0xAE1D775319F71BB0,0xFED1C80DA32E9A84, + 0x5509083F92825170,0x29AC01635557A70E,0xA7C9694551831D04,0x8E65682604D4BA0A, + 0x11F651F8882AB749,0xD77DC96EF6793D8A,0xEF2799F52B042DCD,0x48EEF0B07A8730C9, + 0x22F1A2ED0D547392,0x6142F1D32FD097C7,0x4A674D286AF0E2E1,0x80FD7CC9748CBED2, + 0x717E7067AF4F499A,0x938290A9ECD1DBB3,0x88E3B293344DD172,0x2734158C250FA3D6 + } +}; + +// Constant values for KeySchedule function +__constant const ulong CC[12][8] = { + { + C64e(0xB1085BDA1ECADAE9), + C64e(0xEBCB2F81C0657C1F), + C64e(0x2F6A76432E45D016), + C64e(0x714EB88D7585C4FC), + C64e(0x4B7CE09192676901), + C64e(0xA2422A08A460D315), + C64e(0x05767436CC744D23), + C64e(0xDD806559F2A64507) + }, + { + 
C64e(0x6FA3B58AA99D2F1A), + C64e(0x4FE39D460F70B5D7), + C64e(0xF3FEEA720A232B98), + C64e(0x61D55E0F16B50131), + C64e(0x9AB5176B12D69958), + C64e(0x5CB561C2DB0AA7CA), + C64e(0x55DDA21BD7CBCD56), + C64e(0xE679047021B19BB7) + }, + { + C64e(0xF574DCAC2BCE2FC7), + C64e(0x0A39FC286A3D8435), + C64e(0x06F15E5F529C1F8B), + C64e(0xF2EA7514B1297B7B), + C64e(0xD3E20FE490359EB1), + C64e(0xC1C93A376062DB09), + C64e(0xC2B6F443867ADB31), + C64e(0x991E96F50ABA0AB2) + }, + { + C64e(0xEF1FDFB3E81566D2), + C64e(0xF948E1A05D71E4DD), + C64e(0x488E857E335C3C7D), + C64e(0x9D721CAD685E353F), + C64e(0xA9D72C82ED03D675), + C64e(0xD8B71333935203BE), + C64e(0x3453EAA193E837F1), + C64e(0x220CBEBC84E3D12E) + }, + { + C64e(0x4BEA6BACAD474799), + C64e(0x9A3F410C6CA92363), + C64e(0x7F151C1F1686104A), + C64e(0x359E35D7800FFFBD), + C64e(0xBFCD1747253AF5A3), + C64e(0xDFFF00B723271A16), + C64e(0x7A56A27EA9EA63F5), + C64e(0x601758FD7C6CFE57) + }, + { + C64e(0xAE4FAEAE1D3AD3D9), + C64e(0x6FA4C33B7A3039C0), + C64e(0x2D66C4F95142A46C), + C64e(0x187F9AB49AF08EC6), + C64e(0xCFFAA6B71C9AB7B4), + C64e(0x0AF21F66C2BEC6B6), + C64e(0xBF71C57236904F35), + C64e(0xFA68407A46647D6E) + }, + { + C64e(0xF4C70E16EEAAC5EC), + C64e(0x51AC86FEBF240954), + C64e(0x399EC6C7E6BF87C9), + C64e(0xD3473E33197A93C9), + C64e(0x0992ABC52D822C37), + C64e(0x06476983284A0504), + C64e(0x3517454CA23C4AF3), + C64e(0x8886564D3A14D493) + }, + { + C64e(0x9B1F5B424D93C9A7), + C64e(0x03E7AA020C6E4141), + C64e(0x4EB7F8719C36DE1E), + C64e(0x89B4443B4DDBC49A), + C64e(0xF4892BCB929B0690), + C64e(0x69D18D2BD1A5C42F), + C64e(0x36ACC2355951A8D9), + C64e(0xA47F0DD4BF02E71E) + }, + { + C64e(0x378F5A541631229B), + C64e(0x944C9AD8EC165FDE), + C64e(0x3A7D3A1B25894224), + C64e(0x3CD955B7E00D0984), + C64e(0x800A440BDBB2CEB1), + C64e(0x7B2B8A9AA6079C54), + C64e(0x0E38DC92CB1F2A60), + C64e(0x7261445183235ADB) + }, + { + C64e(0xABBEDEA680056F52), + C64e(0x382AE548B2E4F3F3), + C64e(0x8941E71CFF8A78DB), + C64e(0x1FFFE18A1B336103), + C64e(0x9FE76702AF69334B), + 
C64e(0x7A1E6C303B7652F4),
+		C64e(0x3698FAD1153BB6C3),
+		C64e(0x74B4C7FB98459CED)
+	},
+	{
+		C64e(0x7BCD9ED0EFC889FB),
+		C64e(0x3002C6CD635AFE94),
+		C64e(0xD8FA6BBBEBAB0761),
+		C64e(0x2001802114846679),
+		C64e(0x8A1D71EFEA48B9CA),
+		C64e(0xEFBACD1D7D476E98),
+		C64e(0xDEA2594AC06FD85D),
+		C64e(0x6BCAA4CD81F32D1B)
+	},
+	{
+		C64e(0x378EE767F11631BA),
+		C64e(0xD21380B00449B17A),
+		C64e(0xCDA43C32BCDF1D77),
+		C64e(0xF82012D430219F9B),
+		C64e(0x5D80EF9D1891CC86),
+		C64e(0xE71DA4AA88E12852),
+		C64e(0xFAF417D5D9B21B99),
+		C64e(0x48BC924AF11BD720)
+	}
+};
+
+/* ADDBYTE_8: one step of a byte-wise ripple-carry add. t_addm (an int owned by the expanding scope) holds the previous step's sum; its bits above the low 8 are the carry in. Stores the low byte of A[i] + B[i] + carry to C[i]. Expands to a single statement. */
+#define ADDBYTE_8(A,B,C,i) do { t_addm = (A)[i] + (B)[i] + (t_addm >> 8); (C)[i] = t_addm & 0xFF; } while (0)
+/* GOST_ADDMODULO512: C = A + B over 64-byte operands, walking from index 63 down to 0, so index 63 is the least significant byte; the carry out of index 0 is dropped. Each step reads A[i] and B[i] before writing C[i], so C may be the same array as A or B. */
+#define GOST_ADDMODULO512(A, B, C) do { \
+	t_addm = 0; \
+	ADDBYTE_8(A,B,C,63); \
+	ADDBYTE_8(A,B,C,62); \
+	ADDBYTE_8(A,B,C,61); \
+	ADDBYTE_8(A,B,C,60); \
+	ADDBYTE_8(A,B,C,59); \
+	ADDBYTE_8(A,B,C,58); \
+	ADDBYTE_8(A,B,C,57); \
+	ADDBYTE_8(A,B,C,56); \
+	ADDBYTE_8(A,B,C,55); \
+	ADDBYTE_8(A,B,C,54); \
+	ADDBYTE_8(A,B,C,53); \
+	ADDBYTE_8(A,B,C,52); \
+	ADDBYTE_8(A,B,C,51); \
+	ADDBYTE_8(A,B,C,50); \
+	ADDBYTE_8(A,B,C,49); \
+	ADDBYTE_8(A,B,C,48); \
+	ADDBYTE_8(A,B,C,47); \
+	ADDBYTE_8(A,B,C,46); \
+	ADDBYTE_8(A,B,C,45); \
+	ADDBYTE_8(A,B,C,44); \
+	ADDBYTE_8(A,B,C,43); \
+	ADDBYTE_8(A,B,C,42); \
+	ADDBYTE_8(A,B,C,41); \
+	ADDBYTE_8(A,B,C,40); \
+	ADDBYTE_8(A,B,C,39); \
+	ADDBYTE_8(A,B,C,38); \
+	ADDBYTE_8(A,B,C,37); \
+	ADDBYTE_8(A,B,C,36); \
+	ADDBYTE_8(A,B,C,35); \
+	ADDBYTE_8(A,B,C,34); \
+	ADDBYTE_8(A,B,C,33); \
+	ADDBYTE_8(A,B,C,32); \
+	ADDBYTE_8(A,B,C,31); \
+	ADDBYTE_8(A,B,C,30); \
+	ADDBYTE_8(A,B,C,29); \
+	ADDBYTE_8(A,B,C,28); \
+	ADDBYTE_8(A,B,C,27); \
+	ADDBYTE_8(A,B,C,26); \
+	ADDBYTE_8(A,B,C,25); \
+	ADDBYTE_8(A,B,C,24); \
+	ADDBYTE_8(A,B,C,23); \
+	ADDBYTE_8(A,B,C,22); \
+	ADDBYTE_8(A,B,C,21); \
+	ADDBYTE_8(A,B,C,20); \
+	ADDBYTE_8(A,B,C,19); \
+	ADDBYTE_8(A,B,C,18); \
+	ADDBYTE_8(A,B,C,17); \
+	ADDBYTE_8(A,B,C,16); \
+	ADDBYTE_8(A,B,C,15); \
+	ADDBYTE_8(A,B,C,14); \
+	ADDBYTE_8(A,B,C,13); \
+	ADDBYTE_8(A,B,C,12); \
+	ADDBYTE_8(A,B,C,11); \
+	ADDBYTE_8(A,B,C,10); \
+	ADDBYTE_8(A,B,C,9); \
+	ADDBYTE_8(A,B,C,8); \
+	ADDBYTE_8(A,B,C,7); \
+	ADDBYTE_8(A,B,C,6); \
+	ADDBYTE_8(A,B,C,5); \
+	ADDBYTE_8(A,B,C,4); \
+	ADDBYTE_8(A,B,C,3); \
+	ADDBYTE_8(A,B,C,2); \
+	ADDBYTE_8(A,B,C,1); \
+	ADDBYTE_8(A,B,C,0); \
+	} while (0)
+/* GOST_F: table-driven transform of a 64-byte state. For j = 0..7, return_state[j] is the XOR over k = 0..7 of lT[k][state[56 - 8*k + j]]. Afterwards the two buffers trade places: state and sph_state are re-pointed at the result and return_state at the old input, so both arguments must be assignable pointers. Uses r, tmp and return_state from the expanding scope. */
+#define GOST_F(state, sph_state, lT) do {\
+	r = 0;\
+	r ^= lT[0][state[56]];\
+	r ^= lT[1][state[48]];\
+	r ^= lT[2][state[40]];\
+	r ^= lT[3][state[32]];\
+	r ^= lT[4][state[24]];\
+	r ^= lT[5][state[16]];\
+	r ^= lT[6][state[8]];\
+	r ^= lT[7][state[0]];\
+	*return_state = r;\
+	r = 0;\
+	\
+	r ^= lT[0][state[57]];\
+	r ^= lT[1][state[49]];\
+	r ^= lT[2][state[41]];\
+	r ^= lT[3][state[33]];\
+	r ^= lT[4][state[25]];\
+	r ^= lT[5][state[17]];\
+	r ^= lT[6][state[9]];\
+	r ^= lT[7][state[1]];\
+	*(return_state+1) = r;\
+	r = 0;\
+	\
+	r ^= lT[0][state[58]];\
+	r ^= lT[1][state[50]];\
+	r ^= lT[2][state[42]];\
+	r ^= lT[3][state[34]];\
+	r ^= lT[4][state[26]];\
+	r ^= lT[5][state[18]];\
+	r ^= lT[6][state[10]];\
+	r ^= lT[7][state[2]];\
+	*(return_state+2) = r;\
+	r = 0;\
+	\
+	r ^= lT[0][state[59]];\
+	r ^= lT[1][state[51]];\
+	r ^= lT[2][state[43]];\
+	r ^= lT[3][state[35]];\
+	r ^= lT[4][state[27]];\
+	r ^= lT[5][state[19]];\
+	r ^= lT[6][state[11]];\
+	r ^= lT[7][state[3]];\
+	*(return_state+3) = r;\
+	r = 0;\
+	\
+	r ^= lT[0][state[60]];\
+	r ^= lT[1][state[52]];\
+	r ^= lT[2][state[44]];\
+	r ^= lT[3][state[36]];\
+	r ^= lT[4][state[28]];\
+	r ^= lT[5][state[20]];\
+	r ^= lT[6][state[12]];\
+	r ^= lT[7][state[4]];\
+	*(return_state+4) = r;\
+	r = 0;\
+	\
+	r ^= lT[0][state[61]];\
+	r ^= lT[1][state[53]];\
+	r ^= lT[2][state[45]];\
+	r ^= lT[3][state[37]];\
+	r ^= lT[4][state[29]];\
+	r ^= lT[5][state[21]];\
+	r ^= lT[6][state[13]];\
+	r ^= lT[7][state[5]];\
+	*(return_state+5) = r;\
+	r = 0;\
+	\
+	r ^= lT[0][state[62]];\
+	r ^= lT[1][state[54]];\
+	r ^= lT[2][state[46]];\
+	r ^= lT[3][state[38]];\
+	r ^= lT[4][state[30]];\
+	r ^= lT[5][state[22]];\
+	r ^= lT[6][state[14]];\
+	r ^= lT[7][state[6]];\
+	*(return_state+6) = r;\
+	r = 0;\
+	\
+	r ^= lT[0][state[63]];\
+	r ^= lT[1][state[55]];\
+	r ^= lT[2][state[47]];\
+	r ^= lT[3][state[39]];\
+	r ^= lT[4][state[31]];\
+	r ^= lT[5][state[23]];\
+	r ^= lT[6][state[15]];\
+	r ^= lT[7][state[7]];\
+	*(return_state+7) = r;\
+	tmp = (sph_u64*)state;\
+	state = (unsigned char*)return_state;\
+	sph_state = (sph_u64*)state;\
+	return_state = tmp;\
+} while(0)
+/* GOST_KeySchedule: advance the round key in place: combine K with CC[i] through GOST_XOR, then run GOST_F over it (ucK is the byte pointer that tracks K). Wrapped in do/while(0) so it expands to exactly one statement. */
+#define GOST_KeySchedule(K,ucK,i) do { GOST_XOR(K,CC[i],K); GOST_F(ucK,K,lT); } while (0)
+/* GOST_E: GOST_XOR(m, K) into state, followed by 12 rounds of { GOST_F on state; GOST_KeySchedule for that round; GOST_XOR(state, K) into state }. state, K, ucK and ucstate are re-pointed by the GOST_F calls along the way. */
+#define GOST_E(K,m,state) do {\
+	\
+	GOST_XOR(m,K,state);\
+	ucstate = (unsigned char*) state;\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,0);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,1);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,2);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,3);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,4);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,5);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,6);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,7);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,8);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,9);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,10);\
+	GOST_XOR(state,K,state);\
+	\
+	GOST_F(ucstate,state,lT);\
+	GOST_KeySchedule(K,ucK,11);\
+	GOST_XOR(state,K,state);\
+} while(0)
+/* GOST_G_N: compression step on 64-byte operands N, h, m. K = GOST_F(N combined with h), t = GOST_E(K, m), then t is combined with h and with m and the result is stored over h. Uses K, t, ucK, NN, hh and mm from the expanding scope.
+ * GOST_XOR is defined elsewhere in this file; from its use here it presumably XORs its first two operands into the third - TODO confirm. */
+#define GOST_G_N(N,h,m) do {\
+	NN = (sph_u64*) (N);\
+	hh = (sph_u64*) (h);\
+	mm = (sph_u64*) (m);\
+	\
+	GOST_XOR(NN,hh,K);\
+	\
+	ucK = (unsigned char*)K;\
+	\
+	GOST_F(ucK, K, lT);\
+	\
+	GOST_E(K,mm,t);\
+	\
+	GOST_XOR(t,hh,t);\
+	GOST_XOR(t,mm,hh);\
+} while(0)
+/* GOST_HASH_X: reads exactly 64 bytes at message and writes 64 bytes to out. Runs GOST_G_N over the message block, then over a padding block (all zero except m[63] = 1), then over the length counter N and the byte-wise sum Sigma, each with v0 as first operand.
+ * The lookup table lT, GOST_XOR and sph_u64 must already be visible where the macro is expanded. NOTE(review): the locals below (hash, N, m, K, t, r, tmp, ...) would shadow caller arguments with the same names - verify call sites. */
+#define GOST_HASH_X(message,out) do {\
+	unsigned char v512[64] = {\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00\
+	};\
+	unsigned char v0[64] __attribute__((aligned(8))) = {\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\
+	};\
+	unsigned char Sigma[64] __attribute__((aligned(8))) = {\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\
+	};\
+	unsigned char N[64] __attribute__((aligned(8))) = {\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\
+	};\
+	unsigned char hash[64] __attribute__((aligned(8))) = {\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,\
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00\
+	};\
+	/* v0, Sigma, N, hash and m are 8-byte aligned because GOST_G_N accesses them through sph_u64 pointers */\
+	unsigned char m[64] __attribute__((aligned(8)));\
+	unsigned char *msg = (unsigned char*) (message);\
+	unsigned char *outc = (unsigned char*) (out);\
+	/* scratch used by GOST_G_N, GOST_E and GOST_ADDMODULO512 */\
+	sph_u64 t_mem[8], K_mem[8];\
+	sph_u64 *K, *t;\
+	K = K_mem;\
+	t = t_mem;\
+	unsigned char *ucstate, *ucK;\
+	const sph_u64 *NN;\
+	sph_u64 *hh;\
+	const sph_u64 *mm;\
+	int t_addm=0;\
+	/* scratch used by GOST_F, which swaps return_state with its input buffer on every call */\
+	sph_u64 return_state_mem[8];\
+	sph_u64 *return_state;\
+	sph_u64 r;\
+	sph_u64 *tmp;\
+	return_state = return_state_mem;\
+	/* block 1: the message itself; N then grows by v512, whose bytes 62..63 hold 0x0200 = 512 */\
+	_Pragma("unroll") for (int i = 0; i < 64; i++) m[i] = msg[i];\
+	GOST_G_N(N,hash,m);\
+	GOST_ADDMODULO512(N,v512,N);\
+	GOST_ADDMODULO512(Sigma,m,Sigma);\
+	/* block 2: padding only */\
+	_Pragma("unroll") for (int i = 0; i < 64; i++) m[i] = 0;\
+	m[63] = 1;\
+	\
+	GOST_G_N(N,hash,m);\
+	v512[63] = 0 & 0xFF; /* the padding block carries 0 message bits, so the increment added to N becomes 0 */\
+	v512[62] = 0 >> 8;\
+	GOST_ADDMODULO512(N,v512,N);\
+	GOST_ADDMODULO512(Sigma,m,Sigma);\
+	GOST_G_N(v0,hash,N);\
+	GOST_G_N(v0,hash,Sigma);\
+	_Pragma("unroll") for (int i = 0; i < 64; i++) outc[i] = hash[i];\
+} while(0)
+/* GOST_HASH_512: plain alias for GOST_HASH_X. */
+#define GOST_HASH_512(message,out) do {\
+	GOST_HASH_X(message,out);\
+} while(0)
diff --git a/kernel/gost.cl b/kernel/gost.cl
new file mode 100644
index 000000000..976d94ea6
--- /dev/null
+++ b/kernel/gost.cl
@@ -0,0 +1,1064 @@
+/*
+ * GOST R 34.10-2012 implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010 Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author Thomas Pornin
+ */
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#define FULL_UNROLL 1
+
+//--------------------------------------------------------------------------------------------
+//
+// stribog implementation
+//
+//--------------------------------------------------------------------------------------------
+/* memcpy: byte-wise copy of len bytes from src to dst, expanded inline as a statement (it yields no value). len is re-evaluated on every iteration. The loop index has a reserved-looking name so it cannot capture an i used in the caller's arguments. */
+#define memcpy(dst, src, len) do {\
+	for (int memcpy_i_ = 0; memcpy_i_ < (len); memcpy_i_++) {\
+		*((unsigned char*)(dst) + memcpy_i_) = *((unsigned char*)(src) + memcpy_i_);\
+	}\
+} while(0)
+
+
+// Tables for function F
+ __constant static const sph_u64 T[8][256] = {
+	{
+		0xE6F87E5C5B711FD0,0x258377800924FA16,0xC849E07E852EA4A8,0x5B4686A18F06C16A,
+		0x0B32E9A2D77B416E,0xABDA37A467815C66,0xF61796A81A686676,0xF5DC0B706391954B,
+		0x4862F38DB7E64BF1,0xFF5C629A68BD85C5,0xCB827DA6FCD75795,0x66D36DAF69B9F089,
+		0x356C9F74483D83B0,0x7CBCECB1238C99A1,0x36A702AC31C4708D,0x9EB6A8D02FBCDFD6,
+		0x8B19FA51E5B3AE37,0x9CCFB5408A127D0B,0xBC0C78B508208F5A,0xE533E3842288ECED,
+		0xCEC2C7D377C15FD2,0xEC7817B6505D0F5E,0xB94CC2C08336871D,0x8C205DB4CB0B04AD,
+		0x763C855B28A0892F,0x588D1B79F6FF3257,0x3FECF69E4311933E,0x0FC0D39F803A18C9,
+		0xEE010A26F5F3AD83,0x10EFE8F4411979A6,0x5DCDA10C7DE93A10,0x4A1BEE1D1248E92C,
+		0x53BFF2DB21847339,0xB4F50CCFA6A23D09,0x5FB4BC9CD84798CD,0xE88A2D8B071C56F9,
+		0x7F7771695A756A9C,0xC5F02E71A0BA1EBC,0xA663F9AB4215E672,0x2EB19E22DE5FBB78,
+
0x0DB9CE0F2594BA14,0x82520E6397664D84,0x2F031E6A0208EA98,0x5C7F2144A1BE6BF0, + 0x7A37CB1CD16362DB,0x83E08E2B4B311C64,0xCF70479BAB960E32,0x856BA986B9DEE71E, + 0xB5478C877AF56CE9,0xB8FE42885F61D6FD,0x1BDD0156966238C8,0x622157923EF8A92E, + 0xFC97FF42114476F8,0x9D7D350856452CEB,0x4C90C9B0E0A71256,0x2308502DFBCB016C, + 0x2D7A03FAA7A64845,0xF46E8B38BFC6C4AB,0xBDBEF8FDD477DEBA,0x3AAC4CEBC8079B79, + 0xF09CB105E8879D0C,0x27FA6A10AC8A58CB,0x8960E7C1401D0CEA,0x1A6F811E4A356928, + 0x90C4FB0773D196FF,0x43501A2F609D0A9F,0xF7A516E0C63F3796,0x1CE4A6B3B8DA9252, + 0x1324752C38E08A9B,0xA5A864733BEC154F,0x2BF124575549B33F,0xD766DB15440DC5C7, + 0xA7D179E39E42B792,0xDADF151A61997FD3,0x86A0345EC0271423,0x38D5517B6DA939A4, + 0x6518F077104003B4,0x02791D90A5AEA2DD,0x88D267899C4A5D0A,0x930F66DF0A2865C2, + 0x4EE9D4204509B08B,0x325538916685292A,0x412907BFC533A842,0xB27E2B62544DC673, + 0x6C5304456295E007,0x5AF406E95351908A,0x1F2F3B6BC123616F,0xC37B09DC5255E5C6, + 0x3967D133B1FE6844,0x298839C7F0E711E2,0x409B87F71964F9A2,0xE938ADC3DB4B0719, + 0x0C0B4E47F9C3EBF4,0x5534D576D36B8843,0x4610A05AEB8B02D8,0x20C3CDF58232F251, + 0x6DE1840DBEC2B1E7,0xA0E8DE06B0FA1D08,0x7B854B540D34333B,0x42E29A67BCCA5B7F, + 0xD8A6088AC437DD0E,0xC63BB3A9D943ED81,0x21714DBD5E65A3B1,0x6761EDE7B5EEA169, + 0x2431F7C8D573ABF6,0xD51FC685E1A3671A,0x5E063CD40410C92D,0x283AB98F2CB04002, + 0x8FEBC06CB2F2F790,0x17D64F116FA1D33C,0xE07359F1A99EE4AA,0x784ED68C74CDC006, + 0x6E2A19D5C73B42DA,0x8712B4161C7045C3,0x371582E4ED93216D,0xACE390414939F6FC, + 0x7EC5F12186223B7C,0xC0B094042BAC16FB,0xF9D745379A527EBF,0x737C3F2EA3B68168, + 0x33E7B8D9BAD278CA,0xA9A32A34C22FFEBB,0xE48163CCFEDFBD0D,0x8E5940246EA5A670, + 0x51C6EF4B842AD1E4,0x22BAD065279C508C,0xD91488C218608CEE,0x319EA5491F7CDA17, + 0xD394E128134C9C60,0x094BF43272D5E3B3,0x9BF612A5A4AAD791,0xCCBBDA43D26FFD0F, + 0x34DE1F3C946AD250,0x4F5B5468995EE16B,0xDF9FAF6FEA8F7794,0x2648EA5870DD092B, + 0xBFC7E56D71D97C67,0xDDE6B2FF4F21D549,0x3C276B463AE86003,0x91767B4FAF86C71F, + 
0x68A13E7835D4B9A0,0xB68C115F030C9FD4,0x141DD2C916582001,0x983D8F7DDD5324AC, + 0x64AA703FCC175254,0xC2C989948E02B426,0x3E5E76D69F46C2DE,0x50746F03587D8004, + 0x45DB3D829272F1E5,0x60584A029B560BF3,0xFBAE58A73FFCDC62,0xA15A5E4E6CAD4CE8, + 0x4BA96E55CE1FB8CC,0x08F9747AAE82B253,0xC102144CF7FB471B,0x9F042898F3EB8E36, + 0x068B27ADF2EFFB7A,0xEDCA97FE8C0A5EBE,0x778E0513F4F7D8CF,0x302C2501C32B8BF7, + 0x8D92DDFC175C554D,0xF865C57F46052F5F,0xEAF3301BA2B2F424,0xAA68B7ECBBD60D86, + 0x998F0F350104754C,0x0000000000000000,0xF12E314D34D0CCEC,0x710522BE061823B5, + 0xAF280D9930C005C1,0x97FD5CE25D693C65,0x19A41CC633CC9A15,0x95844172F8C79EB8, + 0xDC5432B7937684A9,0x9436C13A2490CF58,0x802B13F332C8EF59,0xC442AE397CED4F5C, + 0xFA1CD8EFE3AB8D82,0xF2E5AC954D293FD1,0x6AD823E8907A1B7D,0x4D2249F83CF043B6, + 0x03CB9DD879F9F33D,0xDE2D2F2736D82674,0x2A43A41F891EE2DF,0x6F98999D1B6C133A, + 0xD4AD46CD3DF436FA,0xBB35DF50269825C0,0x964FDCAA813E6D85,0xEB41B0537EE5A5C4, + 0x0540BA758B160847,0xA41AE43BE7BB44AF,0xE3B8C429D0671797,0x819993BBEE9FBEB9, + 0xAE9A8DD1EC975421,0xF3572CDD917E6E31,0x6393D7DAE2AFF8CE,0x47A2201237DC5338, + 0xA32343DEC903EE35,0x79FC56C4A89A91E6,0x01B28048DC5751E0,0x1296F564E4B7DB7B, + 0x75F7188351597A12,0xDB6D9552BDCE2E33,0x1E9DBB231D74308F,0x520D7293FDD322D9, + 0xE20A44610C304677,0xFEEEE2D2B4EAD425,0xCA30FDEE20800675,0x61EACA4A47015A13, + 0xE74AFE1487264E30,0x2CC883B27BF119A5,0x1664CF59B3F682DC,0xA811AA7C1E78AF5B, + 0x1D5626FB648DC3B2,0xB73E9117DF5BCE34,0xD05F7CF06AB56F5D,0xFD257F0ACD132718, + 0x574DC8E676C52A9E,0x0739A7E52EB8AA9A,0x5486553E0F3CD9A3,0x56FF48AEAA927B7E, + 0xBE756525AD8E2D87,0x7D0E6CF9FFDBC841,0x3B1ECCA31450CA99,0x6913BE30E983E840, + 0xAD511009956EA71C,0xB1B5B6BA2DB4354E,0x4469BDCA4E25A005,0x15AF5281CA0F71E1, + 0x744598CB8D0E2BF2,0x593F9B312AA863B7,0xEFB38A6E29A4FC63,0x6B6AA3A04C2D4A9D, + 0x3D95EB0EE6BF31E3,0xA291C3961554BFD5,0x18169C8EEF9BCBF5,0x115D68BC9D4E2846, + 0xBA875F18FACF7420,0xD1EDFCB8B6E23EBD,0xB00736F2F1E364AE,0x84D929CE6589B6FE, + 
0x70B7A2F6DA4F7255,0x0E7253D75C6D4929,0x04F23A3D574159A7,0x0A8069EA0B2C108E, + 0x49D073C56BB11A11,0x8AAB7A1939E4FFD7,0xCD095A0B0E38ACEF,0xC9FB60365979F548, + 0x92BDE697D67F3422,0xC78933E10514BC61,0xE1C1D9B975C9B54A,0xD2266160CF1BCD80, + 0x9A4492ED78FD8671,0xB3CCAB2A881A9793,0x72CEBF667FE1D088,0xD6D45B5D985A9427 + }, + { + 0xC811A8058C3F55DE,0x65F5B43196B50619,0xF74F96B1D6706E43,0x859D1E8BCB43D336, + 0x5AAB8A85CCFA3D84,0xF9C7BF99C295FCFD,0xA21FD5A1DE4B630F,0xCDB3EF763B8B456D, + 0x803F59F87CF7C385,0xB27C73BE5F31913C,0x98E3AC6633B04821,0xBF61674C26B8F818, + 0x0FFBC995C4C130C8,0xAAA0862010761A98,0x6057F342210116AA,0xF63C760C0654CC35, + 0x2DDB45CC667D9042,0xBCF45A964BD40382,0x68E8A0C3EF3C6F3D,0xA7BD92D269FF73BC, + 0x290AE20201ED2287,0xB7DE34CDE885818F,0xD901EEA7DD61059B,0xD6FA273219A03553, + 0xD56F1AE874CCCEC9,0xEA31245C2E83F554,0x7034555DA07BE499,0xCE26D2AC56E7BEF7, + 0xFD161857A5054E38,0x6A0E7DA4527436D1,0x5BD86A381CDE9FF2,0xCAF7756231770C32, + 0xB09AAED9E279C8D0,0x5DEF1091C60674DB,0x111046A2515E5045,0x23536CE4729802FC, + 0xC50CBCF7F5B63CFA,0x73A16887CD171F03,0x7D2941AFD9F28DBD,0x3F5E3EB45A4F3B9D, + 0x84EEFE361B677140,0x3DB8E3D3E7076271,0x1A3A28F9F20FD248,0x7EBC7C75B49E7627, + 0x74E5F293C7EB565C,0x18DCF59E4F478BA4,0x0C6EF44FA9ADCB52,0xC699812D98DAC760, + 0x788B06DC6E469D0E,0xFC65F8EA7521EC4E,0x30A5F7219E8E0B55,0x2BEC3F65BCA57B6B, + 0xDDD04969BAF1B75E,0x99904CDBE394EA57,0x14B201D1E6EA40F6,0xBBB0C08241284ADD, + 0x50F20463BF8F1DFF,0xE8D7F93B93CBACB8,0x4D8CB68E477C86E8,0xC1DD1B3992268E3F, + 0x7C5AA11209D62FCB,0x2F3D98ABDB35C9AE,0x671369562BFD5FF5,0x15C1E16C36CEE280, + 0x1D7EB2EDF8F39B17,0xDA94D37DB00DFE01,0x877BC3EC760B8ADA,0xCB8495DFE153AE44, + 0x05A24773B7B410B3,0x12857B783C32ABDF,0x8EB770D06812513B,0x536739B9D2E3E665, + 0x584D57E271B26468,0xD789C78FC9849725,0xA935BBFA7D1AE102,0x8B1537A3DFA64188, + 0xD0CD5D9BC378DE7A,0x4AC82C9A4D80CFB7,0x42777F1B83BDB620,0x72D2883A1D33BD75, + 0x5E7A2D4BAB6A8F41,0xF4DAAB6BBB1C95D9,0x905CFFE7FD8D31B6,0x83AA6422119B381F, + 
0xC0AEFB8442022C49,0xA0F908C663033AE3,0xA428AF0804938826,0xADE41C341A8A53C7, + 0xAE7121EE77E6A85D,0xC47F5C4A25929E8C,0xB538E9AA55CDD863,0x06377AA9DAD8EB29, + 0xA18AE87BB3279895,0x6EDFDA6A35E48414,0x6B7D9D19825094A7,0xD41CFA55A4E86CBF, + 0xE5CAEDC9EA42C59C,0xA36C351C0E6FC179,0x5181E4DE6FABBF89,0xFFF0C530184D17D4, + 0x9D41EB1584045892,0x1C0D525028D73961,0xF178EC180CA8856A,0x9A0571018EF811CD, + 0x4091A27C3EF5EFCC,0x19AF15239F6329D2,0x347450EFF91EB990,0xE11B4A078DD27759, + 0xB9561DE5FC601331,0x912F1F5A2DA993C0,0x1654DCB65BA2191A,0x3E2DDE098A6B99EB, + 0x8A66D71E0F82E3FE,0x8C51ADB7D55A08D7,0x4533E50F8941FF7F,0x02E6DD67BD4859EC, + 0xE068AABA5DF6D52F,0xC24826E3FF4A75A5,0x6C39070D88ACDDF8,0x6486548C4691A46F, + 0xD1BEBD26135C7C0C,0xB30F93038F15334A,0x82D9849FC1BF9A69,0x9C320BA85420FAE4, + 0xFA528243AFF90767,0x9ED4D6CFE968A308,0xB825FD582C44B147,0x9B7691BC5EDCB3BB, + 0xC7EA619048FE6516,0x1063A61F817AF233,0x47D538683409A693,0x63C2CE984C6DED30, + 0x2A9FDFD86C81D91D,0x7B1E3B06032A6694,0x666089EBFBD9FD83,0x0A598EE67375207B, + 0x07449A140AFC495F,0x2CA8A571B6593234,0x1F986F8A45BBC2FB,0x381AA4A050B372C2, + 0x5423A3ADD81FAF3A,0x17273C0B8B86BB6C,0xFE83258DC869B5A2,0x287902BFD1C980F1, + 0xF5A94BD66B3837AF,0x88800A79B2CABA12,0x55504310083B0D4C,0xDF36940E07B9EEB2, + 0x04D1A7CE6790B2C5,0x612413FFF125B4DC,0x26F12B97C52C124F,0x86082351A62F28AC, + 0xEF93632F9937E5E7,0x3507B052293A1BE6,0xE72C30AE570A9C70,0xD3586041AE1425E0, + 0xDE4574B3D79D4CC4,0x92BA228040C5685A,0xF00B0CA5DC8C271C,0xBE1287F1F69C5A6E, + 0xF39E317FB1E0DC86,0x495D114020EC342D,0x699B407E3F18CD4B,0xDCA3A9D46AD51528, + 0x0D1D14F279896924,0x0000000000000000,0x593EB75FA196C61E,0x2E4E78160B116BD8, + 0x6D4AE7B058887F8E,0xE65FD013872E3E06,0x7A6DDBBBD30EC4E2,0xAC97FC89CAAEF1B1, + 0x09CCB33C1E19DBE1,0x89F3EAC462EE1864,0x7770CF49AA87ADC6,0x56C57ECA6557F6D6, + 0x03953DDA6D6CFB9A,0x36928D884456E07C,0x1EEB8F37959F608D,0x31D6179C4EAAA923, + 0x6FAC3AD7E5C02662,0x43049FA653991456,0xABD3669DC052B8EE,0xAF02C153A7C20A2B, + 
0x3CCB036E3723C007,0x93C9C23D90E1CA2C,0xC33BC65E2F6ED7D3,0x4CFF56339758249E, + 0xB1E94E64325D6AA6,0x37E16D359472420A,0x79F8E661BE623F78,0x5214D90402C74413, + 0x482EF1FDF0C8965B,0x13F69BC5EC1609A9,0x0E88292814E592BE,0x4E198B542A107D72, + 0xCCC00FCBEBAFE71B,0x1B49C844222B703E,0x2564164DA840E9D5,0x20C6513E1FF4F966, + 0xBAC3203F910CE8AB,0xF2EDD1C261C47EF0,0x814CB945ACD361F3,0x95FEB8944A392105, + 0x5C9CF02C1622D6AD,0x971865F3F77178E9,0xBD87BA2B9BF0A1F4,0x444005B259655D09, + 0xED75BE48247FBC0B,0x7596122E17CFF42A,0xB44B091785E97A15,0x966B854E2755DA9F, + 0xEEE0839249134791,0x32432A4623C652B9,0xA8465B47AD3E4374,0xF8B45F2412B15E8B, + 0x2417F6F078644BA3,0xFB2162FE7FDDA511,0x4BBBCC279DA46DC1,0x0173E0BDD024A276, + 0x22208C59A2BCA08A,0x8FC4906DB836F34D,0xE4B90D743A6667EA,0x7147B5E0705F46EF, + 0x2782CB2A1508B039,0xEC065EF5F45B1E7D,0x21B5B183CFD05B10,0xDBE733C060295C77, + 0x9FA73672394C017E,0xCF55321186C31C81,0xD8720E1A0D45A7ED,0x3B8F997A3DDF8958, + 0x3AFC79C7EDFB2B2E,0xE9A4198643EF0ECE,0x5F09CDF67B4E2D37,0x4F6A6BE9FA34DF04, + 0xB6ADD47038A123F9,0x8D224D0A057EAAA1,0xC96248B85C1BF7A8,0xE3FD9760309A2EB5, + 0x0B2A6E5BA351820D,0xEB42C4E1FEA75722,0x948D58299A1D8373,0x7FCF9CC864BAD451, + 0xA55B4FB5D4B72A50,0x08BF5381CE3D7997,0x46A6D8D5E42D04E5,0xD22B80FC7E308796, + 0x57B69E77B57354A0,0x3969441D8097D0B4,0x3330CAFBF3E2F0CF,0xE28E77DDE0BE8CC3, + 0x62B12E259C494F46,0xA6CE726FB9DBD1CA,0x41E242C1EED14DBA,0x76032FF47AA30FB0 + }, + { + 0x45B268A93ACDE4CC,0xAF7F0BE884549D08,0x048354B3C1468263,0x925435C2C80EFED2, + 0xEE4E37F27FDFFBA7,0x167A33920C60F14D,0xFB123B52EA03E584,0x4A0CAB53FDBB9007, + 0x9DEAF6380F788A19,0xCB48EC558F0CB32A,0xB59DC4B2D6FEF7E0,0xDCDBCA22F4F3ECB6, + 0x11DF5813549A9C40,0xE33FDEDF568ACED3,0xA0C1C8124322E9C3,0x07A56B8158FA6D0D, + 0x77279579B1E1F3DD,0xD9B18B74422AC004,0xB8EC2D9FFFABC294,0xF4ACF8A82D75914F, + 0x7BBF69B1EF2B6878,0xC4F62FAF487AC7E1,0x76CE809CC67E5D0C,0x6711D88F92E4C14C, + 0x627B99D9243DEDFE,0x234AA5C3DFB68B51,0x909B1F15262DBF6D,0x4F66EA054B62BCB5, + 
0x1AE2CF5A52AA6AE8,0xBEA053FBD0CE0148,0xED6808C0E66314C9,0x43FE16CD15A82710, + 0xCD049231A06970F6,0xE7BC8A6C97CC4CB0,0x337CE835FCB3B9C0,0x65DEF2587CC780F3, + 0x52214EDE4132BB50,0x95F15E4390F493DF,0x870839625DD2E0F1,0x41313C1AFB8B66AF, + 0x91720AF051B211BC,0x477D427ED4EEA573,0x2E3B4CEEF6E3BE25,0x82627834EB0BCC43, + 0x9C03E3DD78E724C8,0x2877328AD9867DF9,0x14B51945E243B0F2,0x574B0F88F7EB97E2, + 0x88B6FA989AA4943A,0x19C4F068CB168586,0x50EE6409AF11FAEF,0x7DF317D5C04EABA4, + 0x7A567C5498B4C6A9,0xB6BBFB804F42188E,0x3CC22BCF3BC5CD0B,0xD04336EAAA397713, + 0xF02FAC1BEC33132C,0x2506DBA7F0D3488D,0xD7E65D6BF2C31A1E,0x5EB9B2161FF820F5, + 0x842E0650C46E0F9F,0x716BEB1D9E843001,0xA933758CAB315ED4,0x3FE414FDA2792265, + 0x27C9F1701EF00932,0x73A4C1CA70A771BE,0x94184BA6E76B3D0E,0x40D829FF8C14C87E, + 0x0FBEC3FAC77674CB,0x3616A9634A6A9572,0x8F139119C25EF937,0xF545ED4D5AEA3F9E, + 0xE802499650BA387B,0x6437E7BD0B582E22,0xE6559F89E053E261,0x80AD52E305288DFC, + 0x6DC55A23E34B9935,0xDE14E0F51AD0AD09,0xC6390578A659865E,0x96D7617109487CB1, + 0xE2D6CB3A21156002,0x01E915E5779FAED1,0xADB0213F6A77DCB7,0x9880B76EB9A1A6AB, + 0x5D9F8D248644CF9B,0xFD5E4536C5662658,0xF1C6B9FE9BACBDFD,0xEACD6341BE9979C4, + 0xEFA7221708405576,0x510771ECD88E543E,0xC2BA51CB671F043D,0x0AD482AC71AF5879, + 0xFE787A045CDAC936,0xB238AF338E049AED,0xBD866CC94972EE26,0x615DA6EBBD810290, + 0x3295FDD08B2C1711,0xF834046073BF0AEA,0xF3099329758FFC42,0x1CAEB13E7DCFA934, + 0xBA2307481188832B,0x24EFCE42874CE65C,0x0E57D61FB0E9DA1A,0xB3D1BAD6F99B343C, + 0xC0757B1C893C4582,0x2B510DB8403A9297,0x5C7698C1F1DB614A,0x3E0D0118D5E68CB4, + 0xD60F488E855CB4CF,0xAE961E0DF3CB33D9,0x3A8E55AB14A00ED7,0x42170328623789C1, + 0x838B6DD19C946292,0x895FEF7DED3B3AEB,0xCFCBB8E64E4A3149,0x064C7E642F65C3DC, + 0x3D2B3E2A4C5A63DA,0x5BD3F340A9210C47,0xB474D157A1615931,0xAC5934DA1DE87266, + 0x6EE365117AF7765B,0xC86ED36716B05C44,0x9BA6885C201D49C5,0xB905387A88346C45, + 0x131072C4BAB9DDFF,0xBF49461EA751AF99,0xD52977BC1CE05BA1,0xB0F785E46027DB52, + 
0x546D30BA6E57788C,0x305AD707650F56AE,0xC987C682612FF295,0xA5AB8944F5FBC571, + 0x7ED528E759F244CA,0x8DDCBBCE2C7DB888,0xAA154ABE328DB1BA,0x1E619BE993ECE88B, + 0x09F2BD9EE813B717,0x7401AA4B285D1CB3,0x21858F143195CAEE,0x48C381841398D1B8, + 0xFCB750D3B2F98889,0x39A86A998D1CE1B9,0x1F888E0CE473465A,0x7899568376978716, + 0x02CF2AD7EE2341BF,0x85C713B5B3F1A14E,0xFF916FE12B4567E7,0x7C1A0230B7D10575, + 0x0C98FCC85ECA9BA5,0xA3E7F720DA9E06AD,0x6A6031A2BBB1F438,0x973E74947ED7D260, + 0x2CF4663918C0FF9A,0x5F50A7F368678E24,0x34D983B4A449D4CD,0x68AF1B755592B587, + 0x7F3C3D022E6DEA1B,0xABFC5F5B45121F6B,0x0D71E92D29553574,0xDFFDF5106D4F03D8, + 0x081BA87B9F8C19C6,0xDB7EA1A3AC0981BB,0xBBCA12AD66172DFA,0x79704366010829C7, + 0x179326777BFF5F9C,0x0000000000000000,0xEB2476A4C906D715,0x724DD42F0738DF6F, + 0xB752EE6538DDB65F,0x37FFBC863DF53BA3,0x8EFA84FCB5C157E6,0xE9EB5C73272596AA, + 0x1B0BDABF2535C439,0x86E12C872A4D4E20,0x9969A28BCE3E087A,0xFAFB2EB79D9C4B55, + 0x056A4156B6D92CB2,0x5A3AE6A5DEBEA296,0x22A3B026A8292580,0x53C85B3B36AD1581, + 0xB11E900117B87583,0xC51F3A4A3FE56930,0xE019E1EDCF3621BD,0xEC811D2591FCBA18, + 0x445B7D4C4D524A1D,0xA8DA6069DCAEF005,0x58F5CC72309DE329,0xD4C062596B7FF570, + 0xCE22AD0339D59F98,0x591CD99747024DF8,0x8B90C5AA03187B54,0xF663D27FC356D0F0, + 0xD8589E9135B56ED5,0x35309651D3D67A1C,0x12F96721CD26732E,0xD28C1C3D441A36AC, + 0x492A946164077F69,0x2D1D73DC6F5F514B,0x6F0A70F40D68D88A,0x60B4B30ECA1EAC41, + 0xD36509D83385987D,0x0B3D97490630F6A8,0x9ECCC90A96C46577,0xA20EE2C5AD01A87C, + 0xE49AB55E0E70A3DE,0xA4429CA182646BA0,0xDA97B446DB962F6A,0xCCED87D4D7F6DE27, + 0x2AB8185D37A53C46,0x9F25DCEFE15BCBA6,0xC19C6EF9FEA3EB53,0xA764A3931BD884CE, + 0x2FD2590B817C10F4,0x56A21A6D80743933,0xE573A0BB79EF0D0F,0x155C0CA095DC1E23, + 0x6C2C4FC694D437E4,0x10364DF623053291,0xDD32DFC7836C4267,0x03263F3299BCEF6E, + 0x66F8CD6AE57B6F9D,0x8C35AE2B5BE21659,0x31B3C2E21290F87F,0x93BD2027BF915003, + 0x69460E90220D1B56,0x299E276FAE19D328,0x63928C3C53A2432F,0x7082FEF8E91B9ED0, + 
0xBC6F792C3EED40F7,0x4C40D537D2DE53DB,0x75E8BFAE5FC2B262,0x4DA9C0D2A541FD0A, + 0x4E8FFFE03CFD1264,0x2620E495696FA7E3,0xE1F0F408B8A98F6C,0xD1AA230FDDA6D9C2, + 0xC7D0109DD1C6288F,0x8A79D04F7487D585,0x4694579BA3710BA2,0x38417F7CFA834F68, + 0x1D47A4DB0A5007E5,0x206C9AF1460A643F,0xA128DDF734BD4712,0x8144470672B7232D, + 0xF2E086CC02105293,0x182DE58DBC892B57,0xCAA1F9B0F8931DFB,0x6B892447CC2E5AE9, + 0xF9DD11850420A43B,0x4BE5BEB68A243ED6,0x5584255F19C8D65D,0x3B67404E633FA006, + 0xA68DB6766C472A1F,0xF78AC79AB4C97E21,0xC353442E1080AAEC,0x9A4F9DB95782E714 + }, + { + 0x05BA7BC82C9B3220,0x31A54665F8B65E4F,0xB1B651F77547F4D4,0x8BFA0D857BA46682, + 0x85A96C5AA16A98BB,0x990FAEF908EB79C9,0xA15E37A247F4A62D,0x76857DCD5D27741E, + 0xF8C50B800A1820BC,0xBE65DCB201F7A2B4,0x666D1B986F9426E7,0x4CC921BF53C4E648, + 0x95410A0F93D9CA42,0x20CDCCAA647BA4EF,0x429A4060890A1871,0x0C4EA4F69B32B38B, + 0xCCDA362DDE354CD3,0x96DC23BC7C5B2FA9,0xC309BB68AA851AB3,0xD26131A73648E013, + 0x021DC52941FC4DB2,0xCD5ADAB7704BE48A,0xA77965D984ED71E6,0x32386FD61734BBA4, + 0xE82D6DD538AB7245,0x5C2147EA6177B4B1,0x5DA1AB70CF091CE8,0xAC907FCE72B8BDFF, + 0x57C85DFD972278A8,0xA4E44C6A6B6F940D,0x3851995B4F1FDFE4,0x62578CCAED71BC9E, + 0xD9882BB0C01D2C0A,0x917B9D5D113C503B,0xA2C31E11A87643C6,0xE463C923A399C1CE, + 0xF71686C57EA876DC,0x87B4A973E096D509,0xAF0D567D9D3A5814,0xB40C2A3F59DCC6F4, + 0x3602F88495D121DD,0xD3E1DD3D9836484A,0xF945E71AA46688E5,0x7518547EB2A591F5, + 0x9366587450C01D89,0x9EA81018658C065B,0x4F54080CBC4603A3,0x2D0384C65137BF3D, + 0xDC325078EC861E2A,0xEA30A8FC79573FF7,0x214D2030CA050CB6,0x65F0322B8016C30C, + 0x69BE96DD1B247087,0xDB95EE9981E161B8,0xD1FC1814D9CA05F8,0x820ED2BBCC0DE729, + 0x63D76050430F14C7,0x3BCCB0E8A09D3A0F,0x8E40764D573F54A2,0x39D175C1E16177BD, + 0x12F5A37C734F1F4B,0xAB37C12F1FDFC26D,0x5648B167395CD0F1,0x6C04ED1537BF42A7, + 0xED97161D14304065,0x7D6C67DAAB72B807,0xEC17FA87BA4EE83C,0xDFAF79CB0304FBC1, + 0x733F060571BC463E,0x78D61C1287E98A27,0xD07CF48E77B4ADA1,0xB9C262536C90DD26, + 
0xE2449B5860801605,0x8FC09AD7F941FCFB,0xFAD8CEA94BE46D0E,0xA343F28B0608EB9F, + 0x9B126BD04917347B,0x9A92874AE7699C22,0x1B017C42C4E69EE0,0x3A4C5C720EE39256, + 0x4B6E9F5E3EA399DA,0x6BA353F45AD83D35,0xE7FEE0904C1B2425,0x22D009832587E95D, + 0x842980C00F1430E2,0xC6B3C0A0861E2893,0x087433A419D729F2,0x341F3DADD42D6C6F, + 0xEE0A3FAEFBB2A58E,0x4AEE73C490DD3183,0xAAB72DB5B1A16A34,0xA92A04065E238FDF, + 0x7B4B35A1686B6FCC,0x6A23BF6EF4A6956C,0x191CB96B851AD352,0x55D598D4D6DE351A, + 0xC9604DE5F2AE7EF3,0x1CA6C2A3A981E172,0xDE2F9551AD7A5398,0x3025AAFF56C8F616, + 0x15521D9D1E2860D9,0x506FE31CFA45073A,0x189C55F12B647B0B,0x0180EC9AAE7EA859, + 0x7CEC8B40050C105E,0x2350E5198BF94104,0xEF8AD33455CC0DD7,0x07A7BEE16D677F92, + 0xE5E325B90DE76997,0x5A061591A26E637A,0xB611EF1618208B46,0x09F4DF3EB7A981AB, + 0x1EBB078AE87DACC0,0xB791038CB65E231F,0x0FD38D4574B05660,0x67EDF702C1EA8EBE, + 0xBA5F4BE0831238CD,0xE3C477C2CEFEBE5C,0x0DCE486C354C1BD2,0x8C5DB36416C31910, + 0x26EA9ED1A7627324,0x039D29B3EF82E5EB,0x9F28FC82CBF2AE02,0xA8AAE89CF05D2786, + 0x431AACFA2774B028,0xCF471F9E31B7A938,0x581BD0B8E3922EC8,0xBC78199B400BEF06, + 0x90FB71C7BF42F862,0x1F3BEB1046030499,0x683E7A47B55AD8DE,0x988F4263A695D190, + 0xD808C72A6E638453,0x0627527BC319D7CB,0xEBB04466D72997AE,0xE67E0C0AE2658C7C, + 0x14D2F107B056C880,0x7122C32C30400B8C,0x8A7AE11FD5DACEDB,0xA0DEDB38E98A0E74, + 0xAD109354DCC615A6,0x0BE91A17F655CC19,0x8DDD5FFEB8BDB149,0xBFE53028AF890AED, + 0xD65BA6F5B4AD7A6A,0x7956F0882997227E,0x10E8665532B352F9,0x0E5361DFDACEFE39, + 0xCEC7F3049FC90161,0xFF62B561677F5F2E,0x975CCF26D22587F0,0x51EF0F86543BAF63, + 0x2F1E41EF10CBF28F,0x52722635BBB94A88,0xAE8DBAE73344F04D,0x410769D36688FD9A, + 0xB3AB94DE34BBB966,0x801317928DF1AA9B,0xA564A0F0C5113C54,0xF131D4BEBDB1A117, + 0x7F71A2F3EA8EF5B5,0x40878549C8F655C3,0x7EF14E6944F05DEC,0xD44663DCF55137D8, + 0xF2ACFD0D523344FC,0x0000000000000000,0x5FBC6E598EF5515A,0x16CF342EF1AA8532, + 0xB036BD6DDB395C8D,0x13754FE6DD31B712,0xBBDFA77A2D6C9094,0x89E7C8AC3A582B30, + 
0x3C6B0E09CDFA459D,0xC4AE0589C7E26521,0x49735A777F5FD468,0xCAFD64561D2C9B18, + 0xDA1502032F9FC9E1,0x8867243694268369,0x3782141E3BAF8984,0x9CB5D53124704BE9, + 0xD7DB4A6F1AD3D233,0xA6F989432A93D9BF,0x9D3539AB8A0EE3B0,0x53F2CAAF15C7E2D1, + 0x6E19283C76430F15,0x3DEBE2936384EDC4,0x5E3C82C3208BF903,0x33B8834CB94A13FD, + 0x6470DEB12E686B55,0x359FD1377A53C436,0x61CAA57902F35975,0x043A975282E59A79, + 0xFD7F70482683129C,0xC52EE913699CCD78,0x28B9FF0E7DAC8D1D,0x5455744E78A09D43, + 0xCB7D88CCB3523341,0x44BD121B4A13CFBA,0x4D49CD25FDBA4E11,0x3E76CB208C06082F, + 0x3FF627BA2278A076,0xC28957F204FBB2EA,0x453DFE81E46D67E3,0x94C1E6953DA7621B, + 0x2C83685CFF491764,0xF32C1197FC4DECA5,0x2B24D6BD922E68F6,0xB22B78449AC5113F, + 0x48F3B6EDD1217C31,0x2E9EAD75BEB55AD6,0x174FD8B45FD42D6B,0x4ED4E4961238ABFA, + 0x92E6B4EEFEBEB5D0,0x46A0D7320BEF8208,0x47203BA8A5912A51,0x24F75BF8E69E3E96, + 0xF0B1382413CF094E,0xFEE259FBC901F777,0x276A724B091CDB7D,0xBDF8F501EE75475F, + 0x599B3C224DEC8691,0x6D84018F99C1EAFE,0x7498B8E41CDB39AC,0xE0595E71217C5BB7, + 0x2AA43A273C50C0AF,0xF50B43EC3F543B6E,0x838E3E2162734F70,0xC09492DB4507FF58, + 0x72BFEA9FDFC2EE67,0x11688ACF9CCDFAA0,0x1A8190D86A9836B9,0x7ACBD93BC615C795, + 0xC7332C3A286080CA,0x863445E94EE87D50,0xF6966A5FD0D6DE85,0xE9AD814F96D5DA1C, + 0x70A22FB69E3EA3D5,0x0A69F68D582B6440,0xB8428EC9C2EE757F,0x604A49E3AC8DF12C, + 0x5B86F90B0C10CB23,0xE1D9B2EB8F02F3EE,0x29391394D3D22544,0xC8E0A17F5CD0D6AA, + 0xB58CC6A5F7A26EAD,0x8193FB08238F02C2,0xD5C68F465B2F9F81,0xFCFF9CD288FDBAC5, + 0x77059157F359DC47,0x1D262E3907FF492B,0xFB582233E59AC557,0xDDB2BCE242F8B673, + 0x2577B76248E096CF,0x6F99C4A6D83DA74C,0xC1147E41EB795701,0xF48BAF76912A9337 + }, + { + 0x3EF29D249B2C0A19,0xE9E16322B6F8622F,0x5536994047757F7A,0x9F4D56D5A47B0B33, + 0x822567466AA1174C,0xB8F5057DEB082FB2,0xCC48C10BF4475F53,0x373088D4275DEC3A, + 0x968F4325180AED10,0x173D232CF7016151,0xAE4ED09F946FCC13,0xFD4B4741C4539873, + 0x1B5B3F0DD9933765,0x2FFCB0967B644052,0xE02376D20A89840C,0xA3AE3A70329B18D7, + 
0x419CBD2335DE8526,0xFAFEBF115B7C3199,0x0397074F85AA9B0D,0xC58AD4FB4836B970, + 0xBEC60BE3FC4104A8,0x1EFF36DC4B708772,0x131FDC33ED8453B6,0x0844E33E341764D3, + 0x0FF11B6EAB38CD39,0x64351F0A7761B85A,0x3B5694F509CFBA0E,0x30857084B87245D0, + 0x47AFB3BD2297AE3C,0xF2BA5C2F6F6B554A,0x74BDC4761F4F70E1,0xCFDFC64471EDC45E, + 0xE610784C1DC0AF16,0x7ACA29D63C113F28,0x2DED411776A859AF,0xAC5F211E99A3D5EE, + 0xD484F949A87EF33B,0x3CE36CA596E013E4,0xD120F0983A9D432C,0x6BC40464DC597563, + 0x69D5F5E5D1956C9E,0x9AE95F043698BB24,0xC9ECC8DA66A4EF44,0xD69508C8A5B2EAC6, + 0xC40C2235C0503B80,0x38C193BA8C652103,0x1CEEC75D46BC9E8F,0xD331011937515AD1, + 0xD8E2E56886ECA50F,0xB137108D5779C991,0x709F3B6905CA4206,0x4FEB50831680CAEF, + 0xEC456AF3241BD238,0x58D673AFE181ABBE,0x242F54E7CAD9BF8C,0x0211F1810DCC19FD, + 0x90BC4DBB0F43C60A,0x9518446A9DA0761D,0xA1BFCBF13F57012A,0x2BDE4F8961E172B5, + 0x27B853A84F732481,0xB0B1E643DF1F4B61,0x18CC38425C39AC68,0xD2B7F7D7BF37D821, + 0x3103864A3014C720,0x14AA246372ABFA5C,0x6E600DB54EBAC574,0x394765740403A3F3, + 0x09C215F0BC71E623,0x2A58B947E987F045,0x7B4CDF18B477BDD8,0x9709B5EB906C6FE0, + 0x73083C268060D90B,0xFEDC400E41F9037E,0x284948C6E44BE9B8,0x728ECAE808065BFB, + 0x06330E9E17492B1A,0x5950856169E7294E,0xBAE4F4FCE6C4364F,0xCA7BCF95E30E7449, + 0x7D7FD186A33E96C2,0x52836110D85AD690,0x4DFAA1021B4CD312,0x913ABB75872544FA, + 0xDD46ECB9140F1518,0x3D659A6B1E869114,0xC23F2CABD719109A,0xD713FE062DD46836, + 0xD0A60656B2FBC1DC,0x221C5A79DD909496,0xEFD26DBCA1B14935,0x0E77EDA0235E4FC9, + 0xCBFD395B6B68F6B9,0x0DE0EAEFA6F4D4C4,0x0422FF1F1A8532E7,0xF969B85EDED6AA94, + 0x7F6E2007AEF28F3F,0x3AD0623B81A938FE,0x6624EE8B7AADA1A7,0xB682E8DDC856607B, + 0xA78CC56F281E2A30,0xC79B257A45FAA08D,0x5B4174E0642B30B3,0x5F638BFF7EAE0254, + 0x4BC9AF9C0C05F808,0xCE59308AF98B46AE,0x8FC58DA9CC55C388,0x803496C7676D0EB1, + 0xF33CAAE1E70DD7BA,0xBB6202326EA2B4BF,0xD5020F87201871CB,0x9D5CA754A9B712CE, + 0x841669D87DE83C56,0x8A6184785EB6739F,0x420BBA6CB0741E2B,0xF12D5B60EAC1CE47, + 
0x76AC35F71283691C,0x2C6BB7D9FECEDB5F,0xFCCDB18F4C351A83,0x1F79C012C3160582, + 0xF0ABADAE62A74CB7,0xE1A5801C82EF06FC,0x67A21845F2CB2357,0x5114665F5DF04D9D, + 0xBF40FD2D74278658,0xA0393D3FB73183DA,0x05A409D192E3B017,0xA9FB28CF0B4065F9, + 0x25A9A22942BF3D7C,0xDB75E22703463E02,0xB326E10C5AB5D06C,0xE7968E8295A62DE6, + 0xB973F3B3636EAD42,0xDF571D3819C30CE5,0xEE549B7229D7CBC5,0x12992AFD65E2D146, + 0xF8EF4E9056B02864,0xB7041E134030E28B,0xC02EDD2ADAD50967,0x932B4AF48AE95D07, + 0x6FE6FB7BC6DC4784,0x239AACB755F61666,0x401A4BEDBDB807D6,0x485EA8D389AF6305, + 0xA41BC220ADB4B13D,0x753B32B89729F211,0x997E584BB3322029,0x1D683193CEDA1C7F, + 0xFF5AB6C0C99F818E,0x16BBD5E27F67E3A1,0xA59D34EE25D233CD,0x98F8AE853B54A2D9, + 0x6DF70AFACB105E79,0x795D2E99B9BBA425,0x8E437B6744334178,0x0186F6CE886682F0, + 0xEBF092A3BB347BD2,0xBCD7FA62F18D1D55,0xADD9D7D011C5571E,0x0BD3E471B1BDFFDE, + 0xAA6C2F808EEAFEF4,0x5EE57D31F6C880A4,0xF50FA47FF044FCA0,0x1ADDC9C351F5B595, + 0xEA76646D3352F922,0x0000000000000000,0x85909F16F58EBEA6,0x46294573AAF12CCC, + 0x0A5512BF39DB7D2E,0x78DBD85731DD26D5,0x29CFBE086C2D6B48,0x218B5D36583A0F9B, + 0x152CD2ADFACD78AC,0x83A39188E2C795BC,0xC3B9DA655F7F926A,0x9ECBA01B2C1D89C3, + 0x07B5F8509F2FA9EA,0x7EE8D6C926940DCF,0x36B67E1AAF3B6ECA,0x86079859702425AB, + 0xFB7849DFD31AB369,0x4C7C57CC932A51E2,0xD96413A60E8A27FF,0x263EA566C715A671, + 0x6C71FC344376DC89,0x4A4F595284637AF8,0xDAF314E98B20BCF2,0x572768C14AB96687, + 0x1088DB7C682EC8BB,0x887075F9537A6A62,0x2E7A4658F302C2A2,0x619116DBE582084D, + 0xA87DDE018326E709,0xDCC01A779C6997E8,0xEDC39C3DAC7D50C8,0xA60A33A1A078A8C0, + 0xC1A82BE452B38B97,0x3F746BEA134A88E9,0xA228CCBEBAFD9A27,0xABEAD94E068C7C04, + 0xF48952B178227E50,0x5CF48CB0FB049959,0x6017E0156DE48ABD,0x4438B4F2A73D3531, + 0x8C528AE649FF5885,0xB515EF924DFCFB76,0x0C661C212E925634,0xB493195CC59A7986, + 0x9CDA519A21D1903E,0x32948105B5BE5C2D,0x194ACE8CD45F2E98,0x438D4CA238129CDB, + 0x9B6FA9CABEFE39D4,0x81B26009EF0B8C41,0xDED1EBF691A58E15,0x4E6DA64D9EE6481F, + 
0x54B06F8ECF13FD8A,0x49D85E1D01C9E1F5,0xAFC826511C094EE3,0xF698A33075EE67AD, + 0x5AC7822EEC4DB243,0x8DD47C28C199DA75,0x89F68337DB1CE892,0xCDCE37C57C21DDA3, + 0x530597DE503C5460,0x6A42F2AA543FF793,0x5D727A7E73621BA9,0xE232875307459DF1, + 0x56A19E0FC2DFE477,0xC61DD3B4CD9C227D,0xE5877F03986A341B,0x949EB2A415C6F4ED, + 0x6206119460289340,0x6380E75AE84E11B0,0x8BE772B6D6D0F16F,0x50929091D596CF6D, + 0xE86795EC3E9EE0DF,0x7CF927482B581432,0xC86A3E14EEC26DB4,0x7119CDA78DACC0F6, + 0xE40189CD100CB6EB,0x92ADBC3A028FDFF7,0xB2A017C2D2D3529C,0x200DABF8D05C8D6B, + 0x34A78F9BA2F77737,0xE3B4719D8F231F01,0x45BE423C2F5BB7C1,0xF71E55FEFD88E55D, + 0x6853032B59F3EE6E,0x65B3E9C4FF073AAA,0x772AC3399AE5EBEC,0x87816E97F842A75B, + 0x110E2DB2E0484A4B,0x331277CB3DD8DEDD,0xBD510CAC79EB9FA5,0x352179552A91F5C7 + }, + { + 0x8AB0A96846E06A6D,0x43C7E80B4BF0B33A,0x08C9B3546B161EE5,0x39F1C235EBA990BE, + 0xC1BEF2376606C7B2,0x2C209233614569AA,0xEB01523B6FC3289A,0x946953AB935ACEDD, + 0x272838F63E13340E,0x8B0455ECA12BA052,0x77A1B2C4978FF8A2,0xA55122CA13E54086, + 0x2276135862D3F1CD,0xDB8DDFDE08B76CFE,0x5D1E12C89E4A178A,0x0E56816B03969867, + 0xEE5F79953303ED59,0xAFED748BAB78D71D,0x6D929F2DF93E53EE,0xF5D8A8F8BA798C2A, + 0xF619B1698E39CF6B,0x95DDAF2F749104E2,0xEC2A9C80E0886427,0xCE5C8FD8825B95EA, + 0xC4E0D9993AC60271,0x4699C3A5173076F9,0x3D1B151F50A29F42,0x9ED505EA2BC75946, + 0x34665ACFDC7F4B98,0x61B1FB53292342F7,0xC721C0080E864130,0x8693CD1696FD7B74, + 0x872731927136B14B,0xD3446C8A63A1721B,0x669A35E8A6680E4A,0xCAB658F239509A16, + 0xA4E5DE4EF42E8AB9,0x37A7435EE83F08D9,0x134E6239E26C7F96,0x82791A3C2DF67488, + 0x3F6EF00A8329163C,0x8E5A7E42FDEB6591,0x5CAAEE4C7981DDB5,0x19F234785AF1E80D, + 0x255DDDE3ED98BD70,0x50898A32A99CCCAC,0x28CA4519DA4E6656,0xAE59880F4CB31D22, + 0x0D9798FA37D6DB26,0x32F968F0B4FFCD1A,0xA00F09644F258545,0xFA3AD5175E24DE72, + 0xF46C547C5DB24615,0x713E80FBFF0F7E20,0x7843CF2B73D2AAFA,0xBD17EA36AEDF62B4, + 0xFD111BACD16F92CF,0x4ABAA7DBC72D67E0,0xB3416B5DAD49FAD3,0xBCA316B24914A88B, + 
0x15D150068AECF914,0xE27C1DEBE31EFC40,0x4FE48C759BEDA223,0x7EDCFD141B522C78, + 0x4E5070F17C26681C,0xE696CAC15815F3BC,0x35D2A64B3BB481A7,0x800CFF29FE7DFDF6, + 0x1ED9FAC3D5BAA4B0,0x6C2663A91EF599D1,0x03C1199134404341,0xF7AD4DED69F20554, + 0xCD9D9649B61BD6AB,0xC8C3BDE7EADB1368,0xD131899FB02AFB65,0x1D18E352E1FAE7F1, + 0xDA39235AEF7CA6C1,0xA1BBF5E0A8EE4F7A,0x91377805CF9A0B1E,0x3138716180BF8E5B, + 0xD9F83ACBDB3CE580,0x0275E515D38B897E,0x472D3F21F0FBBCC6,0x2D946EB7868EA395, + 0xBA3C248D21942E09,0xE7223645BFDE3983,0xFF64FEB902E41BB1,0xC97741630D10D957, + 0xC3CB1722B58D4ECC,0xA27AEC719CAE0C3B,0x99FECB51A48C15FB,0x1465AC826D27332B, + 0xE1BD047AD75EBF01,0x79F733AF941960C5,0x672EC96C41A3C475,0xC27FEBA6524684F3, + 0x64EFD0FD75E38734,0xED9E60040743AE18,0xFB8E2993B9EF144D,0x38453EB10C625A81, + 0x6978480742355C12,0x48CF42CE14A6EE9E,0x1CAC1FD606312DCE,0x7B82D6BA4792E9BB, + 0x9D141C7B1F871A07,0x5616B80DC11C4A2E,0xB849C198F21FA777,0x7CA91801C8D9A506, + 0xB1348E487EC273AD,0x41B20D1E987B3A44,0x7460AB55A3CFBBE3,0x84E628034576F20A, + 0x1B87D16D897A6173,0x0FE27DEFE45D5258,0x83CDE6B8CA3DBEB7,0x0C23647ED01D1119, + 0x7A362A3EA0592384,0xB61F40F3F1893F10,0x75D457D1440471DC,0x4558DA34237035B8, + 0xDCA6116587FC2043,0x8D9B67D3C9AB26D0,0x2B0B5C88EE0E2517,0x6FE77A382AB5DA90, + 0x269CC472D9D8FE31,0x63C41E46FAA8CB89,0xB7ABBC771642F52F,0x7D1DE4852F126F39, + 0xA8C6BA3024339BA0,0x600507D7CEE888C8,0x8FEE82C61A20AFAE,0x57A2448926D78011, + 0xFCA5E72836A458F0,0x072BCEBB8F4B4CBD,0x497BBE4AF36D24A1,0x3CAFE99BB769557D, + 0x12FA9EBD05A7B5A9,0xE8C04BAA5B836BDB,0x4273148FAC3B7905,0x908384812851C121, + 0xE557D3506C55B0FD,0x72FF996ACB4F3D61,0x3EDA0C8E64E2DC03,0xF0868356E6B949E9, + 0x04EAD72ABB0B0FFC,0x17A4B5135967706A,0xE3C8E16F04D5367F,0xF84F30028DAF570C, + 0x1846C8FCBD3A2232,0x5B8120F7F6CA9108,0xD46FA231ECEA3EA6,0x334D947453340725, + 0x58403966C28AD249,0xBED6F3A79A9F21F5,0x68CCB483A5FE962D,0xD085751B57E1315A, + 0xFED0023DE52FD18E,0x4B0E5B5F20E6ADDF,0x1A332DE96EB1AB4C,0xA3CE10F57B65C604, + 
0x108F7BA8D62C3CD7,0xAB07A3A11073D8E1,0x6B0DAD1291BED56C,0xF2F366433532C097, + 0x2E557726B2CEE0D4,0x0000000000000000,0xCB02A476DE9B5029,0xE4E32FD48B9E7AC2, + 0x734B65EE2C84F75E,0x6E5386BCCD7E10AF,0x01B4FC84E7CBCA3F,0xCFE8735C65905FD5, + 0x3613BFDA0FF4C2E6,0x113B872C31E7F6E8,0x2FE18BA255052AEB,0xE974B72EBC48A1E4, + 0x0ABC5641B89D979B,0xB46AA5E62202B66E,0x44EC26B0C4BBFF87,0xA6903B5B27A503C7, + 0x7F680190FC99E647,0x97A84A3AA71A8D9C,0xDD12EDE16037EA7C,0xC554251DDD0DC84E, + 0x88C54C7D956BE313,0x4D91696048662B5D,0xB08072CC9909B992,0xB5DE5962C5C97C51, + 0x81B803AD19B637C9,0xB2F597D94A8230EC,0x0B08AAC55F565DA4,0xF1327FD2017283D6, + 0xAD98919E78F35E63,0x6AB9519676751F53,0x24E921670A53774F,0xB9FD3D1C15D46D48, + 0x92F66194FBDA485F,0x5A35DC7311015B37,0xDED3F4705477A93D,0xC00A0EB381CD0D8D, + 0xBB88D809C65FE436,0x16104997BEACBA55,0x21B70AC95693B28C,0x59F4C5E225411876, + 0xD5DB5EB50B21F499,0x55D7A19CF55C096F,0xA97246B4C3F8519F,0x8552D487A2BD3835, + 0x54635D181297C350,0x23C2EFDC85183BF2,0x9F61F96ECC0C9379,0x534893A39DDC8FED, + 0x5EDF0B59AA0A54CB,0xAC2C6D1A9F38945C,0xD7AEBBA0D8AA7DE7,0x2ABFA00C09C5EF28, + 0xD84CC64F3CF72FBF,0x2003F64DB15878B3,0xA724C7DFC06EC9F8,0x069F323F68808682, + 0xCC296ACD51D01C94,0x055E2BAE5CC0C5C3,0x6270E2C21D6301B6,0x3B842720382219C0, + 0xD2F0900E846AB824,0x52FC6F277A1745D2,0xC6953C8CE94D8B0F,0xE009F8FE3095753E, + 0x655B2C7992284D0B,0x984A37D54347DFC4,0xEAB5AEBF8808E2A5,0x9A3FD2C090CC56BA, + 0x9CA0E0FFF84CD038,0x4C2595E4AFADE162,0xDF6708F4B3BC6302,0xBF620F237D54EBCA, + 0x93429D101C118260,0x097D4FD08CDDD4DA,0x8C2F9B572E60ECEF,0x708A7C7F18C4B41F, + 0x3A30DBA4DFE9D3FF,0x4006F19A7FB0F07B,0x5F6BF7DD4DC19EF4,0x1F6D064732716E8F, + 0xF9FBCC866A649D33,0x308C8DE567744464,0x8971B0F972A0292C,0xD61A47243F61B7D8, + 0xEFEB8511D4C82766,0x961CB6BE40D147A3,0xAAB35F25F7B812DE,0x76154E407044329D, + 0x513D76B64E570693,0xF3479AC7D2F90AA8,0x9B8B2E4477079C85,0x297EB99D3D85AC69 + }, + { + 0x7E37E62DFC7D40C3,0x776F25A4EE939E5B,0xE045C850DD8FB5AD,0x86ED5BA711FF1952, + 
0xE91D0BD9CF616B35,0x37E0AB256E408FFB,0x9607F6C031025A7A,0x0B02F5E116D23C9D, + 0xF3D8486BFB50650C,0x621CFF27C40875F5,0x7D40CB71FA5FD34A,0x6DAA6616DAA29062, + 0x9F5F354923EC84E2,0xEC847C3DC507C3B3,0x025A3668043CE205,0xA8BF9E6C4DAC0B19, + 0xFA808BE2E9BEBB94,0xB5B99C5277C74FA3,0x78D9BC95F0397BCC,0xE332E50CDBAD2624, + 0xC74FCE129332797E,0x1729ECEB2EA709AB,0xC2D6B9F69954D1F8,0x5D898CBFBAB8551A, + 0x859A76FB17DD8ADB,0x1BE85886362F7FB5,0xF6413F8FF136CD8A,0xD3110FA5BBB7E35C, + 0x0A2FEED514CC4D11,0xE83010EDCD7F1AB9,0xA1E75DE55F42D581,0xEEDE4A55C13B21B6, + 0xF2F5535FF94E1480,0x0CC1B46D1888761E,0xBCE15FDB6529913B,0x2D25E8975A7181C2, + 0x71817F1CE2D7A554,0x2E52C5CB5C53124B,0xF9F7A6BEEF9C281D,0x9E722E7D21F2F56E, + 0xCE170D9B81DCA7E6,0x0E9B82051CB4941B,0x1E712F623C49D733,0x21E45CFA42F9F7DC, + 0xCB8E7A7F8BBA0F60,0x8E98831A010FB646,0x474CCF0D8E895B23,0xA99285584FB27A95, + 0x8CC2B57205335443,0x42D5B8E984EFF3A5,0x012D1B34021E718C,0x57A6626AAE74180B, + 0xFF19FC06E3D81312,0x35BA9D4D6A7C6DFE,0xC9D44C178F86ED65,0x506523E6A02E5288, + 0x03772D5C06229389,0x8B01F4FE0B691EC0,0xF8DABD8AED825991,0x4C4E3AEC985B67BE, + 0xB10DF0827FBF96A9,0x6A69279AD4F8DAE1,0xE78689DCD3D5FF2E,0x812E1A2B1FA553D1, + 0xFBAD90D6EBA0CA18,0x1AC543B234310E39,0x1604F7DF2CB97827,0xA6241C6951189F02, + 0x753513CCEAAF7C5E,0x64F2A59FC84C4EFA,0x247D2B1E489F5F5A,0xDB64D718AB474C48, + 0x79F4A7A1F2270A40,0x1573DA832A9BEBAE,0x3497867968621C72,0x514838D2A2302304, + 0xF0AF6537FD72F685,0x1D06023E3A6B44BA,0x678588C3CE6EDD73,0x66A893F7CC70ACFF, + 0xD4D24E29B5EDA9DF,0x3856321470EA6A6C,0x07C3418C0E5A4A83,0x2BCBB22F5635BACD, + 0x04B46CD00878D90A,0x06EE5AB80C443B0F,0x3B211F4876C8F9E5,0x0958C38912EEDE98, + 0xD14B39CDBF8B0159,0x397B292072F41BE0,0x87C0409313E168DE,0xAD26E98847CAA39F, + 0x4E140C849C6785BB,0xD5FF551DB7F3D853,0xA0CA46D15D5CA40D,0xCD6020C787FE346F, + 0x84B76DCF15C3FB57,0xDEFDA0FCA121E4CE,0x4B8D7B6096012D3D,0x9AC642AD298A2C64, + 0x0875D8BD10F0AF14,0xB357C6EA7B8374AC,0x4D6321D89A451632,0xEDA96709C719B23F, + 
0xF76C24BBF328BC06,0xC662D526912C08F2,0x3CE25EC47892B366,0xB978283F6F4F39BD, + 0xC08C8F9E9D6833FD,0x4F3917B09E79F437,0x593DE06FB2C08C10,0xD6887841B1D14BDA, + 0x19B26EEE32139DB0,0xB494876675D93E2F,0x825937771987C058,0x90E9AC783D466175, + 0xF1827E03FF6C8709,0x945DC0A8353EB87F,0x4516F9658AB5B926,0x3F9573987EB020EF, + 0xB855330B6D514831,0x2AE6A91B542BCB41,0x6331E413C6160479,0x408F8E8180D311A0, + 0xEFF35161C325503A,0xD06622F9BD9570D5,0x8876D9A20D4B8D49,0xA5533135573A0C8B, + 0xE168D364DF91C421,0xF41B09E7F50A2F8F,0x12B09B0F24C1A12D,0xDA49CC2CA9593DC4, + 0x1F5C34563E57A6BF,0x54D14F36A8568B82,0xAF7CDFE043F6419A,0xEA6A2685C943F8BC, + 0xE5DCBFB4D7E91D2B,0xB27ADDDE799D0520,0x6B443CAED6E6AB6D,0x7BAE91C9F61BE845, + 0x3EB868AC7CAE5163,0x11C7B65322E332A4,0xD23C1491B9A992D0,0x8FB5982E0311C7CA, + 0x70AC6428E0C9D4D8,0x895BC2960F55FCC5,0x76423E90EC8DEFD7,0x6FF0507EDE9E7267, + 0x3DCF45F07A8CC2EA,0x4AA06054941F5CB1,0x5810FB5BB0DEFD9C,0x5EFEA1E3BC9AC693, + 0x6EDD4B4ADC8003EB,0x741808F8E8B10DD2,0x145EC1B728859A22,0x28BC9F7350172944, + 0x270A06424EBDCCD3,0x972AEDF4331C2BF6,0x059977E40A66A886,0x2550302A4A812ED6, + 0xDD8A8DA0A7037747,0xC515F87A970E9B7B,0x3023EAA9601AC578,0xB7E3AA3A73FBADA6, + 0x0FB699311EAAE597,0x0000000000000000,0x310EF19D6204B4F4,0x229371A644DB6455, + 0x0DECAF591A960792,0x5CA4978BB8A62496,0x1C2B190A38753536,0x41A295B582CD602C, + 0x3279DCC16426277D,0xC1A194AA9F764271,0x139D803B26DFD0A1,0xAE51C4D441E83016, + 0xD813FA44AD65DFC1,0xAC0BF2BC45D4D213,0x23BE6A9246C515D9,0x49D74D08923DCF38, + 0x9D05032127D066E7,0x2F7FDEFF5E4D63C7,0xA47E2A0155247D07,0x99B16FF12FA8BFED, + 0x4661D4398C972AAF,0xDFD0BBC8A33F9542,0xDCA79694A51D06CB,0xB020EBB67DA1E725, + 0xBA0F0563696DAA34,0xE4F1A480D5F76CA7,0xC438E34E9510EAF7,0x939E81243B64F2FC, + 0x8DEFAE46072D25CF,0x2C08F3A3586FF04E,0xD7A56375B3CF3A56,0x20C947CE40E78650, + 0x43F8A3DD86F18229,0x568B795EAC6A6987,0x8003011F1DBB225D,0xF53612D3F7145E03, + 0x189F75DA300DEC3C,0x9570DB9C3720C9F3,0xBB221E576B73DBB8,0x72F65240E4F536DD, + 
0x443BE25188ABC8AA,0xE21FFE38D9B357A8,0xFD43CA6EE7E4F117,0xCAA3614B89A47EEC, + 0xFE34E732E1C6629E,0x83742C431B99B1D4,0xCF3A16AF83C2D66A,0xAAE5A8044990E91C, + 0x26271D764CA3BD5F,0x91C4B74C3F5810F9,0x7C6DD045F841A2C6,0x7F1AFD19FE63314F, + 0xC8F957238D989CE9,0xA709075D5306EE8E,0x55FC5402AA48FA0E,0x48FA563C9023BEB4, + 0x65DFBEABCA523F76,0x6C877D22D8BCE1EE,0xCC4D3BF385E045E3,0xBEBB69B36115733E, + 0x10EAAD6720FD4328,0xB6CEB10E71E5DC2A,0xBDCC44EF6737E0B7,0x523F158EA412B08D, + 0x989C74C52DB6CE61,0x9BEB59992B945DE8,0x8A2CEFCA09776F4C,0xA3BD6B8D5B7E3784, + 0xEB473DB1CB5D8930,0xC3FBA2C29B4AA074,0x9C28181525CE176B,0x683311F2D0C438E4, + 0x5FD3BAD7BE84B71F,0xFC6ED15AE5FA809B,0x36CDB0116C5EFE77,0x29918447520958C8, + 0xA29070B959604608,0x53120EBAA60CC101,0x3A0C047C74D68869,0x691E0AC6D2DA4968, + 0x73DB4974E6EB4751,0x7A838AFDF40599C9,0x5A4ACD33B4E21F99,0x6046C94FC03497F0, + 0xE6AB92E8D1CB8EA2,0x3354C7F5663856F1,0xD93EE170AF7BAE4D,0x616BD27BC22AE67C, + 0x92B39A10397A8370,0xABC8B3304B8E9890,0xBF967287630B02B2,0x5B67D607B6FC6E15 + }, + { + 0xD031C397CE553FE6,0x16BA5B01B006B525,0xA89BADE6296E70C8,0x6A1F525D77D3435B, + 0x6E103570573DFA0B,0x660EFB2A17FC95AB,0x76327A9E97634BF6,0x4BAD9D6462458BF5, + 0xF1830CAEDBC3F748,0xC5C8F542669131FF,0x95044A1CDC48B0CB,0x892962DF3CF8B866, + 0xB0B9E208E930C135,0xA14FB3F0611A767C,0x8D2605F21C160136,0xD6B71922FECC549E, + 0x37089438A5907D8B,0x0B5DA38E5803D49C,0x5A5BCC9CEA6F3CBC,0xEDAE246D3B73FFE5, + 0xD2B87E0FDE22EDCE,0x5E54ABB1CA8185EC,0x1DE7F88FE80561B9,0xAD5E1A870135A08C, + 0x2F2ADBD665CECC76,0x5780B5A782F58358,0x3EDC8A2EEDE47B3F,0xC9D95C3506BEE70F, + 0x83BE111D6C4E05EE,0xA603B90959367410,0x103C81B4809FDE5D,0x2C69B6027D0C774A, + 0x399080D7D5C87953,0x09D41E16487406B4,0xCDD63B1826505E5F,0xF99DC2F49B0298E8, + 0x9CD0540A943CB67F,0xBCA84B7F891F17C5,0x723D1DB3B78DF2A6,0x78AA6E71E73B4F2E, + 0x1433E699A071670D,0x84F21BE454620782,0x98DF3327B4D20F2F,0xF049DCE2D3769E5C, + 0xDB6C60199656EB7A,0x648746B2078B4783,0x32CD23598DCBADCF,0x1EA4955BF0C7DA85, + 
0xE9A143401B9D46B5,0xFD92A5D9BBEC21B8,0xC8138C790E0B8E1B,0x2EE00B9A6D7BA562, + 0xF85712B893B7F1FC,0xEB28FED80BEA949D,0x564A65EB8A40EA4C,0x6C9988E8474A2823, + 0x4535898B121D8F2D,0xABD8C03231ACCBF4,0xBA2E91CAB9867CBD,0x7960BE3DEF8E263A, + 0x0C11A977602FD6F0,0xCB50E1AD16C93527,0xEAE22E94035FFD89,0x2866D12F5DE2CE1A, + 0xFF1B1841AB9BF390,0x9F9339DE8CFE0D43,0x964727C8C48A0BF7,0x524502C6AAAE531C, + 0x9B9C5EF3AC10B413,0x4FA2FA4942AB32A5,0x3F165A62E551122B,0xC74148DA76E6E3D7, + 0x924840E5E464B2A7,0xD372AE43D69784DA,0x233B72A105E11A86,0xA48A04914941A638, + 0xB4B68525C9DE7865,0xDDEABAACA6CF8002,0x0A9773C250B6BD88,0xC284FFBB5EBD3393, + 0x8BA0DF472C8F6A4E,0x2AEF6CB74D951C32,0x427983722A318D41,0x73F7CDFFBF389BB2, + 0x074C0AF9382C026C,0x8A6A0F0B243A035A,0x6FDAE53C5F88931F,0xC68B98967E538AC3, + 0x44FF59C71AA8E639,0xE2FCE0CE439E9229,0xA20CDE2479D8CD40,0x19E89FA2C8EBD8E9, + 0xF446BBCFF398270C,0x43B3533E2284E455,0xD82F0DCD8E945046,0x51066F12B26CE820, + 0xE73957AF6BC5426D,0x081ECE5A40C16FA0,0x3B193D4FC5BFAB7B,0x7FE66488DF174D42, + 0x0E9814EF705804D8,0x8137AC857C39D7C6,0xB1733244E185A821,0x695C3F896F11F867, + 0xF6CF0657E3EFF524,0x1AABF276D02963D5,0x2DA3664E75B91E5E,0x0289BD981077D228, + 0x90C1FD7DF413608F,0x3C5537B6FD93A917,0xAA12107E3919A2E0,0x0686DAB530996B78, + 0xDAA6B0559EE3826E,0xC34E2FF756085A87,0x6D5358A44FFF4137,0xFC587595B35948AC, + 0x7CA5095CC7D5F67E,0xFB147F6C8B754AC0,0xBFEB26AB91DDACF9,0x6896EFC567A49173, + 0xCA9A31E11E7C5C33,0xBBE44186B13315A9,0x0DDB793B689ABFE4,0x70B4A02BA7FA208E, + 0xE47A3A7B7307F951,0x8CECD5BE14A36822,0xEEED49B923B144D9,0x17708B4DB8B3DC31, + 0x6088219F2765FED3,0xB3FA8FDCF1F27A09,0x910B2D31FCA6099B,0x0F52C4A378ED6DCC, + 0x50CCBF5EBAD98134,0x6BD582117F662A4F,0x94CE9A50D4FDD9DF,0x2B25BCFB45207526, + 0x67C42B661F49FCBF,0x492420FC723259DD,0x03436DD418C2BB3C,0x1F6E4517F872B391, + 0xA08563BC69AF1F68,0xD43EA4BAEEBB86B6,0x01CAD04C08B56914,0xAC94CACB0980C998, + 0x54C3D8739A373864,0x26FEC5C02DBACAC2,0xDEA9D778BE0D3B3E,0x040F672D20EEB950, + 
0xE5B0EA377BB29045,0xF30AB136CBB42560,0x62019C0737122CFB,0xE86B930C13282FA1, + 0xCC1CEB542EE5374B,0x538FD28AA21B3A08,0x1B61223AD89C0AC1,0x36C24474AD25149F, + 0x7A23D3E9F74C9D06,0xBE21F6E79968C5ED,0xCF5F868036278C77,0xF705D61BEB5A9C30, + 0x4D2B47D152DCE08D,0x5F9E7BFDC234ECF8,0x247778583DCD18EA,0x867BA67C4415D5AA, + 0x4CE1979D5A698999,0x0000000000000000,0xEC64F42133C696F1,0xB57C5569C16B1171, + 0xC1C7926F467F88AF,0x654D96FE0F3E2E97,0x15F936D5A8C40E19,0xB8A72C52A9F1AE95, + 0xA9517DAA21DB19DC,0x58D27104FA18EE94,0x5918A148F2AD8780,0x5CDD1629DAF657C4, + 0x8274C15164FB6CFA,0xD1FB13DBC6E056F2,0x7D6FD910CF609F6A,0xB63F38BDD9A9AA4D, + 0x3D9FE7FAF526C003,0x74BBC706871499DE,0xDF630734B6B8522A,0x3AD3ED03CD0AC26F, + 0xFADEAF2083C023D4,0xC00D42234ECAE1BB,0x8538CBA85CD76E96,0xC402250E6E2458EB, + 0x47BC3413026A5D05,0xAFD7A71F114272A4,0x978DF784CC3F62E3,0xB96DFC1EA144C781, + 0x21B2CF391596C8AE,0x318E4E8D950916F3,0xCE9556CC3E92E563,0x385A509BDD7D1047, + 0x358129A0B5E7AFA3,0xE6F387E363702B79,0xE0755D5653E94001,0x7BE903A5FFF9F412, + 0x12B53C2C90E80C75,0x3307F315857EC4DB,0x8FAFB86A0C61D31E,0xD9E5DD8186213952, + 0x77F8AAD29FD622E2,0x25BDA814357871FE,0x7571174A8FA1F0CA,0x137FEC60985D6561, + 0x30449EC19DBC7FE7,0xA540D4DD41F4CF2C,0xDC206AE0AE7AE916,0x5B911CD0E2DA55A8, + 0xB2305F90F947131D,0x344BF9ECBD52C6B7,0x5D17C665D2433ED0,0x18224FEEC05EB1FD, + 0x9E59E992844B6457,0x9A568EBFA4A5DD07,0xA3C60E68716DA454,0x7E2CB4C4D7A22456, + 0x87B176304CA0BCBE,0x413AEEA632F3367D,0x9915E36BBC67663B,0x40F03EEA3A465F69, + 0x1C2D28C3E0B008AD,0x4E682A054A1E5BB1,0x05C5B761285BD044,0xE1BF8D1A5B5C2915, + 0xF2C0617AC3014C74,0xB7F5E8F1D11CC359,0x63CB4C4B3FA745EF,0x9D1A84469C89DF6B, + 0xE33630824B2BFB3D,0xD5F474F6E60EEFA2,0xF58C6B83FB2D4E18,0x4676E45F0ADF3411, + 0x20781F751D23A1BA,0xBD629B3381AA7ED1,0xAE1D775319F71BB0,0xFED1C80DA32E9A84, + 0x5509083F92825170,0x29AC01635557A70E,0xA7C9694551831D04,0x8E65682604D4BA0A, + 0x11F651F8882AB749,0xD77DC96EF6793D8A,0xEF2799F52B042DCD,0x48EEF0B07A8730C9, + 
0x22F1A2ED0D547392,0x6142F1D32FD097C7,0x4A674D286AF0E2E1,0x80FD7CC9748CBED2, + 0x717E7067AF4F499A,0x938290A9ECD1DBB3,0x88E3B293344DD172,0x2734158C250FA3D6 + } +}; + +// Constant values for KeySchedule function + __constant static const unsigned char CC[12][64] = { + { + 0xB1,0x08,0x5B,0xDA,0x1E,0xCA,0xDA,0xE9,0xEB,0xCB,0x2F,0x81,0xC0,0x65,0x7C,0x1F, + 0x2F,0x6A,0x76,0x43,0x2E,0x45,0xD0,0x16,0x71,0x4E,0xB8,0x8D,0x75,0x85,0xC4,0xFC, + 0x4B,0x7C,0xE0,0x91,0x92,0x67,0x69,0x01,0xA2,0x42,0x2A,0x08,0xA4,0x60,0xD3,0x15, + 0x05,0x76,0x74,0x36,0xCC,0x74,0x4D,0x23,0xDD,0x80,0x65,0x59,0xF2,0xA6,0x45,0x07 + }, + { + 0x6F,0xA3,0xB5,0x8A,0xA9,0x9D,0x2F,0x1A,0x4F,0xE3,0x9D,0x46,0x0F,0x70,0xB5,0xD7, + 0xF3,0xFE,0xEA,0x72,0x0A,0x23,0x2B,0x98,0x61,0xD5,0x5E,0x0F,0x16,0xB5,0x01,0x31, + 0x9A,0xB5,0x17,0x6B,0x12,0xD6,0x99,0x58,0x5C,0xB5,0x61,0xC2,0xDB,0x0A,0xA7,0xCA, + 0x55,0xDD,0xA2,0x1B,0xD7,0xCB,0xCD,0x56,0xE6,0x79,0x04,0x70,0x21,0xB1,0x9B,0xB7 + }, + { + 0xF5,0x74,0xDC,0xAC,0x2B,0xCE,0x2F,0xC7,0x0A,0x39,0xFC,0x28,0x6A,0x3D,0x84,0x35, + 0x06,0xF1,0x5E,0x5F,0x52,0x9C,0x1F,0x8B,0xF2,0xEA,0x75,0x14,0xB1,0x29,0x7B,0x7B, + 0xD3,0xE2,0x0F,0xE4,0x90,0x35,0x9E,0xB1,0xC1,0xC9,0x3A,0x37,0x60,0x62,0xDB,0x09, + 0xC2,0xB6,0xF4,0x43,0x86,0x7A,0xDB,0x31,0x99,0x1E,0x96,0xF5,0x0A,0xBA,0x0A,0xB2 + }, + { + 0xEF,0x1F,0xDF,0xB3,0xE8,0x15,0x66,0xD2,0xF9,0x48,0xE1,0xA0,0x5D,0x71,0xE4,0xDD, + 0x48,0x8E,0x85,0x7E,0x33,0x5C,0x3C,0x7D,0x9D,0x72,0x1C,0xAD,0x68,0x5E,0x35,0x3F, + 0xA9,0xD7,0x2C,0x82,0xED,0x03,0xD6,0x75,0xD8,0xB7,0x13,0x33,0x93,0x52,0x03,0xBE, + 0x34,0x53,0xEA,0xA1,0x93,0xE8,0x37,0xF1,0x22,0x0C,0xBE,0xBC,0x84,0xE3,0xD1,0x2E + }, + { + 0x4B,0xEA,0x6B,0xAC,0xAD,0x47,0x47,0x99,0x9A,0x3F,0x41,0x0C,0x6C,0xA9,0x23,0x63, + 0x7F,0x15,0x1C,0x1F,0x16,0x86,0x10,0x4A,0x35,0x9E,0x35,0xD7,0x80,0x0F,0xFF,0xBD, + 0xBF,0xCD,0x17,0x47,0x25,0x3A,0xF5,0xA3,0xDF,0xFF,0x00,0xB7,0x23,0x27,0x1A,0x16, + 0x7A,0x56,0xA2,0x7E,0xA9,0xEA,0x63,0xF5,0x60,0x17,0x58,0xFD,0x7C,0x6C,0xFE,0x57 + }, + { + 
0xAE,0x4F,0xAE,0xAE,0x1D,0x3A,0xD3,0xD9,0x6F,0xA4,0xC3,0x3B,0x7A,0x30,0x39,0xC0, + 0x2D,0x66,0xC4,0xF9,0x51,0x42,0xA4,0x6C,0x18,0x7F,0x9A,0xB4,0x9A,0xF0,0x8E,0xC6, + 0xCF,0xFA,0xA6,0xB7,0x1C,0x9A,0xB7,0xB4,0x0A,0xF2,0x1F,0x66,0xC2,0xBE,0xC6,0xB6, + 0xBF,0x71,0xC5,0x72,0x36,0x90,0x4F,0x35,0xFA,0x68,0x40,0x7A,0x46,0x64,0x7D,0x6E + }, + { + 0xF4,0xC7,0x0E,0x16,0xEE,0xAA,0xC5,0xEC,0x51,0xAC,0x86,0xFE,0xBF,0x24,0x09,0x54, + 0x39,0x9E,0xC6,0xC7,0xE6,0xBF,0x87,0xC9,0xD3,0x47,0x3E,0x33,0x19,0x7A,0x93,0xC9, + 0x09,0x92,0xAB,0xC5,0x2D,0x82,0x2C,0x37,0x06,0x47,0x69,0x83,0x28,0x4A,0x05,0x04, + 0x35,0x17,0x45,0x4C,0xA2,0x3C,0x4A,0xF3,0x88,0x86,0x56,0x4D,0x3A,0x14,0xD4,0x93 + }, + { + 0x9B,0x1F,0x5B,0x42,0x4D,0x93,0xC9,0xA7,0x03,0xE7,0xAA,0x02,0x0C,0x6E,0x41,0x41, + 0x4E,0xB7,0xF8,0x71,0x9C,0x36,0xDE,0x1E,0x89,0xB4,0x44,0x3B,0x4D,0xDB,0xC4,0x9A, + 0xF4,0x89,0x2B,0xCB,0x92,0x9B,0x06,0x90,0x69,0xD1,0x8D,0x2B,0xD1,0xA5,0xC4,0x2F, + 0x36,0xAC,0xC2,0x35,0x59,0x51,0xA8,0xD9,0xA4,0x7F,0x0D,0xD4,0xBF,0x02,0xE7,0x1E + }, + { + 0x37,0x8F,0x5A,0x54,0x16,0x31,0x22,0x9B,0x94,0x4C,0x9A,0xD8,0xEC,0x16,0x5F,0xDE, + 0x3A,0x7D,0x3A,0x1B,0x25,0x89,0x42,0x24,0x3C,0xD9,0x55,0xB7,0xE0,0x0D,0x09,0x84, + 0x80,0x0A,0x44,0x0B,0xDB,0xB2,0xCE,0xB1,0x7B,0x2B,0x8A,0x9A,0xA6,0x07,0x9C,0x54, + 0x0E,0x38,0xDC,0x92,0xCB,0x1F,0x2A,0x60,0x72,0x61,0x44,0x51,0x83,0x23,0x5A,0xDB + }, + { + 0xAB,0xBE,0xDE,0xA6,0x80,0x05,0x6F,0x52,0x38,0x2A,0xE5,0x48,0xB2,0xE4,0xF3,0xF3, + 0x89,0x41,0xE7,0x1C,0xFF,0x8A,0x78,0xDB,0x1F,0xFF,0xE1,0x8A,0x1B,0x33,0x61,0x03, + 0x9F,0xE7,0x67,0x02,0xAF,0x69,0x33,0x4B,0x7A,0x1E,0x6C,0x30,0x3B,0x76,0x52,0xF4, + 0x36,0x98,0xFA,0xD1,0x15,0x3B,0xB6,0xC3,0x74,0xB4,0xC7,0xFB,0x98,0x45,0x9C,0xED + }, + { + 0x7B,0xCD,0x9E,0xD0,0xEF,0xC8,0x89,0xFB,0x30,0x02,0xC6,0xCD,0x63,0x5A,0xFE,0x94, + 0xD8,0xFA,0x6B,0xBB,0xEB,0xAB,0x07,0x61,0x20,0x01,0x80,0x21,0x14,0x84,0x66,0x79, + 0x8A,0x1D,0x71,0xEF,0xEA,0x48,0xB9,0xCA,0xEF,0xBA,0xCD,0x1D,0x7D,0x47,0x6E,0x98, + 
0xDE,0xA2,0x59,0x4A,0xC0,0x6F,0xD8,0x5D,0x6B,0xCA,0xA4,0xCD,0x81,0xF3,0x2D,0x1B + }, + { + 0x37,0x8E,0xE7,0x67,0xF1,0x16,0x31,0xBA,0xD2,0x13,0x80,0xB0,0x04,0x49,0xB1,0x7A, + 0xCD,0xA4,0x3C,0x32,0xBC,0xDF,0x1D,0x77,0xF8,0x20,0x12,0xD4,0x30,0x21,0x9F,0x9B, + 0x5D,0x80,0xEF,0x9D,0x18,0x91,0xCC,0x86,0xE7,0x1D,0xA4,0xAA,0x88,0xE1,0x28,0x52, + 0xFA,0xF4,0x17,0xD5,0xD9,0xB2,0x1B,0x99,0x48,0xBC,0x92,0x4A,0xF1,0x1B,0xD7,0x20 + } +};

// ---------------------------------------------------------------------------
// GOST 512-bit hash helpers.
// NOTE(review): the structure below (g_N compression, 12-round E transform,
// table-driven F) appears to follow GOST R 34.11-2012 - confirm against the
// reference before relying on that.
//
// 64-byte values are treated as big-endian byte strings by the modular adder
// and as eight sph_u64 words by the XOR and F helpers.  Every buffer declared
// in this section is therefore forced to 8-byte alignment so the word-wise
// accesses are well defined.
// ---------------------------------------------------------------------------
#define GOST_ALIGN8 __attribute__((aligned(8)))

// Copies n bytes from src to dst, lowest address first.
// OpenCL C has no memcpy(); a byte-wise copy also places no alignment
// requirement on either pointer.
inline void GOST_CopyBytes(unsigned char *dst, const unsigned char *src, unsigned int n)
{
	for (unsigned int i = 0; i < n; i++)
		dst[i] = src[i];
}

// c = (a + b) mod 2^512.
// a, b and c each point to 64 bytes read as one big-endian integer (byte 63
// is the least significant).  c may be the same buffer as a or b: each byte
// position is read before it is written.
inline void GOST_AddModulo512(const void *a,const void *b,void *c)
{
	const unsigned char *A = (const unsigned char *)a;
	const unsigned char *B = (const unsigned char *)b;
	unsigned char *C = (unsigned char *)c;
	int t = 0;	// running sum; bits 8 and above are the carry into the next byte

#define GOST_ADDBYTE(i) \
	do { t = A[(i)] + B[(i)] + (t >> 8); C[(i)] = (unsigned char)(t & 0xFF); } while (0)

#ifdef FULL_UNROLL
	// Eight consecutive bytes, most-significant index last, so the carry
	// still ripples from byte 63 down to byte 0.
#define GOST_ADDBYTE_X8(hi) \
	do { \
		GOST_ADDBYTE((hi));     GOST_ADDBYTE((hi) - 1); \
		GOST_ADDBYTE((hi) - 2); GOST_ADDBYTE((hi) - 3); \
		GOST_ADDBYTE((hi) - 4); GOST_ADDBYTE((hi) - 5); \
		GOST_ADDBYTE((hi) - 6); GOST_ADDBYTE((hi) - 7); \
	} while (0)

	GOST_ADDBYTE_X8(63);
	GOST_ADDBYTE_X8(55);
	GOST_ADDBYTE_X8(47);
	GOST_ADDBYTE_X8(39);
	GOST_ADDBYTE_X8(31);
	GOST_ADDBYTE_X8(23);
	GOST_ADDBYTE_X8(15);
	GOST_ADDBYTE_X8(7);
#undef GOST_ADDBYTE_X8
#else
	for (int i = 63; i >= 0; i--)
		GOST_ADDBYTE(i);
#endif

#undef GOST_ADDBYTE
}


// c = a XOR b over 64 bytes, with b located in __constant memory.
// The operands are accessed as eight sph_u64 words, so a and c must be
// suitably aligned.  c may be the same buffer as a.
inline void GOST_AddXor512(const void *a, __constant const void *b, void *c)
{
	const sph_u64 *A = (const sph_u64 *)a;
	__constant const sph_u64 *B = (__constant const sph_u64 *)b;
	sph_u64 *C = (sph_u64 *)c;

	C[0] = A[0] ^ B[0];
	C[1] = A[1] ^ B[1];
	C[2] = A[2] ^ B[2];
	C[3] = A[3] ^ B[3];
	C[4] = A[4] ^ B[4];
	C[5] = A[5] ^ B[5];
	C[6] = A[6] ^ B[6];
	C[7] = A[7] ^ B[7];
}

// Same as GOST_AddXor512, but both inputs live in the default (private)
// address space.  c may be the same buffer as a or b.
inline void GOST_AddXor512_(const void *a, const void *b, void *c)
{
	const sph_u64 *A = (const sph_u64 *)a;
	const sph_u64 *B = (const sph_u64 *)b;
	sph_u64 *C = (sph_u64 *)c;

	C[0] = A[0] ^ B[0];
	C[1] = A[1] ^ B[1];
	C[2] = A[2] ^ B[2];
	C[3] = A[3] ^ B[3];
	C[4] = A[4] ^ B[4];
	C[5] = A[5] ^ B[5];
	C[6] = A[6] ^ B[6];
	C[7] = A[7] ^ B[7];
}


// Table-driven F transform.
// Output word j (j = 0..7) is the XOR of T[k][state[(7 - k) * 8 + j]] for
// k = 0..7, i.e. it gathers byte j of each of the eight input words.
//
//   _state         64 input bytes.
//   _return_state  receives eight sph_u64 words.  Must NOT overlap _state:
//                  later output words still read input bytes that an
//                  in-place store would already have overwritten.
//   memc           when non-zero the result is also copied back to _state.
inline void GOST_F(void *_state, void *_return_state, int memc)
{
	const unsigned char *state = (const unsigned char *)_state;
	sph_u64 *return_state = (sph_u64 *)_return_state;

#define GOST_F_WORD(j) \
	( T[0][state[56 + (j)]] ^ T[1][state[48 + (j)]] ^ \
	  T[2][state[40 + (j)]] ^ T[3][state[32 + (j)]] ^ \
	  T[4][state[24 + (j)]] ^ T[5][state[16 + (j)]] ^ \
	  T[6][state[ 8 + (j)]] ^ T[7][state[ 0 + (j)]] )

	return_state[0] = GOST_F_WORD(0);
	return_state[1] = GOST_F_WORD(1);
	return_state[2] = GOST_F_WORD(2);
	return_state[3] = GOST_F_WORD(3);
	return_state[4] = GOST_F_WORD(4);
	return_state[5] = GOST_F_WORD(5);
	return_state[6] = GOST_F_WORD(6);
	return_state[7] = GOST_F_WORD(7);

#undef GOST_F_WORD

	if (memc) {
		GOST_CopyBytes((unsigned char *)_state, (const unsigned char *)return_state, 64);
	}
}

// One key-schedule step: K ^= CC[i] (in place), then r_K = F(K).
// Wrapped in do/while(0) so it behaves as a single statement.
#define GOST_KeySchedule(K,r_K,i) \
	do { GOST_AddXor512((K),CC[(i)],(K)); GOST_F((K),(r_K),0); } while (0)

// 12-round E transform.
//
//   K      64-byte initial round key.  Used as working storage and
//          overwritten.
//   m      64-byte message block (read only).
//   state  receives the 64-byte result.
//
// GOST_F cannot run in place, so state and key each ping-pong between a
// local scratch buffer (slot 0) and the caller's buffer (slot 1).  Round i
// writes slot (i & 1); the last round is i = 11, so the final value lands in
// the caller's state buffer.
//
// The previous "#ifndef FULL_UNROLL" variant of this function called GOST_F
// and GOST_KeySchedule with outdated argument lists and did not compile; the
// working loop form is now used unconditionally.
inline void GOST_E(unsigned char *K,const unsigned char *m, unsigned char *state)
{
	unsigned char r_s0[64] GOST_ALIGN8;
	unsigned char r_K0[64] GOST_ALIGN8;
	unsigned char *r_state[2];
	unsigned char *r_K[2];

	r_state[0] = r_s0;
	r_state[1] = state;
	r_K[0] = r_K0;
	r_K[1] = K;

	// Round 0: inputs are the caller's buffers, outputs go to slot 0.
	GOST_AddXor512_(m,K,state);
	GOST_F(state,r_state[0],0);
	GOST_KeySchedule(K,r_K[0],0);
	GOST_AddXor512_(r_state[0],r_K[0],r_state[0]);

	for (int i = 1; i < 12; i++)
	{
		const int s = i & 1;	// slot written this round
		const int f = s ^ 1;	// slot read this round

		GOST_F(r_state[f],r_state[s],0);
		GOST_KeySchedule(r_K[f],r_K[s],i);
		GOST_AddXor512_(r_state[s],r_K[s],r_state[s]);
	}
}

// Compression function: h = E(F(N ^ h), m) ^ h ^ m.
//
//   N  64-byte counter/tweak block (read only).
//   h  64-byte chaining value, updated in place.
//   m  64-byte message block (read only).
inline void GOST_g_N(const unsigned char *N,unsigned char *h, const unsigned char *m)
{
	unsigned char t[64] GOST_ALIGN8;
	sph_u64 K[8];
	sph_u64 ret_state[8];

	GOST_AddXor512_(N,h,K);

	GOST_F(K,ret_state,0);

	// ret_state is the initial round key; GOST_E consumes it as scratch.
	GOST_E((unsigned char*)ret_state,m,t);

	GOST_AddXor512_(t,h,t);
	GOST_AddXor512_(t,m,h);
}

// Hashes `length` BITS of `message` starting from chaining value `IV`.
//
//   IV       64-byte initial chaining value.  It is used as the working
//            state and is overwritten with the final hash.
//   message  input data.  Full 64-byte blocks are taken from the END of the
//            message first, working toward its start.
//   length   message length in bits.
//   out      receives the 64-byte hash.
inline void GOST_hash_X(unsigned char *IV,const unsigned char *message,sph_u64 length,unsigned char *out)
{
	unsigned char v512[64] = {0};			// big-endian 512, set below
	unsigned char v0[64] GOST_ALIGN8 = {0};
	unsigned char Sigma[64] GOST_ALIGN8 = {0};	// running sum of message blocks
	unsigned char N[64] GOST_ALIGN8 = {0};		// running count of processed bits
	unsigned char m[64] GOST_ALIGN8;
	unsigned char *hash = IV;
	sph_u64 len = length;

	v512[62] = 0x02;	// 0x0200 == 512 in bytes 62..63

	// Stage 2

	while (len >= 512)
	{
		GOST_CopyBytes(m, message + len/8 - 63 - ( (len & 0x7) == 0 ), 64);

		GOST_g_N(N,hash,m);
		GOST_AddModulo512(N,v512,N);
		GOST_AddModulo512(Sigma,m,Sigma);
		len -= 512;
	}

	// Final block: remaining message bytes right-aligned in a zeroed buffer.
	for (int i = 0; i < 64; i++)
		m[i] = 0;
	GOST_CopyBytes(m + 63 - len/8 + ( (len & 0x7) == 0 ), message,
	               (unsigned int)(len/8 + 1 - ( (len & 0x7) == 0 )));

	// Stage 3
	// Padding: a single 1 bit immediately above the remaining message bits.
	m[ 63 - len/8 ] |= (1 << (len & 0x7));

	GOST_g_N(N,hash,m);
	// len < 512 here, so it fits the two low-order bytes of the addend.
	v512[63] = (unsigned char)(len & 0xFF);
	v512[62] = (unsigned char)(len >> 8);
	GOST_AddModulo512(N,v512,N);

	GOST_AddModulo512(Sigma,m,Sigma);

	GOST_g_N(v0,hash,N);
	GOST_g_N(v0,hash,Sigma);

	GOST_CopyBytes(out, hash, 64);
}

// 512-bit hash of `length` bits of `message`, using an all-zero initial
// chaining value.  Writes 64 bytes to `out`.
static void GOST_hash_512(const unsigned char *message,int length,unsigned char *out)
{
	unsigned char IV[64] GOST_ALIGN8 = {0};

	GOST_hash_X(IV,message,length,out);
}

diff --git a/kernel/sibcoin-mod.cl b/kernel/sibcoin-mod.cl new file mode 100644 index 000000000..72ae3c5a6 --- /dev/null +++
b/kernel/sibcoin-mod.cl @@ -0,0 +1,1169 @@ +/* X11-Gost kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * Copyright (c) 2014 Girino Vey + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Ivan + */ + +#ifdef __ECLIPSE_EDITOR__ + #include "OpenCLKernel.hpp" +#endif + +#ifndef SIBCOIN_MOD_CL +#define SIBCOIN_MOD_CL + +#if __ENDIAN_LITTLE__ + #define SPH_LITTLE_ENDIAN 1 +#else + #define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 +typedef unsigned int sph_u32; +typedef int sph_s32; + +#ifndef __OPENCL_VERSION__ + typedef unsigned long long sph_u64; + typedef long long sph_s64; +#else + typedef unsigned long sph_u64; + typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_CUBEHASH_UNROLL 0 + +#ifndef SPH_COMPACT_BLAKE_64 + #define SPH_COMPACT_BLAKE_64 0 +#endif +#ifndef SPH_LUFFA_PARALLEL + #define SPH_LUFFA_PARALLEL 0 +#endif +#ifndef SPH_KECCAK_UNROLL + #define SPH_KECCAK_UNROLL 0 +#endif + +//#include "aes_helper.cl" +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" +#include "luffa.cl" +#include "cubehash.cl" +#include "shavite.cl" +#include "simd.cl" +#include "echo.cl" +#include "gost-mod.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global 
sph_u64 *) (x)); +#endif + +#define SHL(x, n) ((x) << (n)) +#define SHR(x, n) ((x) >> (n)) + +#define CONST_EXP2 q[i+0] + SPH_ROTL64(q[i+1], 5) + q[i+2] + SPH_ROTL64(q[i+3], 11) + \ + q[i+4] + SPH_ROTL64(q[i+5], 27) + q[i+6] + SPH_ROTL64(q[i+7], 32) + \ + q[i+8] + SPH_ROTL64(q[i+9], 37) + q[i+10] + SPH_ROTL64(q[i+11], 43) + \ + q[i+12] + SPH_ROTL64(q[i+13], 53) + (SHR(q[i+14],1) ^ q[i+14]) + (SHR(q[i+15],2) ^ q[i+15]) + +typedef union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; +} hash_t; + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, __global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // blake + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + T1 = SPH_T64(T1 + 1); + + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash->h8[0] = H0; + hash->h8[1] = H1; + hash->h8[2] = H2; + hash->h8[3] = H3; + hash->h8[4] = H4; + hash->h8[5] = H5; + hash->h8[6] = H6; + 
hash->h8[7] = H7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // bmw + sph_u64 BMW_H[16]; + +#pragma unroll 16 + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 mv[16],q[32]; + sph_u64 tmp; + + mv[0] = SWAP8(hash->h8[0]); + mv[1] = SWAP8(hash->h8[1]); + mv[2] = SWAP8(hash->h8[2]); + mv[3] = SWAP8(hash->h8[3]); + mv[4] = SWAP8(hash->h8[4]); + mv[5] = SWAP8(hash->h8[5]); + mv[6] = SWAP8(hash->h8[6]); + mv[7] = SWAP8(hash->h8[7]); + mv[8] = 0x80; + mv[9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = SPH_C64(512); + + tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); + q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; + tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); + q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); + q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + tmp = 
(mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); + q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; + tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); + q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; + tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); + q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); + q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); + q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); + q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; + tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); + q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); + q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; + tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); + q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; + +#pragma unroll 2 + 
for(int i=0;i<2;i++) + { + q[i+16] = + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=2;i<6;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 3 + for(int i=6;i<9;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], 
(i-6)+1) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=9;i<13;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + +#pragma unroll 3 + for(int i=13;i<16;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + + sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; + sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + + BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); + BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); + BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); + BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); + BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); + BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); + BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); + BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); + + BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); + BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); + BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); + BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); + BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); + BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); + BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); + BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ 
q[15]); + +#pragma unroll 16 + for(int i=0;i<16;i++) + { + mv[i] = BMW_H[i]; + BMW_H[i] = 0xaaaaaaaaaaaaaaa0ull + (sph_u64)i; + } + + tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); + q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; + tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); + q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); + q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); + q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; + tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); + q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; + tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); + q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; + tmp = (mv[0] ^ BMW_H[0]) - 
(mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); + q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); + q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); + q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; + tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); + q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); + q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; + tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); + q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; + +#pragma unroll 2 + for(int i=0;i<2;i++) + { + q[i+16] = + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + 
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=2;i<6;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 3 + for(int i=6;i<9;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=9;i<13;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + +#pragma unroll 3 + for(int i=13;i<16;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + + XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; + XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + + BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) 
+ ( XL64 ^ q[24] ^ q[0]); + BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); + BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); + BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); + BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); + BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); + BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); + BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); + + BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); + BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); + BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); + BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); + BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); + BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); + BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); + BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); + + hash->h8[0] = SWAP8(BMW_H[8]); + hash->h8[1] = SWAP8(BMW_H[9]); + hash->h8[2] = SWAP8(BMW_H[10]); + hash->h8[3] = SWAP8(BMW_H[11]); + hash->h8[4] = SWAP8(BMW_H[12]); + hash->h8[5] = SWAP8(BMW_H[13]); + hash->h8[6] = SWAP8(BMW_H[14]); + hash->h8[7] = SWAP8(BMW_H[15]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} +
+// Groestl stage of the hash chain.
+//
+// Reads the eight 64-bit words of this work-item's entry in `hashes`,
+// runs them through PERM_BIG_P / PERM_BIG_Q as a single 16-word block and
+// writes words 8..15 of the resulting state back into the same entry.
+//
+// hashes: one hash_t per work-item, indexed by global id minus global offset.
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search2(__global hash_t* hashes)
+{
+  uint gid = get_global_id(0);
+  __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
+
+#if !SPH_SMALL_FOOTPRINT_GROESTL
+  __local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256];
+  __local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256];
+#else
+  __local sph_u64 T0_C[256], T4_C[256];
+#endif
+  int init = get_local_id(0);
+  int step = get_local_size(0);
+
+  // Cooperative copy of the lookup tables into __local memory: each
+  // work-item copies entries init, init + step, init + 2 * step, ...
+  _Pragma("unroll") for (int i = init; i < 256; i += step)
+  {
+    T0_C[i] = T0[i];
+    T4_C[i] = T4[i];
+#if !SPH_SMALL_FOOTPRINT_GROESTL
+    T1_C[i] = T1[i];
+    T2_C[i] = T2[i];
+    T3_C[i] = T3[i];
+    T5_C[i] = T5[i];
+    T6_C[i] = T6[i];
+    T7_C[i] = T7[i];
+#endif
+  }
+  // The tables are only complete once every work-item has done its share.
+  barrier(CLK_LOCAL_MEM_FENCE); // groestl
+  // From here on the table names resolve to the __local copies
+  // (presumably so that the PERM_BIG_* macros pick them up - confirm in groestl.cl).
+#define T0 T0_C
+#define T1 T1_C
+#define T2 T2_C
+#define T3 T3_C
+#define T4 T4_C
+#define T5 T5_C
+#define T6 T6_C
+#define T7 T7_C
+
+
+  // Initial chaining value: all zero except the last word, which holds 512.
+  sph_u64 H[16];
+//#pragma unroll 15
+  _Pragma("unroll") for (unsigned int u = 0; u < 15; u ++) H[u] = 0;
+#if USE_LE
+  H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
+#else
+  H[15] = (sph_u64)512;
+#endif
+
+  sph_u64 g[16], m[16];
+  m[0] = DEC64E(hash->h8[0]);
+  m[1] = DEC64E(hash->h8[1]);
+  m[2] = DEC64E(hash->h8[2]);
+  m[3] = DEC64E(hash->h8[3]);
+  m[4] = DEC64E(hash->h8[4]);
+  m[5] = DEC64E(hash->h8[5]);
+  m[6] = DEC64E(hash->h8[6]);
+  m[7] = DEC64E(hash->h8[7]);
+
+  // Only m[0..7] hold data at this point. The loop used to run to 16 and
+  // read m[8..15] while they were still uninitialized; g[8..15] are set
+  // together with their m[] words just below, so the result is unchanged.
+//#pragma unroll 8
+  _Pragma("unroll") for (unsigned int u = 0; u < 8; u ++) g[u] = m[u] ^ H[u];
+
+  // Fixed tail of the block (presumably the Groestl padding for a 64-byte
+  // message - confirm against groestl.cl).
+  m[8] = 0x80; g[8] = m[8] ^ H[8];
+  m[9] = 0; g[9] = m[9] ^ H[9];
+  m[10] = 0; g[10] = m[10] ^ H[10];
+  m[11] = 0; g[11] = m[11] ^ H[11];
+  m[12] = 0; g[12] = m[12] ^ H[12];
+  m[13] = 0; g[13] = m[13] ^ H[13];
+  m[14] = 0; g[14] = m[14] ^ H[14];
+  m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
+  PERM_BIG_P(g);
+  PERM_BIG_Q(m);
+
+//#pragma unroll 16
+  _Pragma("unroll") for (unsigned int u = 0; u < 16; u ++) H[u] ^= g[u] ^ m[u];
+  sph_u64 xH[16];
+
+  // Final step: H ^= P(H), computed on a scratch copy.
+//#pragma unroll 16
+  _Pragma("unroll") for (unsigned int u = 0; u < 16; u ++)
+    xH[u] = H[u];
+  PERM_BIG_P(xH);
+
+//#pragma unroll 16
+  _Pragma("unroll") for (unsigned int u = 0; u < 16; u ++)
+    H[u] ^= xH[u];
+
+  // Words 8..15 of the state become the new contents of the hash entry.
+//#pragma unroll 8
+  _Pragma("unroll") for (unsigned int u = 0; u < 8; u ++)
+    hash->h8[u] = DEC64E(H[u + 8]);
+
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+}
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global hash_t* hashes) +{ + + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // skein + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash->h8[0]); + m1 = SWAP8(hash->h8[1]); + m2 = SWAP8(hash->h8[2]); + m3 = SWAP8(hash->h8[3]); + m4 = SWAP8(hash->h8[4]); + m5 = SWAP8(hash->h8[5]); + m6 = SWAP8(hash->h8[6]); + m7 = SWAP8(hash->h8[7]); + + UBI_BIG(480, 64); + + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + + UBI_BIG(510, 8); + + hash->h8[0] = SWAP8(h0); + hash->h8[1] = SWAP8(h1); + hash->h8[2] = SWAP8(h2); + hash->h8[3] = SWAP8(h3); + hash->h8[4] = SWAP8(h4); + hash->h8[5] = SWAP8(h5); + hash->h8[6] = SWAP8(h6); + hash->h8[7] = SWAP8(h7); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // jh + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + h0h ^= DEC64E(hash->h8[0]); + h0l ^= DEC64E(hash->h8[1]); + h1h ^= DEC64E(hash->h8[2]); + h1l 
^= DEC64E(hash->h8[3]); + h2h ^= DEC64E(hash->h8[4]); + h2l ^= DEC64E(hash->h8[5]); + h3h ^= DEC64E(hash->h8[6]); + h3l ^= DEC64E(hash->h8[7]); + + E8; + + h4h ^= DEC64E(hash->h8[0]); + h4l ^= DEC64E(hash->h8[1]); + h5h ^= DEC64E(hash->h8[2]); + h5l ^= DEC64E(hash->h8[3]); + h6h ^= DEC64E(hash->h8[4]); + h6l ^= DEC64E(hash->h8[5]); + h7h ^= DEC64E(hash->h8[6]); + h7l ^= DEC64E(hash->h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + + E8; + + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash->h8[0] = DEC64E(h4h); + hash->h8[1] = DEC64E(h4l); + hash->h8[2] = DEC64E(h5h); + hash->h8[3] = DEC64E(h5l); + hash->h8[4] = DEC64E(h6h); + hash->h8[5] = DEC64E(h6l); + hash->h8[6] = DEC64E(h7h); + hash->h8[7] = DEC64E(h7l); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search5(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash->h8[0]); + a10 ^= SWAP8(hash->h8[1]); + a20 ^= SWAP8(hash->h8[2]); + a30 ^= SWAP8(hash->h8[3]); + a40 ^= SWAP8(hash->h8[4]); + a01 ^= SWAP8(hash->h8[5]); + a11 ^= SWAP8(hash->h8[6]); + a21 ^= SWAP8(hash->h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash->h8[0] = SWAP8(a00); + hash->h8[1] = SWAP8(a10); + hash->h8[2] = SWAP8(a20); + hash->h8[3] = SWAP8(a30); + hash->h8[4] = SWAP8(a40); + hash->h8[5] = SWAP8(a01); + 
hash->h8[6] = SWAP8(a11); + hash->h8[7] = SWAP8(a21); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +
+// GOST stage of the hash chain.
+//
+// Byte-swaps the eight 64-bit words of this work-item's entry in `hashes`,
+// passes them through GOST_HASH_512 and stores the byte-swapped result
+// back into the same entry.
+//
+// hashes: one hash_t per work-item, indexed by global id minus global offset.
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search6(__global hash_t* hashes)
+{
+  uint gid = get_global_id(0);
+  __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
+
+  // gost
+
+  sph_u64 message[8], out[8];
+  // Not referenced directly in this kernel; presumably consumed inside
+  // GOST_HASH_512 - TODO confirm against gost-mod.cl before removing.
+  sph_u64 len = 512;
+
+  __local sph_u64 lT[8][256];
+
+  int init = get_local_id(0);
+  int step = get_local_size(0);
+
+  // Cooperative copy of T into the __local table lT: each work-item copies
+  // columns init, init + step, init + 2 * step, ... of all eight rows.
+  _Pragma("unroll") for(int j=init;j<256;j+=step) {
+    _Pragma("unroll") for (int i=0; i<8; i++) lT[i][j] = T[i][j];
+  }
+
+  // Every work-item writes only part of lT, so the whole work-group has to
+  // finish the copy before the table may be read. This barrier was missing
+  // here; the other kernels in this file that stage tables in __local
+  // memory all have it. (Assumes GOST_HASH_512 reads lT - the macro is not
+  // defined in this file.)
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  message[0] = SWAP8(hash->h8[0]);
+  message[1] = SWAP8(hash->h8[1]);
+  message[2] = SWAP8(hash->h8[2]);
+  message[3] = SWAP8(hash->h8[3]);
+  message[4] = SWAP8(hash->h8[4]);
+  message[5] = SWAP8(hash->h8[5]);
+  message[6] = SWAP8(hash->h8[6]);
+  message[7] = SWAP8(hash->h8[7]);
+
+  GOST_HASH_512(message, out);
+
+  hash->h8[0] = SWAP8(out[0]);
+  hash->h8[1] = SWAP8(out[1]);
+  hash->h8[2] = SWAP8(out[2]);
+  hash->h8[3] = SWAP8(out[3]);
+  hash->h8[4] = SWAP8(out[4]);
+  hash->h8[5] = SWAP8(out[5]);
+  hash->h8[6] = SWAP8(out[6]);
+  hash->h8[7] = SWAP8(out[7]);
+
+
+  barrier(CLK_GLOBAL_MEM_FENCE);
+}
+
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search7(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // luffa + + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), 
V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash->h4[1]; + M1 = hash->h4[0]; + M2 = hash->h4[3]; + M3 = hash->h4[2]; + M4 = hash->h4[5]; + M5 = hash->h4[4]; + M6 = hash->h4[7]; + M7 = hash->h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) + { + M0 = hash->h4[9]; + M1 = hash->h4[8]; + M2 = hash->h4[11]; + M3 = hash->h4[10]; + M4 = hash->h4[13]; + M5 = hash->h4[12]; + M6 = hash->h4[15]; + M7 = hash->h4[14]; + } + else if(i == 1) + { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } + else if(i == 2) + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + else if(i == 3) + { + hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + } + + hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search8(__global hash_t* 
hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // cubehash.h1 + + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash->h4[1]); + x1 ^= SWAP4(hash->h4[0]); + x2 ^= SWAP4(hash->h4[3]); + x3 ^= SWAP4(hash->h4[2]); + x4 ^= SWAP4(hash->h4[5]); + x5 ^= SWAP4(hash->h4[4]); + x6 ^= SWAP4(hash->h4[7]); + x7 ^= SWAP4(hash->h4[6]); + + for (int i = 0; i < 13; i ++) + { + SIXTEEN_ROUNDS; + + if (i == 0) + { + x0 ^= SWAP4(hash->h4[9]); + x1 ^= SWAP4(hash->h4[8]); + x2 ^= SWAP4(hash->h4[11]); + x3 ^= SWAP4(hash->h4[10]); + x4 ^= SWAP4(hash->h4[13]); + x5 ^= SWAP4(hash->h4[12]); + x6 ^= SWAP4(hash->h4[15]); + x7 ^= SWAP4(hash->h4[14]); + } + else if(i == 1) + x0 ^= 0x80; + else if (i == 2) + xv ^= SPH_C32(1); + } + + hash->h4[0] = x0; + hash->h4[1] = x1; + hash->h4[2] = x2; + hash->h4[3] = x3; + hash->h4[4] = x4; + hash->h4[5] = x5; + hash->h4[6] = x6; + hash->h4[7] = x7; + hash->h4[8] = x8; + hash->h4[9] = x9; + hash->h4[10] = xa; + hash->h4[11] = xb; + hash->h4[12] = xc; + hash->h4[13] = xd; + hash->h4[14] = xe; + hash->h4[15] = xf; + + 
barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search9(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + _Pragma("unroll") for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // shavite + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash->h4[0]; + rk01 = hash->h4[1]; + rk02 = hash->h4[2]; + rk03 = hash->h4[3]; + rk04 = hash->h4[4]; + rk05 = hash->h4[5]; + rk06 = hash->h4[6]; + rk07 = hash->h4[7]; + rk08 = hash->h4[8]; + rk09 = hash->h4[9]; + rk0A = hash->h4[10]; + rk0B = hash->h4[11]; + rk0C = hash->h4[12]; + rk0D = hash->h4[13]; + rk0E = hash->h4[14]; + rk0F = hash->h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash->h4[0] = h0; + hash->h4[1] = h1; + hash->h4[2] = h2; + hash->h4[3] = h3; + hash->h4[4] = h4; + 
hash->h4[5] = h5; + hash->h4[6] = h6; + hash->h4[7] = h7; + hash->h4[8] = h8; + hash->h4[9] = h9; + hash->h4[10] = hA; + hash->h4[11] = hB; + hash->h4[12] = hC; + hash->h4[13] = hD; + hash->h4[14] = hE; + hash->h4[15] = hF; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search10(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // simd + s32 q[256]; + unsigned char x[128]; + _Pragma("unroll") for(unsigned int i = 0; i < 64; i++) + x[i] = hash->h1[i]; + _Pragma("unroll") for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) + { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? 
tq : tq - 257); + } + + A0 ^= hash->h4[0]; + A1 ^= hash->h4[1]; + A2 ^= hash->h4[2]; + A3 ^= hash->h4[3]; + A4 ^= hash->h4[4]; + A5 ^= hash->h4[5]; + A6 ^= hash->h4[6]; + A7 ^= hash->h4[7]; + B0 ^= hash->h4[8]; + B1 ^= hash->h4[9]; + B2 ^= hash->h4[10]; + B3 ^= hash->h4[11]; + B4 ^= hash->h4[12]; + B5 ^= hash->h4[13]; + B6 ^= hash->h4[14]; + B7 ^= hash->h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, 
PP8_4_); + + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + + #undef q + + hash->h4[0] = A0; + hash->h4[1] = A1; + hash->h4[2] = A2; + hash->h4[3] = A3; + hash->h4[4] = A4; + hash->h4[5] = A5; + hash->h4[6] = A6; + hash->h4[7] = A7; + hash->h4[8] = B0; + hash->h4[9] = B1; + hash->h4[10] = B2; + hash->h4[11] = B3; + hash->h4[12] = B4; + hash->h4[13] = B5; + hash->h4[14] = B6; + hash->h4[15] = B7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search11(__global hash_t* hashes, __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + _Pragma("unroll") for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // copies hashes to "hash" + // echo + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + 
W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash->h8[0]; + W81 = hash->h8[1]; + W90 = hash->h8[2]; + W91 = hash->h8[3]; + WA0 = hash->h8[4]; + WA1 = hash->h8[5]; + WB0 = hash->h8[6]; + WB1 = hash->h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) + BIG_ROUND; + + Vb00 ^= hash->h8[0] ^ W00 ^ W80; + Vb01 ^= hash->h8[1] ^ W01 ^ W81; + Vb10 ^= hash->h8[2] ^ W10 ^ W90; + Vb11 ^= hash->h8[3] ^ W11 ^ W91; + Vb20 ^= hash->h8[4] ^ W20 ^ WA0; + Vb21 ^= hash->h8[5] ^ W21 ^ WA1; + Vb30 ^= hash->h8[6] ^ W30 ^ WB0; + Vb31 ^= hash->h8[7] ^ W31 ^ WB1; + + barrier(CLK_GLOBAL_MEM_FENCE); + + bool result = (Vb11 <= target); + + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); +} + +#endif// SIBCOIN_MOD_CL \ No newline at end of file diff --git a/kernel/sibcoin.cl b/kernel/sibcoin.cl new file mode 100644 index 000000000..184a6133c --- /dev/null +++ b/kernel/sibcoin.cl @@ -0,0 +1,759 @@ +/* + * GOST R 34.11-2012 kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2015 ivansib + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Ivan + */ + +#ifndef SIBCOIN_CL +#define SIBCOIN_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_CUBEHASH_UNROLL 0 +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_LUFFA_PARALLEL 0 +#define SPH_KECCAK_UNROLL 0 + +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" +#include "luffa.cl" +#include "cubehash.cl" +#include "shavite.cl" +#include "simd.cl" +#include "echo.cl" +#include "gost.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global 
sph_u64 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); +#endif + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; + } hash; + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + // blake + { + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; + } + // bmw + { + sph_u64 
BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash.h8[0]); + mv[ 1] = SWAP8(hash.h8[1]); + mv[ 2] = SWAP8(hash.h8[2]); + mv[ 3] = SWAP8(hash.h8[3]); + mv[ 4] = SWAP8(hash.h8[4]); + mv[ 5] = SWAP8(hash.h8[5]); + mv[ 6] = SWAP8(hash.h8[6]); + mv[ 7] = SWAP8(hash.h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; +#define M(x) (mv[x]) +#define H(x) (BMW_H[x]) +#define dH(x) (BMW_h2[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + +#define M(x) (BMW_h2[x]) +#define H(x) (final_b[x]) +#define dH(x) (BMW_h1[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + } + + // groestl + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u 
++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + } + + // skein + { + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + } + + // jh + { + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= 
DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); + } + + // keccak + { + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + } + + // gost + { + sph_u64 m[8], out[8]; + + m[0] = SWAP8(hash.h8[0]); + m[1] = SWAP8(hash.h8[1]); + m[2] = SWAP8(hash.h8[2]); + m[3] = SWAP8(hash.h8[3]); + m[4] = SWAP8(hash.h8[4]); + m[5] = SWAP8(hash.h8[5]); + m[6] = SWAP8(hash.h8[6]); + m[7] = SWAP8(hash.h8[7]); + + GOST_hash_512((const unsigned char*)m, 8*64, (unsigned char*)out); + + hash.h8[0] = SWAP8(out[0]); + hash.h8[1] = SWAP8(out[1]); + hash.h8[2] = SWAP8(out[2]); + 
hash.h8[3] = SWAP8(out[3]); + hash.h8[4] = SWAP8(out[4]); + hash.h8[5] = SWAP8(out[5]); + hash.h8[6] = SWAP8(out[6]); + hash.h8[7] = SWAP8(out[7]); + + } + + // luffa + { + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash.h4[1]; + M1 = hash.h4[0]; + M2 = hash.h4[3]; + M3 = hash.h4[2]; + M4 = hash.h4[5]; + M5 = hash.h4[4]; + M6 = hash.h4[7]; + M7 = hash.h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) { + M0 = hash.h4[9]; + M1 = hash.h4[8]; + M2 = hash.h4[11]; + M3 = hash.h4[10]; + M4 = hash.h4[13]; + M5 = hash.h4[12]; + M6 = hash.h4[15]; + M7 = hash.h4[14]; + } else if(i == 1) { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 2) { + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 3) { + hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ 
V42; + hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + } + hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + + // cubehash.h1 + { + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash.h4[1]); + x1 ^= SWAP4(hash.h4[0]); + x2 ^= SWAP4(hash.h4[3]); + x3 ^= SWAP4(hash.h4[2]); + x4 ^= SWAP4(hash.h4[5]); + x5 ^= SWAP4(hash.h4[4]); + x6 ^= SWAP4(hash.h4[7]); + x7 ^= SWAP4(hash.h4[6]); + + for (int i = 0; i < 13; i ++) { + SIXTEEN_ROUNDS; + + if (i == 0) { + x0 ^= SWAP4(hash.h4[9]); + x1 ^= SWAP4(hash.h4[8]); + x2 ^= SWAP4(hash.h4[11]); + x3 ^= SWAP4(hash.h4[10]); + x4 ^= SWAP4(hash.h4[13]); + x5 ^= SWAP4(hash.h4[12]); + x6 ^= 
SWAP4(hash.h4[15]); + x7 ^= SWAP4(hash.h4[14]); + } else if(i == 1) { + x0 ^= 0x80; + } else if (i == 2) { + xv ^= SPH_C32(1); + } + } + + hash.h4[0] = x0; + hash.h4[1] = x1; + hash.h4[2] = x2; + hash.h4[3] = x3; + hash.h4[4] = x4; + hash.h4[5] = x5; + hash.h4[6] = x6; + hash.h4[7] = x7; + hash.h4[8] = x8; + hash.h4[9] = x9; + hash.h4[10] = xa; + hash.h4[11] = xb; + hash.h4[12] = xc; + hash.h4[13] = xd; + hash.h4[14] = xe; + hash.h4[15] = xf; + } + + // shavite + { + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash.h4[0]; + rk01 = hash.h4[1]; + rk02 = hash.h4[2]; + rk03 = hash.h4[3]; + rk04 = hash.h4[4]; + rk05 = hash.h4[5]; + rk06 = hash.h4[6]; + rk07 = hash.h4[7]; + rk08 = hash.h4[8]; + rk09 = hash.h4[9]; + rk0A = hash.h4[10]; + rk0B = hash.h4[11]; + rk0C = hash.h4[12]; + rk0D = hash.h4[13]; + rk0E = hash.h4[14]; + rk0F = hash.h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash.h4[0] = h0; + hash.h4[1] = h1; + hash.h4[2] = h2; + hash.h4[3] = h3; + hash.h4[4] = h4; + hash.h4[5] = h5; + hash.h4[6] = h6; + hash.h4[7] = h7; + hash.h4[8] = h8; + hash.h4[9] = h9; + hash.h4[10] = hA; + hash.h4[11] 
= hB; + hash.h4[12] = hC; + hash.h4[13] = hD; + hash.h4[14] = hE; + hash.h4[15] = hF; + } + + // simd + { + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash.h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? 
tq : tq - 257); + } + + A0 ^= hash.h4[0]; + A1 ^= hash.h4[1]; + A2 ^= hash.h4[2]; + A3 ^= hash.h4[3]; + A4 ^= hash.h4[4]; + A5 ^= hash.h4[5]; + A6 ^= hash.h4[6]; + A7 ^= hash.h4[7]; + B0 ^= hash.h4[8]; + B1 ^= hash.h4[9]; + B2 ^= hash.h4[10]; + B3 ^= hash.h4[11]; + B4 ^= hash.h4[12]; + B5 ^= hash.h4[13]; + B6 ^= hash.h4[14]; + B7 ^= hash.h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( + 
COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + #undef q + + hash.h4[0] = A0; + hash.h4[1] = A1; + hash.h4[2] = A2; + hash.h4[3] = A3; + hash.h4[4] = A4; + hash.h4[5] = A5; + hash.h4[6] = A6; + hash.h4[7] = A7; + hash.h4[8] = B0; + hash.h4[9] = B1; + hash.h4[10] = B2; + hash.h4[11] = B3; + hash.h4[12] = B4; + hash.h4[13] = B5; + hash.h4[14] = B6; + hash.h4[15] = B7; + } + + // echo + { + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) { + BIG_ROUND; + } + + Vb00 ^= hash.h8[0] ^ W00 ^ W80; + Vb01 ^= hash.h8[1] ^ W01 ^ W81; + Vb10 ^= hash.h8[2] ^ W10 ^ W90; + Vb11 ^= hash.h8[3] ^ W11 ^ W91; + Vb20 ^= hash.h8[4] ^ W20 ^ WA0; + Vb21 ^= hash.h8[5] ^ W21 ^ WA1; + Vb30 ^= hash.h8[6] ^ W30 ^ WB0; + Vb31 ^= hash.h8[7] ^ W31 ^ WB1; + + bool result = (Vb11 <= 
target); + if (result) + output[output[0xFF]++] = SWAP4(gid); + } +} + +#endif // SIBCOIN_CL \ No newline at end of file diff --git a/sph/Makefile.am b/sph/Makefile.am index 2c2a69502..ce1c7ed96 100644 --- a/sph/Makefile.am +++ b/sph/Makefile.am @@ -1,3 +1,3 @@ noinst_LIBRARIES = libsph.a -libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c ripemd.c +libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c gost.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c ripemd.c diff --git a/sph/gost.c b/sph/gost.c new file mode 100644 index 000000000..f46aeb583 --- /dev/null +++ b/sph/gost.c @@ -0,0 +1,1104 @@ +/* $Id: gost.c 259 2011-07-19 22:11:27Z tp $ */ +/* + * GOST (Stribog) hash implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +//#include +//#include + +#include +#include +#include +#include + +#include "sph_gost.h" + +#ifdef __cplusplus +extern "C"{ +#endif + + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +//-------------------------------------------------------------------------------------------- +// +// stribog implementation +// +//-------------------------------------------------------------------------------------------- + + +// Tables for function F +const unsigned long long T[8][256] = { + { + 0xE6F87E5C5B711FD0,0x258377800924FA16,0xC849E07E852EA4A8,0x5B4686A18F06C16A, + 0x0B32E9A2D77B416E,0xABDA37A467815C66,0xF61796A81A686676,0xF5DC0B706391954B, + 0x4862F38DB7E64BF1,0xFF5C629A68BD85C5,0xCB827DA6FCD75795,0x66D36DAF69B9F089, + 0x356C9F74483D83B0,0x7CBCECB1238C99A1,0x36A702AC31C4708D,0x9EB6A8D02FBCDFD6, + 0x8B19FA51E5B3AE37,0x9CCFB5408A127D0B,0xBC0C78B508208F5A,0xE533E3842288ECED, + 0xCEC2C7D377C15FD2,0xEC7817B6505D0F5E,0xB94CC2C08336871D,0x8C205DB4CB0B04AD, + 0x763C855B28A0892F,0x588D1B79F6FF3257,0x3FECF69E4311933E,0x0FC0D39F803A18C9, + 0xEE010A26F5F3AD83,0x10EFE8F4411979A6,0x5DCDA10C7DE93A10,0x4A1BEE1D1248E92C, + 0x53BFF2DB21847339,0xB4F50CCFA6A23D09,0x5FB4BC9CD84798CD,0xE88A2D8B071C56F9, + 0x7F7771695A756A9C,0xC5F02E71A0BA1EBC,0xA663F9AB4215E672,0x2EB19E22DE5FBB78, + 0x0DB9CE0F2594BA14,0x82520E6397664D84,0x2F031E6A0208EA98,0x5C7F2144A1BE6BF0, + 0x7A37CB1CD16362DB,0x83E08E2B4B311C64,0xCF70479BAB960E32,0x856BA986B9DEE71E, + 0xB5478C877AF56CE9,0xB8FE42885F61D6FD,0x1BDD0156966238C8,0x622157923EF8A92E, + 0xFC97FF42114476F8,0x9D7D350856452CEB,0x4C90C9B0E0A71256,0x2308502DFBCB016C, + 
0x2D7A03FAA7A64845,0xF46E8B38BFC6C4AB,0xBDBEF8FDD477DEBA,0x3AAC4CEBC8079B79, + 0xF09CB105E8879D0C,0x27FA6A10AC8A58CB,0x8960E7C1401D0CEA,0x1A6F811E4A356928, + 0x90C4FB0773D196FF,0x43501A2F609D0A9F,0xF7A516E0C63F3796,0x1CE4A6B3B8DA9252, + 0x1324752C38E08A9B,0xA5A864733BEC154F,0x2BF124575549B33F,0xD766DB15440DC5C7, + 0xA7D179E39E42B792,0xDADF151A61997FD3,0x86A0345EC0271423,0x38D5517B6DA939A4, + 0x6518F077104003B4,0x02791D90A5AEA2DD,0x88D267899C4A5D0A,0x930F66DF0A2865C2, + 0x4EE9D4204509B08B,0x325538916685292A,0x412907BFC533A842,0xB27E2B62544DC673, + 0x6C5304456295E007,0x5AF406E95351908A,0x1F2F3B6BC123616F,0xC37B09DC5255E5C6, + 0x3967D133B1FE6844,0x298839C7F0E711E2,0x409B87F71964F9A2,0xE938ADC3DB4B0719, + 0x0C0B4E47F9C3EBF4,0x5534D576D36B8843,0x4610A05AEB8B02D8,0x20C3CDF58232F251, + 0x6DE1840DBEC2B1E7,0xA0E8DE06B0FA1D08,0x7B854B540D34333B,0x42E29A67BCCA5B7F, + 0xD8A6088AC437DD0E,0xC63BB3A9D943ED81,0x21714DBD5E65A3B1,0x6761EDE7B5EEA169, + 0x2431F7C8D573ABF6,0xD51FC685E1A3671A,0x5E063CD40410C92D,0x283AB98F2CB04002, + 0x8FEBC06CB2F2F790,0x17D64F116FA1D33C,0xE07359F1A99EE4AA,0x784ED68C74CDC006, + 0x6E2A19D5C73B42DA,0x8712B4161C7045C3,0x371582E4ED93216D,0xACE390414939F6FC, + 0x7EC5F12186223B7C,0xC0B094042BAC16FB,0xF9D745379A527EBF,0x737C3F2EA3B68168, + 0x33E7B8D9BAD278CA,0xA9A32A34C22FFEBB,0xE48163CCFEDFBD0D,0x8E5940246EA5A670, + 0x51C6EF4B842AD1E4,0x22BAD065279C508C,0xD91488C218608CEE,0x319EA5491F7CDA17, + 0xD394E128134C9C60,0x094BF43272D5E3B3,0x9BF612A5A4AAD791,0xCCBBDA43D26FFD0F, + 0x34DE1F3C946AD250,0x4F5B5468995EE16B,0xDF9FAF6FEA8F7794,0x2648EA5870DD092B, + 0xBFC7E56D71D97C67,0xDDE6B2FF4F21D549,0x3C276B463AE86003,0x91767B4FAF86C71F, + 0x68A13E7835D4B9A0,0xB68C115F030C9FD4,0x141DD2C916582001,0x983D8F7DDD5324AC, + 0x64AA703FCC175254,0xC2C989948E02B426,0x3E5E76D69F46C2DE,0x50746F03587D8004, + 0x45DB3D829272F1E5,0x60584A029B560BF3,0xFBAE58A73FFCDC62,0xA15A5E4E6CAD4CE8, + 0x4BA96E55CE1FB8CC,0x08F9747AAE82B253,0xC102144CF7FB471B,0x9F042898F3EB8E36, + 
0x068B27ADF2EFFB7A,0xEDCA97FE8C0A5EBE,0x778E0513F4F7D8CF,0x302C2501C32B8BF7, + 0x8D92DDFC175C554D,0xF865C57F46052F5F,0xEAF3301BA2B2F424,0xAA68B7ECBBD60D86, + 0x998F0F350104754C,0x0000000000000000,0xF12E314D34D0CCEC,0x710522BE061823B5, + 0xAF280D9930C005C1,0x97FD5CE25D693C65,0x19A41CC633CC9A15,0x95844172F8C79EB8, + 0xDC5432B7937684A9,0x9436C13A2490CF58,0x802B13F332C8EF59,0xC442AE397CED4F5C, + 0xFA1CD8EFE3AB8D82,0xF2E5AC954D293FD1,0x6AD823E8907A1B7D,0x4D2249F83CF043B6, + 0x03CB9DD879F9F33D,0xDE2D2F2736D82674,0x2A43A41F891EE2DF,0x6F98999D1B6C133A, + 0xD4AD46CD3DF436FA,0xBB35DF50269825C0,0x964FDCAA813E6D85,0xEB41B0537EE5A5C4, + 0x0540BA758B160847,0xA41AE43BE7BB44AF,0xE3B8C429D0671797,0x819993BBEE9FBEB9, + 0xAE9A8DD1EC975421,0xF3572CDD917E6E31,0x6393D7DAE2AFF8CE,0x47A2201237DC5338, + 0xA32343DEC903EE35,0x79FC56C4A89A91E6,0x01B28048DC5751E0,0x1296F564E4B7DB7B, + 0x75F7188351597A12,0xDB6D9552BDCE2E33,0x1E9DBB231D74308F,0x520D7293FDD322D9, + 0xE20A44610C304677,0xFEEEE2D2B4EAD425,0xCA30FDEE20800675,0x61EACA4A47015A13, + 0xE74AFE1487264E30,0x2CC883B27BF119A5,0x1664CF59B3F682DC,0xA811AA7C1E78AF5B, + 0x1D5626FB648DC3B2,0xB73E9117DF5BCE34,0xD05F7CF06AB56F5D,0xFD257F0ACD132718, + 0x574DC8E676C52A9E,0x0739A7E52EB8AA9A,0x5486553E0F3CD9A3,0x56FF48AEAA927B7E, + 0xBE756525AD8E2D87,0x7D0E6CF9FFDBC841,0x3B1ECCA31450CA99,0x6913BE30E983E840, + 0xAD511009956EA71C,0xB1B5B6BA2DB4354E,0x4469BDCA4E25A005,0x15AF5281CA0F71E1, + 0x744598CB8D0E2BF2,0x593F9B312AA863B7,0xEFB38A6E29A4FC63,0x6B6AA3A04C2D4A9D, + 0x3D95EB0EE6BF31E3,0xA291C3961554BFD5,0x18169C8EEF9BCBF5,0x115D68BC9D4E2846, + 0xBA875F18FACF7420,0xD1EDFCB8B6E23EBD,0xB00736F2F1E364AE,0x84D929CE6589B6FE, + 0x70B7A2F6DA4F7255,0x0E7253D75C6D4929,0x04F23A3D574159A7,0x0A8069EA0B2C108E, + 0x49D073C56BB11A11,0x8AAB7A1939E4FFD7,0xCD095A0B0E38ACEF,0xC9FB60365979F548, + 0x92BDE697D67F3422,0xC78933E10514BC61,0xE1C1D9B975C9B54A,0xD2266160CF1BCD80, + 0x9A4492ED78FD8671,0xB3CCAB2A881A9793,0x72CEBF667FE1D088,0xD6D45B5D985A9427 + }, + { + 
0xC811A8058C3F55DE,0x65F5B43196B50619,0xF74F96B1D6706E43,0x859D1E8BCB43D336, + 0x5AAB8A85CCFA3D84,0xF9C7BF99C295FCFD,0xA21FD5A1DE4B630F,0xCDB3EF763B8B456D, + 0x803F59F87CF7C385,0xB27C73BE5F31913C,0x98E3AC6633B04821,0xBF61674C26B8F818, + 0x0FFBC995C4C130C8,0xAAA0862010761A98,0x6057F342210116AA,0xF63C760C0654CC35, + 0x2DDB45CC667D9042,0xBCF45A964BD40382,0x68E8A0C3EF3C6F3D,0xA7BD92D269FF73BC, + 0x290AE20201ED2287,0xB7DE34CDE885818F,0xD901EEA7DD61059B,0xD6FA273219A03553, + 0xD56F1AE874CCCEC9,0xEA31245C2E83F554,0x7034555DA07BE499,0xCE26D2AC56E7BEF7, + 0xFD161857A5054E38,0x6A0E7DA4527436D1,0x5BD86A381CDE9FF2,0xCAF7756231770C32, + 0xB09AAED9E279C8D0,0x5DEF1091C60674DB,0x111046A2515E5045,0x23536CE4729802FC, + 0xC50CBCF7F5B63CFA,0x73A16887CD171F03,0x7D2941AFD9F28DBD,0x3F5E3EB45A4F3B9D, + 0x84EEFE361B677140,0x3DB8E3D3E7076271,0x1A3A28F9F20FD248,0x7EBC7C75B49E7627, + 0x74E5F293C7EB565C,0x18DCF59E4F478BA4,0x0C6EF44FA9ADCB52,0xC699812D98DAC760, + 0x788B06DC6E469D0E,0xFC65F8EA7521EC4E,0x30A5F7219E8E0B55,0x2BEC3F65BCA57B6B, + 0xDDD04969BAF1B75E,0x99904CDBE394EA57,0x14B201D1E6EA40F6,0xBBB0C08241284ADD, + 0x50F20463BF8F1DFF,0xE8D7F93B93CBACB8,0x4D8CB68E477C86E8,0xC1DD1B3992268E3F, + 0x7C5AA11209D62FCB,0x2F3D98ABDB35C9AE,0x671369562BFD5FF5,0x15C1E16C36CEE280, + 0x1D7EB2EDF8F39B17,0xDA94D37DB00DFE01,0x877BC3EC760B8ADA,0xCB8495DFE153AE44, + 0x05A24773B7B410B3,0x12857B783C32ABDF,0x8EB770D06812513B,0x536739B9D2E3E665, + 0x584D57E271B26468,0xD789C78FC9849725,0xA935BBFA7D1AE102,0x8B1537A3DFA64188, + 0xD0CD5D9BC378DE7A,0x4AC82C9A4D80CFB7,0x42777F1B83BDB620,0x72D2883A1D33BD75, + 0x5E7A2D4BAB6A8F41,0xF4DAAB6BBB1C95D9,0x905CFFE7FD8D31B6,0x83AA6422119B381F, + 0xC0AEFB8442022C49,0xA0F908C663033AE3,0xA428AF0804938826,0xADE41C341A8A53C7, + 0xAE7121EE77E6A85D,0xC47F5C4A25929E8C,0xB538E9AA55CDD863,0x06377AA9DAD8EB29, + 0xA18AE87BB3279895,0x6EDFDA6A35E48414,0x6B7D9D19825094A7,0xD41CFA55A4E86CBF, + 0xE5CAEDC9EA42C59C,0xA36C351C0E6FC179,0x5181E4DE6FABBF89,0xFFF0C530184D17D4, + 
0x9D41EB1584045892,0x1C0D525028D73961,0xF178EC180CA8856A,0x9A0571018EF811CD, + 0x4091A27C3EF5EFCC,0x19AF15239F6329D2,0x347450EFF91EB990,0xE11B4A078DD27759, + 0xB9561DE5FC601331,0x912F1F5A2DA993C0,0x1654DCB65BA2191A,0x3E2DDE098A6B99EB, + 0x8A66D71E0F82E3FE,0x8C51ADB7D55A08D7,0x4533E50F8941FF7F,0x02E6DD67BD4859EC, + 0xE068AABA5DF6D52F,0xC24826E3FF4A75A5,0x6C39070D88ACDDF8,0x6486548C4691A46F, + 0xD1BEBD26135C7C0C,0xB30F93038F15334A,0x82D9849FC1BF9A69,0x9C320BA85420FAE4, + 0xFA528243AFF90767,0x9ED4D6CFE968A308,0xB825FD582C44B147,0x9B7691BC5EDCB3BB, + 0xC7EA619048FE6516,0x1063A61F817AF233,0x47D538683409A693,0x63C2CE984C6DED30, + 0x2A9FDFD86C81D91D,0x7B1E3B06032A6694,0x666089EBFBD9FD83,0x0A598EE67375207B, + 0x07449A140AFC495F,0x2CA8A571B6593234,0x1F986F8A45BBC2FB,0x381AA4A050B372C2, + 0x5423A3ADD81FAF3A,0x17273C0B8B86BB6C,0xFE83258DC869B5A2,0x287902BFD1C980F1, + 0xF5A94BD66B3837AF,0x88800A79B2CABA12,0x55504310083B0D4C,0xDF36940E07B9EEB2, + 0x04D1A7CE6790B2C5,0x612413FFF125B4DC,0x26F12B97C52C124F,0x86082351A62F28AC, + 0xEF93632F9937E5E7,0x3507B052293A1BE6,0xE72C30AE570A9C70,0xD3586041AE1425E0, + 0xDE4574B3D79D4CC4,0x92BA228040C5685A,0xF00B0CA5DC8C271C,0xBE1287F1F69C5A6E, + 0xF39E317FB1E0DC86,0x495D114020EC342D,0x699B407E3F18CD4B,0xDCA3A9D46AD51528, + 0x0D1D14F279896924,0x0000000000000000,0x593EB75FA196C61E,0x2E4E78160B116BD8, + 0x6D4AE7B058887F8E,0xE65FD013872E3E06,0x7A6DDBBBD30EC4E2,0xAC97FC89CAAEF1B1, + 0x09CCB33C1E19DBE1,0x89F3EAC462EE1864,0x7770CF49AA87ADC6,0x56C57ECA6557F6D6, + 0x03953DDA6D6CFB9A,0x36928D884456E07C,0x1EEB8F37959F608D,0x31D6179C4EAAA923, + 0x6FAC3AD7E5C02662,0x43049FA653991456,0xABD3669DC052B8EE,0xAF02C153A7C20A2B, + 0x3CCB036E3723C007,0x93C9C23D90E1CA2C,0xC33BC65E2F6ED7D3,0x4CFF56339758249E, + 0xB1E94E64325D6AA6,0x37E16D359472420A,0x79F8E661BE623F78,0x5214D90402C74413, + 0x482EF1FDF0C8965B,0x13F69BC5EC1609A9,0x0E88292814E592BE,0x4E198B542A107D72, + 0xCCC00FCBEBAFE71B,0x1B49C844222B703E,0x2564164DA840E9D5,0x20C6513E1FF4F966, + 
0xBAC3203F910CE8AB,0xF2EDD1C261C47EF0,0x814CB945ACD361F3,0x95FEB8944A392105, + 0x5C9CF02C1622D6AD,0x971865F3F77178E9,0xBD87BA2B9BF0A1F4,0x444005B259655D09, + 0xED75BE48247FBC0B,0x7596122E17CFF42A,0xB44B091785E97A15,0x966B854E2755DA9F, + 0xEEE0839249134791,0x32432A4623C652B9,0xA8465B47AD3E4374,0xF8B45F2412B15E8B, + 0x2417F6F078644BA3,0xFB2162FE7FDDA511,0x4BBBCC279DA46DC1,0x0173E0BDD024A276, + 0x22208C59A2BCA08A,0x8FC4906DB836F34D,0xE4B90D743A6667EA,0x7147B5E0705F46EF, + 0x2782CB2A1508B039,0xEC065EF5F45B1E7D,0x21B5B183CFD05B10,0xDBE733C060295C77, + 0x9FA73672394C017E,0xCF55321186C31C81,0xD8720E1A0D45A7ED,0x3B8F997A3DDF8958, + 0x3AFC79C7EDFB2B2E,0xE9A4198643EF0ECE,0x5F09CDF67B4E2D37,0x4F6A6BE9FA34DF04, + 0xB6ADD47038A123F9,0x8D224D0A057EAAA1,0xC96248B85C1BF7A8,0xE3FD9760309A2EB5, + 0x0B2A6E5BA351820D,0xEB42C4E1FEA75722,0x948D58299A1D8373,0x7FCF9CC864BAD451, + 0xA55B4FB5D4B72A50,0x08BF5381CE3D7997,0x46A6D8D5E42D04E5,0xD22B80FC7E308796, + 0x57B69E77B57354A0,0x3969441D8097D0B4,0x3330CAFBF3E2F0CF,0xE28E77DDE0BE8CC3, + 0x62B12E259C494F46,0xA6CE726FB9DBD1CA,0x41E242C1EED14DBA,0x76032FF47AA30FB0 + }, + { + 0x45B268A93ACDE4CC,0xAF7F0BE884549D08,0x048354B3C1468263,0x925435C2C80EFED2, + 0xEE4E37F27FDFFBA7,0x167A33920C60F14D,0xFB123B52EA03E584,0x4A0CAB53FDBB9007, + 0x9DEAF6380F788A19,0xCB48EC558F0CB32A,0xB59DC4B2D6FEF7E0,0xDCDBCA22F4F3ECB6, + 0x11DF5813549A9C40,0xE33FDEDF568ACED3,0xA0C1C8124322E9C3,0x07A56B8158FA6D0D, + 0x77279579B1E1F3DD,0xD9B18B74422AC004,0xB8EC2D9FFFABC294,0xF4ACF8A82D75914F, + 0x7BBF69B1EF2B6878,0xC4F62FAF487AC7E1,0x76CE809CC67E5D0C,0x6711D88F92E4C14C, + 0x627B99D9243DEDFE,0x234AA5C3DFB68B51,0x909B1F15262DBF6D,0x4F66EA054B62BCB5, + 0x1AE2CF5A52AA6AE8,0xBEA053FBD0CE0148,0xED6808C0E66314C9,0x43FE16CD15A82710, + 0xCD049231A06970F6,0xE7BC8A6C97CC4CB0,0x337CE835FCB3B9C0,0x65DEF2587CC780F3, + 0x52214EDE4132BB50,0x95F15E4390F493DF,0x870839625DD2E0F1,0x41313C1AFB8B66AF, + 0x91720AF051B211BC,0x477D427ED4EEA573,0x2E3B4CEEF6E3BE25,0x82627834EB0BCC43, + 
0x9C03E3DD78E724C8,0x2877328AD9867DF9,0x14B51945E243B0F2,0x574B0F88F7EB97E2, + 0x88B6FA989AA4943A,0x19C4F068CB168586,0x50EE6409AF11FAEF,0x7DF317D5C04EABA4, + 0x7A567C5498B4C6A9,0xB6BBFB804F42188E,0x3CC22BCF3BC5CD0B,0xD04336EAAA397713, + 0xF02FAC1BEC33132C,0x2506DBA7F0D3488D,0xD7E65D6BF2C31A1E,0x5EB9B2161FF820F5, + 0x842E0650C46E0F9F,0x716BEB1D9E843001,0xA933758CAB315ED4,0x3FE414FDA2792265, + 0x27C9F1701EF00932,0x73A4C1CA70A771BE,0x94184BA6E76B3D0E,0x40D829FF8C14C87E, + 0x0FBEC3FAC77674CB,0x3616A9634A6A9572,0x8F139119C25EF937,0xF545ED4D5AEA3F9E, + 0xE802499650BA387B,0x6437E7BD0B582E22,0xE6559F89E053E261,0x80AD52E305288DFC, + 0x6DC55A23E34B9935,0xDE14E0F51AD0AD09,0xC6390578A659865E,0x96D7617109487CB1, + 0xE2D6CB3A21156002,0x01E915E5779FAED1,0xADB0213F6A77DCB7,0x9880B76EB9A1A6AB, + 0x5D9F8D248644CF9B,0xFD5E4536C5662658,0xF1C6B9FE9BACBDFD,0xEACD6341BE9979C4, + 0xEFA7221708405576,0x510771ECD88E543E,0xC2BA51CB671F043D,0x0AD482AC71AF5879, + 0xFE787A045CDAC936,0xB238AF338E049AED,0xBD866CC94972EE26,0x615DA6EBBD810290, + 0x3295FDD08B2C1711,0xF834046073BF0AEA,0xF3099329758FFC42,0x1CAEB13E7DCFA934, + 0xBA2307481188832B,0x24EFCE42874CE65C,0x0E57D61FB0E9DA1A,0xB3D1BAD6F99B343C, + 0xC0757B1C893C4582,0x2B510DB8403A9297,0x5C7698C1F1DB614A,0x3E0D0118D5E68CB4, + 0xD60F488E855CB4CF,0xAE961E0DF3CB33D9,0x3A8E55AB14A00ED7,0x42170328623789C1, + 0x838B6DD19C946292,0x895FEF7DED3B3AEB,0xCFCBB8E64E4A3149,0x064C7E642F65C3DC, + 0x3D2B3E2A4C5A63DA,0x5BD3F340A9210C47,0xB474D157A1615931,0xAC5934DA1DE87266, + 0x6EE365117AF7765B,0xC86ED36716B05C44,0x9BA6885C201D49C5,0xB905387A88346C45, + 0x131072C4BAB9DDFF,0xBF49461EA751AF99,0xD52977BC1CE05BA1,0xB0F785E46027DB52, + 0x546D30BA6E57788C,0x305AD707650F56AE,0xC987C682612FF295,0xA5AB8944F5FBC571, + 0x7ED528E759F244CA,0x8DDCBBCE2C7DB888,0xAA154ABE328DB1BA,0x1E619BE993ECE88B, + 0x09F2BD9EE813B717,0x7401AA4B285D1CB3,0x21858F143195CAEE,0x48C381841398D1B8, + 0xFCB750D3B2F98889,0x39A86A998D1CE1B9,0x1F888E0CE473465A,0x7899568376978716, + 
0x02CF2AD7EE2341BF,0x85C713B5B3F1A14E,0xFF916FE12B4567E7,0x7C1A0230B7D10575, + 0x0C98FCC85ECA9BA5,0xA3E7F720DA9E06AD,0x6A6031A2BBB1F438,0x973E74947ED7D260, + 0x2CF4663918C0FF9A,0x5F50A7F368678E24,0x34D983B4A449D4CD,0x68AF1B755592B587, + 0x7F3C3D022E6DEA1B,0xABFC5F5B45121F6B,0x0D71E92D29553574,0xDFFDF5106D4F03D8, + 0x081BA87B9F8C19C6,0xDB7EA1A3AC0981BB,0xBBCA12AD66172DFA,0x79704366010829C7, + 0x179326777BFF5F9C,0x0000000000000000,0xEB2476A4C906D715,0x724DD42F0738DF6F, + 0xB752EE6538DDB65F,0x37FFBC863DF53BA3,0x8EFA84FCB5C157E6,0xE9EB5C73272596AA, + 0x1B0BDABF2535C439,0x86E12C872A4D4E20,0x9969A28BCE3E087A,0xFAFB2EB79D9C4B55, + 0x056A4156B6D92CB2,0x5A3AE6A5DEBEA296,0x22A3B026A8292580,0x53C85B3B36AD1581, + 0xB11E900117B87583,0xC51F3A4A3FE56930,0xE019E1EDCF3621BD,0xEC811D2591FCBA18, + 0x445B7D4C4D524A1D,0xA8DA6069DCAEF005,0x58F5CC72309DE329,0xD4C062596B7FF570, + 0xCE22AD0339D59F98,0x591CD99747024DF8,0x8B90C5AA03187B54,0xF663D27FC356D0F0, + 0xD8589E9135B56ED5,0x35309651D3D67A1C,0x12F96721CD26732E,0xD28C1C3D441A36AC, + 0x492A946164077F69,0x2D1D73DC6F5F514B,0x6F0A70F40D68D88A,0x60B4B30ECA1EAC41, + 0xD36509D83385987D,0x0B3D97490630F6A8,0x9ECCC90A96C46577,0xA20EE2C5AD01A87C, + 0xE49AB55E0E70A3DE,0xA4429CA182646BA0,0xDA97B446DB962F6A,0xCCED87D4D7F6DE27, + 0x2AB8185D37A53C46,0x9F25DCEFE15BCBA6,0xC19C6EF9FEA3EB53,0xA764A3931BD884CE, + 0x2FD2590B817C10F4,0x56A21A6D80743933,0xE573A0BB79EF0D0F,0x155C0CA095DC1E23, + 0x6C2C4FC694D437E4,0x10364DF623053291,0xDD32DFC7836C4267,0x03263F3299BCEF6E, + 0x66F8CD6AE57B6F9D,0x8C35AE2B5BE21659,0x31B3C2E21290F87F,0x93BD2027BF915003, + 0x69460E90220D1B56,0x299E276FAE19D328,0x63928C3C53A2432F,0x7082FEF8E91B9ED0, + 0xBC6F792C3EED40F7,0x4C40D537D2DE53DB,0x75E8BFAE5FC2B262,0x4DA9C0D2A541FD0A, + 0x4E8FFFE03CFD1264,0x2620E495696FA7E3,0xE1F0F408B8A98F6C,0xD1AA230FDDA6D9C2, + 0xC7D0109DD1C6288F,0x8A79D04F7487D585,0x4694579BA3710BA2,0x38417F7CFA834F68, + 0x1D47A4DB0A5007E5,0x206C9AF1460A643F,0xA128DDF734BD4712,0x8144470672B7232D, + 
0xF2E086CC02105293,0x182DE58DBC892B57,0xCAA1F9B0F8931DFB,0x6B892447CC2E5AE9, + 0xF9DD11850420A43B,0x4BE5BEB68A243ED6,0x5584255F19C8D65D,0x3B67404E633FA006, + 0xA68DB6766C472A1F,0xF78AC79AB4C97E21,0xC353442E1080AAEC,0x9A4F9DB95782E714 + }, + { + 0x05BA7BC82C9B3220,0x31A54665F8B65E4F,0xB1B651F77547F4D4,0x8BFA0D857BA46682, + 0x85A96C5AA16A98BB,0x990FAEF908EB79C9,0xA15E37A247F4A62D,0x76857DCD5D27741E, + 0xF8C50B800A1820BC,0xBE65DCB201F7A2B4,0x666D1B986F9426E7,0x4CC921BF53C4E648, + 0x95410A0F93D9CA42,0x20CDCCAA647BA4EF,0x429A4060890A1871,0x0C4EA4F69B32B38B, + 0xCCDA362DDE354CD3,0x96DC23BC7C5B2FA9,0xC309BB68AA851AB3,0xD26131A73648E013, + 0x021DC52941FC4DB2,0xCD5ADAB7704BE48A,0xA77965D984ED71E6,0x32386FD61734BBA4, + 0xE82D6DD538AB7245,0x5C2147EA6177B4B1,0x5DA1AB70CF091CE8,0xAC907FCE72B8BDFF, + 0x57C85DFD972278A8,0xA4E44C6A6B6F940D,0x3851995B4F1FDFE4,0x62578CCAED71BC9E, + 0xD9882BB0C01D2C0A,0x917B9D5D113C503B,0xA2C31E11A87643C6,0xE463C923A399C1CE, + 0xF71686C57EA876DC,0x87B4A973E096D509,0xAF0D567D9D3A5814,0xB40C2A3F59DCC6F4, + 0x3602F88495D121DD,0xD3E1DD3D9836484A,0xF945E71AA46688E5,0x7518547EB2A591F5, + 0x9366587450C01D89,0x9EA81018658C065B,0x4F54080CBC4603A3,0x2D0384C65137BF3D, + 0xDC325078EC861E2A,0xEA30A8FC79573FF7,0x214D2030CA050CB6,0x65F0322B8016C30C, + 0x69BE96DD1B247087,0xDB95EE9981E161B8,0xD1FC1814D9CA05F8,0x820ED2BBCC0DE729, + 0x63D76050430F14C7,0x3BCCB0E8A09D3A0F,0x8E40764D573F54A2,0x39D175C1E16177BD, + 0x12F5A37C734F1F4B,0xAB37C12F1FDFC26D,0x5648B167395CD0F1,0x6C04ED1537BF42A7, + 0xED97161D14304065,0x7D6C67DAAB72B807,0xEC17FA87BA4EE83C,0xDFAF79CB0304FBC1, + 0x733F060571BC463E,0x78D61C1287E98A27,0xD07CF48E77B4ADA1,0xB9C262536C90DD26, + 0xE2449B5860801605,0x8FC09AD7F941FCFB,0xFAD8CEA94BE46D0E,0xA343F28B0608EB9F, + 0x9B126BD04917347B,0x9A92874AE7699C22,0x1B017C42C4E69EE0,0x3A4C5C720EE39256, + 0x4B6E9F5E3EA399DA,0x6BA353F45AD83D35,0xE7FEE0904C1B2425,0x22D009832587E95D, + 0x842980C00F1430E2,0xC6B3C0A0861E2893,0x087433A419D729F2,0x341F3DADD42D6C6F, + 
0xEE0A3FAEFBB2A58E,0x4AEE73C490DD3183,0xAAB72DB5B1A16A34,0xA92A04065E238FDF, + 0x7B4B35A1686B6FCC,0x6A23BF6EF4A6956C,0x191CB96B851AD352,0x55D598D4D6DE351A, + 0xC9604DE5F2AE7EF3,0x1CA6C2A3A981E172,0xDE2F9551AD7A5398,0x3025AAFF56C8F616, + 0x15521D9D1E2860D9,0x506FE31CFA45073A,0x189C55F12B647B0B,0x0180EC9AAE7EA859, + 0x7CEC8B40050C105E,0x2350E5198BF94104,0xEF8AD33455CC0DD7,0x07A7BEE16D677F92, + 0xE5E325B90DE76997,0x5A061591A26E637A,0xB611EF1618208B46,0x09F4DF3EB7A981AB, + 0x1EBB078AE87DACC0,0xB791038CB65E231F,0x0FD38D4574B05660,0x67EDF702C1EA8EBE, + 0xBA5F4BE0831238CD,0xE3C477C2CEFEBE5C,0x0DCE486C354C1BD2,0x8C5DB36416C31910, + 0x26EA9ED1A7627324,0x039D29B3EF82E5EB,0x9F28FC82CBF2AE02,0xA8AAE89CF05D2786, + 0x431AACFA2774B028,0xCF471F9E31B7A938,0x581BD0B8E3922EC8,0xBC78199B400BEF06, + 0x90FB71C7BF42F862,0x1F3BEB1046030499,0x683E7A47B55AD8DE,0x988F4263A695D190, + 0xD808C72A6E638453,0x0627527BC319D7CB,0xEBB04466D72997AE,0xE67E0C0AE2658C7C, + 0x14D2F107B056C880,0x7122C32C30400B8C,0x8A7AE11FD5DACEDB,0xA0DEDB38E98A0E74, + 0xAD109354DCC615A6,0x0BE91A17F655CC19,0x8DDD5FFEB8BDB149,0xBFE53028AF890AED, + 0xD65BA6F5B4AD7A6A,0x7956F0882997227E,0x10E8665532B352F9,0x0E5361DFDACEFE39, + 0xCEC7F3049FC90161,0xFF62B561677F5F2E,0x975CCF26D22587F0,0x51EF0F86543BAF63, + 0x2F1E41EF10CBF28F,0x52722635BBB94A88,0xAE8DBAE73344F04D,0x410769D36688FD9A, + 0xB3AB94DE34BBB966,0x801317928DF1AA9B,0xA564A0F0C5113C54,0xF131D4BEBDB1A117, + 0x7F71A2F3EA8EF5B5,0x40878549C8F655C3,0x7EF14E6944F05DEC,0xD44663DCF55137D8, + 0xF2ACFD0D523344FC,0x0000000000000000,0x5FBC6E598EF5515A,0x16CF342EF1AA8532, + 0xB036BD6DDB395C8D,0x13754FE6DD31B712,0xBBDFA77A2D6C9094,0x89E7C8AC3A582B30, + 0x3C6B0E09CDFA459D,0xC4AE0589C7E26521,0x49735A777F5FD468,0xCAFD64561D2C9B18, + 0xDA1502032F9FC9E1,0x8867243694268369,0x3782141E3BAF8984,0x9CB5D53124704BE9, + 0xD7DB4A6F1AD3D233,0xA6F989432A93D9BF,0x9D3539AB8A0EE3B0,0x53F2CAAF15C7E2D1, + 0x6E19283C76430F15,0x3DEBE2936384EDC4,0x5E3C82C3208BF903,0x33B8834CB94A13FD, + 
0x6470DEB12E686B55,0x359FD1377A53C436,0x61CAA57902F35975,0x043A975282E59A79, + 0xFD7F70482683129C,0xC52EE913699CCD78,0x28B9FF0E7DAC8D1D,0x5455744E78A09D43, + 0xCB7D88CCB3523341,0x44BD121B4A13CFBA,0x4D49CD25FDBA4E11,0x3E76CB208C06082F, + 0x3FF627BA2278A076,0xC28957F204FBB2EA,0x453DFE81E46D67E3,0x94C1E6953DA7621B, + 0x2C83685CFF491764,0xF32C1197FC4DECA5,0x2B24D6BD922E68F6,0xB22B78449AC5113F, + 0x48F3B6EDD1217C31,0x2E9EAD75BEB55AD6,0x174FD8B45FD42D6B,0x4ED4E4961238ABFA, + 0x92E6B4EEFEBEB5D0,0x46A0D7320BEF8208,0x47203BA8A5912A51,0x24F75BF8E69E3E96, + 0xF0B1382413CF094E,0xFEE259FBC901F777,0x276A724B091CDB7D,0xBDF8F501EE75475F, + 0x599B3C224DEC8691,0x6D84018F99C1EAFE,0x7498B8E41CDB39AC,0xE0595E71217C5BB7, + 0x2AA43A273C50C0AF,0xF50B43EC3F543B6E,0x838E3E2162734F70,0xC09492DB4507FF58, + 0x72BFEA9FDFC2EE67,0x11688ACF9CCDFAA0,0x1A8190D86A9836B9,0x7ACBD93BC615C795, + 0xC7332C3A286080CA,0x863445E94EE87D50,0xF6966A5FD0D6DE85,0xE9AD814F96D5DA1C, + 0x70A22FB69E3EA3D5,0x0A69F68D582B6440,0xB8428EC9C2EE757F,0x604A49E3AC8DF12C, + 0x5B86F90B0C10CB23,0xE1D9B2EB8F02F3EE,0x29391394D3D22544,0xC8E0A17F5CD0D6AA, + 0xB58CC6A5F7A26EAD,0x8193FB08238F02C2,0xD5C68F465B2F9F81,0xFCFF9CD288FDBAC5, + 0x77059157F359DC47,0x1D262E3907FF492B,0xFB582233E59AC557,0xDDB2BCE242F8B673, + 0x2577B76248E096CF,0x6F99C4A6D83DA74C,0xC1147E41EB795701,0xF48BAF76912A9337 + }, + { + 0x3EF29D249B2C0A19,0xE9E16322B6F8622F,0x5536994047757F7A,0x9F4D56D5A47B0B33, + 0x822567466AA1174C,0xB8F5057DEB082FB2,0xCC48C10BF4475F53,0x373088D4275DEC3A, + 0x968F4325180AED10,0x173D232CF7016151,0xAE4ED09F946FCC13,0xFD4B4741C4539873, + 0x1B5B3F0DD9933765,0x2FFCB0967B644052,0xE02376D20A89840C,0xA3AE3A70329B18D7, + 0x419CBD2335DE8526,0xFAFEBF115B7C3199,0x0397074F85AA9B0D,0xC58AD4FB4836B970, + 0xBEC60BE3FC4104A8,0x1EFF36DC4B708772,0x131FDC33ED8453B6,0x0844E33E341764D3, + 0x0FF11B6EAB38CD39,0x64351F0A7761B85A,0x3B5694F509CFBA0E,0x30857084B87245D0, + 0x47AFB3BD2297AE3C,0xF2BA5C2F6F6B554A,0x74BDC4761F4F70E1,0xCFDFC64471EDC45E, + 
0xE610784C1DC0AF16,0x7ACA29D63C113F28,0x2DED411776A859AF,0xAC5F211E99A3D5EE, + 0xD484F949A87EF33B,0x3CE36CA596E013E4,0xD120F0983A9D432C,0x6BC40464DC597563, + 0x69D5F5E5D1956C9E,0x9AE95F043698BB24,0xC9ECC8DA66A4EF44,0xD69508C8A5B2EAC6, + 0xC40C2235C0503B80,0x38C193BA8C652103,0x1CEEC75D46BC9E8F,0xD331011937515AD1, + 0xD8E2E56886ECA50F,0xB137108D5779C991,0x709F3B6905CA4206,0x4FEB50831680CAEF, + 0xEC456AF3241BD238,0x58D673AFE181ABBE,0x242F54E7CAD9BF8C,0x0211F1810DCC19FD, + 0x90BC4DBB0F43C60A,0x9518446A9DA0761D,0xA1BFCBF13F57012A,0x2BDE4F8961E172B5, + 0x27B853A84F732481,0xB0B1E643DF1F4B61,0x18CC38425C39AC68,0xD2B7F7D7BF37D821, + 0x3103864A3014C720,0x14AA246372ABFA5C,0x6E600DB54EBAC574,0x394765740403A3F3, + 0x09C215F0BC71E623,0x2A58B947E987F045,0x7B4CDF18B477BDD8,0x9709B5EB906C6FE0, + 0x73083C268060D90B,0xFEDC400E41F9037E,0x284948C6E44BE9B8,0x728ECAE808065BFB, + 0x06330E9E17492B1A,0x5950856169E7294E,0xBAE4F4FCE6C4364F,0xCA7BCF95E30E7449, + 0x7D7FD186A33E96C2,0x52836110D85AD690,0x4DFAA1021B4CD312,0x913ABB75872544FA, + 0xDD46ECB9140F1518,0x3D659A6B1E869114,0xC23F2CABD719109A,0xD713FE062DD46836, + 0xD0A60656B2FBC1DC,0x221C5A79DD909496,0xEFD26DBCA1B14935,0x0E77EDA0235E4FC9, + 0xCBFD395B6B68F6B9,0x0DE0EAEFA6F4D4C4,0x0422FF1F1A8532E7,0xF969B85EDED6AA94, + 0x7F6E2007AEF28F3F,0x3AD0623B81A938FE,0x6624EE8B7AADA1A7,0xB682E8DDC856607B, + 0xA78CC56F281E2A30,0xC79B257A45FAA08D,0x5B4174E0642B30B3,0x5F638BFF7EAE0254, + 0x4BC9AF9C0C05F808,0xCE59308AF98B46AE,0x8FC58DA9CC55C388,0x803496C7676D0EB1, + 0xF33CAAE1E70DD7BA,0xBB6202326EA2B4BF,0xD5020F87201871CB,0x9D5CA754A9B712CE, + 0x841669D87DE83C56,0x8A6184785EB6739F,0x420BBA6CB0741E2B,0xF12D5B60EAC1CE47, + 0x76AC35F71283691C,0x2C6BB7D9FECEDB5F,0xFCCDB18F4C351A83,0x1F79C012C3160582, + 0xF0ABADAE62A74CB7,0xE1A5801C82EF06FC,0x67A21845F2CB2357,0x5114665F5DF04D9D, + 0xBF40FD2D74278658,0xA0393D3FB73183DA,0x05A409D192E3B017,0xA9FB28CF0B4065F9, + 0x25A9A22942BF3D7C,0xDB75E22703463E02,0xB326E10C5AB5D06C,0xE7968E8295A62DE6, + 
0xB973F3B3636EAD42,0xDF571D3819C30CE5,0xEE549B7229D7CBC5,0x12992AFD65E2D146, + 0xF8EF4E9056B02864,0xB7041E134030E28B,0xC02EDD2ADAD50967,0x932B4AF48AE95D07, + 0x6FE6FB7BC6DC4784,0x239AACB755F61666,0x401A4BEDBDB807D6,0x485EA8D389AF6305, + 0xA41BC220ADB4B13D,0x753B32B89729F211,0x997E584BB3322029,0x1D683193CEDA1C7F, + 0xFF5AB6C0C99F818E,0x16BBD5E27F67E3A1,0xA59D34EE25D233CD,0x98F8AE853B54A2D9, + 0x6DF70AFACB105E79,0x795D2E99B9BBA425,0x8E437B6744334178,0x0186F6CE886682F0, + 0xEBF092A3BB347BD2,0xBCD7FA62F18D1D55,0xADD9D7D011C5571E,0x0BD3E471B1BDFFDE, + 0xAA6C2F808EEAFEF4,0x5EE57D31F6C880A4,0xF50FA47FF044FCA0,0x1ADDC9C351F5B595, + 0xEA76646D3352F922,0x0000000000000000,0x85909F16F58EBEA6,0x46294573AAF12CCC, + 0x0A5512BF39DB7D2E,0x78DBD85731DD26D5,0x29CFBE086C2D6B48,0x218B5D36583A0F9B, + 0x152CD2ADFACD78AC,0x83A39188E2C795BC,0xC3B9DA655F7F926A,0x9ECBA01B2C1D89C3, + 0x07B5F8509F2FA9EA,0x7EE8D6C926940DCF,0x36B67E1AAF3B6ECA,0x86079859702425AB, + 0xFB7849DFD31AB369,0x4C7C57CC932A51E2,0xD96413A60E8A27FF,0x263EA566C715A671, + 0x6C71FC344376DC89,0x4A4F595284637AF8,0xDAF314E98B20BCF2,0x572768C14AB96687, + 0x1088DB7C682EC8BB,0x887075F9537A6A62,0x2E7A4658F302C2A2,0x619116DBE582084D, + 0xA87DDE018326E709,0xDCC01A779C6997E8,0xEDC39C3DAC7D50C8,0xA60A33A1A078A8C0, + 0xC1A82BE452B38B97,0x3F746BEA134A88E9,0xA228CCBEBAFD9A27,0xABEAD94E068C7C04, + 0xF48952B178227E50,0x5CF48CB0FB049959,0x6017E0156DE48ABD,0x4438B4F2A73D3531, + 0x8C528AE649FF5885,0xB515EF924DFCFB76,0x0C661C212E925634,0xB493195CC59A7986, + 0x9CDA519A21D1903E,0x32948105B5BE5C2D,0x194ACE8CD45F2E98,0x438D4CA238129CDB, + 0x9B6FA9CABEFE39D4,0x81B26009EF0B8C41,0xDED1EBF691A58E15,0x4E6DA64D9EE6481F, + 0x54B06F8ECF13FD8A,0x49D85E1D01C9E1F5,0xAFC826511C094EE3,0xF698A33075EE67AD, + 0x5AC7822EEC4DB243,0x8DD47C28C199DA75,0x89F68337DB1CE892,0xCDCE37C57C21DDA3, + 0x530597DE503C5460,0x6A42F2AA543FF793,0x5D727A7E73621BA9,0xE232875307459DF1, + 0x56A19E0FC2DFE477,0xC61DD3B4CD9C227D,0xE5877F03986A341B,0x949EB2A415C6F4ED, + 
0x6206119460289340,0x6380E75AE84E11B0,0x8BE772B6D6D0F16F,0x50929091D596CF6D, + 0xE86795EC3E9EE0DF,0x7CF927482B581432,0xC86A3E14EEC26DB4,0x7119CDA78DACC0F6, + 0xE40189CD100CB6EB,0x92ADBC3A028FDFF7,0xB2A017C2D2D3529C,0x200DABF8D05C8D6B, + 0x34A78F9BA2F77737,0xE3B4719D8F231F01,0x45BE423C2F5BB7C1,0xF71E55FEFD88E55D, + 0x6853032B59F3EE6E,0x65B3E9C4FF073AAA,0x772AC3399AE5EBEC,0x87816E97F842A75B, + 0x110E2DB2E0484A4B,0x331277CB3DD8DEDD,0xBD510CAC79EB9FA5,0x352179552A91F5C7 + }, + { + 0x8AB0A96846E06A6D,0x43C7E80B4BF0B33A,0x08C9B3546B161EE5,0x39F1C235EBA990BE, + 0xC1BEF2376606C7B2,0x2C209233614569AA,0xEB01523B6FC3289A,0x946953AB935ACEDD, + 0x272838F63E13340E,0x8B0455ECA12BA052,0x77A1B2C4978FF8A2,0xA55122CA13E54086, + 0x2276135862D3F1CD,0xDB8DDFDE08B76CFE,0x5D1E12C89E4A178A,0x0E56816B03969867, + 0xEE5F79953303ED59,0xAFED748BAB78D71D,0x6D929F2DF93E53EE,0xF5D8A8F8BA798C2A, + 0xF619B1698E39CF6B,0x95DDAF2F749104E2,0xEC2A9C80E0886427,0xCE5C8FD8825B95EA, + 0xC4E0D9993AC60271,0x4699C3A5173076F9,0x3D1B151F50A29F42,0x9ED505EA2BC75946, + 0x34665ACFDC7F4B98,0x61B1FB53292342F7,0xC721C0080E864130,0x8693CD1696FD7B74, + 0x872731927136B14B,0xD3446C8A63A1721B,0x669A35E8A6680E4A,0xCAB658F239509A16, + 0xA4E5DE4EF42E8AB9,0x37A7435EE83F08D9,0x134E6239E26C7F96,0x82791A3C2DF67488, + 0x3F6EF00A8329163C,0x8E5A7E42FDEB6591,0x5CAAEE4C7981DDB5,0x19F234785AF1E80D, + 0x255DDDE3ED98BD70,0x50898A32A99CCCAC,0x28CA4519DA4E6656,0xAE59880F4CB31D22, + 0x0D9798FA37D6DB26,0x32F968F0B4FFCD1A,0xA00F09644F258545,0xFA3AD5175E24DE72, + 0xF46C547C5DB24615,0x713E80FBFF0F7E20,0x7843CF2B73D2AAFA,0xBD17EA36AEDF62B4, + 0xFD111BACD16F92CF,0x4ABAA7DBC72D67E0,0xB3416B5DAD49FAD3,0xBCA316B24914A88B, + 0x15D150068AECF914,0xE27C1DEBE31EFC40,0x4FE48C759BEDA223,0x7EDCFD141B522C78, + 0x4E5070F17C26681C,0xE696CAC15815F3BC,0x35D2A64B3BB481A7,0x800CFF29FE7DFDF6, + 0x1ED9FAC3D5BAA4B0,0x6C2663A91EF599D1,0x03C1199134404341,0xF7AD4DED69F20554, + 0xCD9D9649B61BD6AB,0xC8C3BDE7EADB1368,0xD131899FB02AFB65,0x1D18E352E1FAE7F1, + 
0xDA39235AEF7CA6C1,0xA1BBF5E0A8EE4F7A,0x91377805CF9A0B1E,0x3138716180BF8E5B, + 0xD9F83ACBDB3CE580,0x0275E515D38B897E,0x472D3F21F0FBBCC6,0x2D946EB7868EA395, + 0xBA3C248D21942E09,0xE7223645BFDE3983,0xFF64FEB902E41BB1,0xC97741630D10D957, + 0xC3CB1722B58D4ECC,0xA27AEC719CAE0C3B,0x99FECB51A48C15FB,0x1465AC826D27332B, + 0xE1BD047AD75EBF01,0x79F733AF941960C5,0x672EC96C41A3C475,0xC27FEBA6524684F3, + 0x64EFD0FD75E38734,0xED9E60040743AE18,0xFB8E2993B9EF144D,0x38453EB10C625A81, + 0x6978480742355C12,0x48CF42CE14A6EE9E,0x1CAC1FD606312DCE,0x7B82D6BA4792E9BB, + 0x9D141C7B1F871A07,0x5616B80DC11C4A2E,0xB849C198F21FA777,0x7CA91801C8D9A506, + 0xB1348E487EC273AD,0x41B20D1E987B3A44,0x7460AB55A3CFBBE3,0x84E628034576F20A, + 0x1B87D16D897A6173,0x0FE27DEFE45D5258,0x83CDE6B8CA3DBEB7,0x0C23647ED01D1119, + 0x7A362A3EA0592384,0xB61F40F3F1893F10,0x75D457D1440471DC,0x4558DA34237035B8, + 0xDCA6116587FC2043,0x8D9B67D3C9AB26D0,0x2B0B5C88EE0E2517,0x6FE77A382AB5DA90, + 0x269CC472D9D8FE31,0x63C41E46FAA8CB89,0xB7ABBC771642F52F,0x7D1DE4852F126F39, + 0xA8C6BA3024339BA0,0x600507D7CEE888C8,0x8FEE82C61A20AFAE,0x57A2448926D78011, + 0xFCA5E72836A458F0,0x072BCEBB8F4B4CBD,0x497BBE4AF36D24A1,0x3CAFE99BB769557D, + 0x12FA9EBD05A7B5A9,0xE8C04BAA5B836BDB,0x4273148FAC3B7905,0x908384812851C121, + 0xE557D3506C55B0FD,0x72FF996ACB4F3D61,0x3EDA0C8E64E2DC03,0xF0868356E6B949E9, + 0x04EAD72ABB0B0FFC,0x17A4B5135967706A,0xE3C8E16F04D5367F,0xF84F30028DAF570C, + 0x1846C8FCBD3A2232,0x5B8120F7F6CA9108,0xD46FA231ECEA3EA6,0x334D947453340725, + 0x58403966C28AD249,0xBED6F3A79A9F21F5,0x68CCB483A5FE962D,0xD085751B57E1315A, + 0xFED0023DE52FD18E,0x4B0E5B5F20E6ADDF,0x1A332DE96EB1AB4C,0xA3CE10F57B65C604, + 0x108F7BA8D62C3CD7,0xAB07A3A11073D8E1,0x6B0DAD1291BED56C,0xF2F366433532C097, + 0x2E557726B2CEE0D4,0x0000000000000000,0xCB02A476DE9B5029,0xE4E32FD48B9E7AC2, + 0x734B65EE2C84F75E,0x6E5386BCCD7E10AF,0x01B4FC84E7CBCA3F,0xCFE8735C65905FD5, + 0x3613BFDA0FF4C2E6,0x113B872C31E7F6E8,0x2FE18BA255052AEB,0xE974B72EBC48A1E4, + 
0x0ABC5641B89D979B,0xB46AA5E62202B66E,0x44EC26B0C4BBFF87,0xA6903B5B27A503C7, + 0x7F680190FC99E647,0x97A84A3AA71A8D9C,0xDD12EDE16037EA7C,0xC554251DDD0DC84E, + 0x88C54C7D956BE313,0x4D91696048662B5D,0xB08072CC9909B992,0xB5DE5962C5C97C51, + 0x81B803AD19B637C9,0xB2F597D94A8230EC,0x0B08AAC55F565DA4,0xF1327FD2017283D6, + 0xAD98919E78F35E63,0x6AB9519676751F53,0x24E921670A53774F,0xB9FD3D1C15D46D48, + 0x92F66194FBDA485F,0x5A35DC7311015B37,0xDED3F4705477A93D,0xC00A0EB381CD0D8D, + 0xBB88D809C65FE436,0x16104997BEACBA55,0x21B70AC95693B28C,0x59F4C5E225411876, + 0xD5DB5EB50B21F499,0x55D7A19CF55C096F,0xA97246B4C3F8519F,0x8552D487A2BD3835, + 0x54635D181297C350,0x23C2EFDC85183BF2,0x9F61F96ECC0C9379,0x534893A39DDC8FED, + 0x5EDF0B59AA0A54CB,0xAC2C6D1A9F38945C,0xD7AEBBA0D8AA7DE7,0x2ABFA00C09C5EF28, + 0xD84CC64F3CF72FBF,0x2003F64DB15878B3,0xA724C7DFC06EC9F8,0x069F323F68808682, + 0xCC296ACD51D01C94,0x055E2BAE5CC0C5C3,0x6270E2C21D6301B6,0x3B842720382219C0, + 0xD2F0900E846AB824,0x52FC6F277A1745D2,0xC6953C8CE94D8B0F,0xE009F8FE3095753E, + 0x655B2C7992284D0B,0x984A37D54347DFC4,0xEAB5AEBF8808E2A5,0x9A3FD2C090CC56BA, + 0x9CA0E0FFF84CD038,0x4C2595E4AFADE162,0xDF6708F4B3BC6302,0xBF620F237D54EBCA, + 0x93429D101C118260,0x097D4FD08CDDD4DA,0x8C2F9B572E60ECEF,0x708A7C7F18C4B41F, + 0x3A30DBA4DFE9D3FF,0x4006F19A7FB0F07B,0x5F6BF7DD4DC19EF4,0x1F6D064732716E8F, + 0xF9FBCC866A649D33,0x308C8DE567744464,0x8971B0F972A0292C,0xD61A47243F61B7D8, + 0xEFEB8511D4C82766,0x961CB6BE40D147A3,0xAAB35F25F7B812DE,0x76154E407044329D, + 0x513D76B64E570693,0xF3479AC7D2F90AA8,0x9B8B2E4477079C85,0x297EB99D3D85AC69 + }, + { + 0x7E37E62DFC7D40C3,0x776F25A4EE939E5B,0xE045C850DD8FB5AD,0x86ED5BA711FF1952, + 0xE91D0BD9CF616B35,0x37E0AB256E408FFB,0x9607F6C031025A7A,0x0B02F5E116D23C9D, + 0xF3D8486BFB50650C,0x621CFF27C40875F5,0x7D40CB71FA5FD34A,0x6DAA6616DAA29062, + 0x9F5F354923EC84E2,0xEC847C3DC507C3B3,0x025A3668043CE205,0xA8BF9E6C4DAC0B19, + 0xFA808BE2E9BEBB94,0xB5B99C5277C74FA3,0x78D9BC95F0397BCC,0xE332E50CDBAD2624, + 
0xC74FCE129332797E,0x1729ECEB2EA709AB,0xC2D6B9F69954D1F8,0x5D898CBFBAB8551A, + 0x859A76FB17DD8ADB,0x1BE85886362F7FB5,0xF6413F8FF136CD8A,0xD3110FA5BBB7E35C, + 0x0A2FEED514CC4D11,0xE83010EDCD7F1AB9,0xA1E75DE55F42D581,0xEEDE4A55C13B21B6, + 0xF2F5535FF94E1480,0x0CC1B46D1888761E,0xBCE15FDB6529913B,0x2D25E8975A7181C2, + 0x71817F1CE2D7A554,0x2E52C5CB5C53124B,0xF9F7A6BEEF9C281D,0x9E722E7D21F2F56E, + 0xCE170D9B81DCA7E6,0x0E9B82051CB4941B,0x1E712F623C49D733,0x21E45CFA42F9F7DC, + 0xCB8E7A7F8BBA0F60,0x8E98831A010FB646,0x474CCF0D8E895B23,0xA99285584FB27A95, + 0x8CC2B57205335443,0x42D5B8E984EFF3A5,0x012D1B34021E718C,0x57A6626AAE74180B, + 0xFF19FC06E3D81312,0x35BA9D4D6A7C6DFE,0xC9D44C178F86ED65,0x506523E6A02E5288, + 0x03772D5C06229389,0x8B01F4FE0B691EC0,0xF8DABD8AED825991,0x4C4E3AEC985B67BE, + 0xB10DF0827FBF96A9,0x6A69279AD4F8DAE1,0xE78689DCD3D5FF2E,0x812E1A2B1FA553D1, + 0xFBAD90D6EBA0CA18,0x1AC543B234310E39,0x1604F7DF2CB97827,0xA6241C6951189F02, + 0x753513CCEAAF7C5E,0x64F2A59FC84C4EFA,0x247D2B1E489F5F5A,0xDB64D718AB474C48, + 0x79F4A7A1F2270A40,0x1573DA832A9BEBAE,0x3497867968621C72,0x514838D2A2302304, + 0xF0AF6537FD72F685,0x1D06023E3A6B44BA,0x678588C3CE6EDD73,0x66A893F7CC70ACFF, + 0xD4D24E29B5EDA9DF,0x3856321470EA6A6C,0x07C3418C0E5A4A83,0x2BCBB22F5635BACD, + 0x04B46CD00878D90A,0x06EE5AB80C443B0F,0x3B211F4876C8F9E5,0x0958C38912EEDE98, + 0xD14B39CDBF8B0159,0x397B292072F41BE0,0x87C0409313E168DE,0xAD26E98847CAA39F, + 0x4E140C849C6785BB,0xD5FF551DB7F3D853,0xA0CA46D15D5CA40D,0xCD6020C787FE346F, + 0x84B76DCF15C3FB57,0xDEFDA0FCA121E4CE,0x4B8D7B6096012D3D,0x9AC642AD298A2C64, + 0x0875D8BD10F0AF14,0xB357C6EA7B8374AC,0x4D6321D89A451632,0xEDA96709C719B23F, + 0xF76C24BBF328BC06,0xC662D526912C08F2,0x3CE25EC47892B366,0xB978283F6F4F39BD, + 0xC08C8F9E9D6833FD,0x4F3917B09E79F437,0x593DE06FB2C08C10,0xD6887841B1D14BDA, + 0x19B26EEE32139DB0,0xB494876675D93E2F,0x825937771987C058,0x90E9AC783D466175, + 0xF1827E03FF6C8709,0x945DC0A8353EB87F,0x4516F9658AB5B926,0x3F9573987EB020EF, + 
0xB855330B6D514831,0x2AE6A91B542BCB41,0x6331E413C6160479,0x408F8E8180D311A0, + 0xEFF35161C325503A,0xD06622F9BD9570D5,0x8876D9A20D4B8D49,0xA5533135573A0C8B, + 0xE168D364DF91C421,0xF41B09E7F50A2F8F,0x12B09B0F24C1A12D,0xDA49CC2CA9593DC4, + 0x1F5C34563E57A6BF,0x54D14F36A8568B82,0xAF7CDFE043F6419A,0xEA6A2685C943F8BC, + 0xE5DCBFB4D7E91D2B,0xB27ADDDE799D0520,0x6B443CAED6E6AB6D,0x7BAE91C9F61BE845, + 0x3EB868AC7CAE5163,0x11C7B65322E332A4,0xD23C1491B9A992D0,0x8FB5982E0311C7CA, + 0x70AC6428E0C9D4D8,0x895BC2960F55FCC5,0x76423E90EC8DEFD7,0x6FF0507EDE9E7267, + 0x3DCF45F07A8CC2EA,0x4AA06054941F5CB1,0x5810FB5BB0DEFD9C,0x5EFEA1E3BC9AC693, + 0x6EDD4B4ADC8003EB,0x741808F8E8B10DD2,0x145EC1B728859A22,0x28BC9F7350172944, + 0x270A06424EBDCCD3,0x972AEDF4331C2BF6,0x059977E40A66A886,0x2550302A4A812ED6, + 0xDD8A8DA0A7037747,0xC515F87A970E9B7B,0x3023EAA9601AC578,0xB7E3AA3A73FBADA6, + 0x0FB699311EAAE597,0x0000000000000000,0x310EF19D6204B4F4,0x229371A644DB6455, + 0x0DECAF591A960792,0x5CA4978BB8A62496,0x1C2B190A38753536,0x41A295B582CD602C, + 0x3279DCC16426277D,0xC1A194AA9F764271,0x139D803B26DFD0A1,0xAE51C4D441E83016, + 0xD813FA44AD65DFC1,0xAC0BF2BC45D4D213,0x23BE6A9246C515D9,0x49D74D08923DCF38, + 0x9D05032127D066E7,0x2F7FDEFF5E4D63C7,0xA47E2A0155247D07,0x99B16FF12FA8BFED, + 0x4661D4398C972AAF,0xDFD0BBC8A33F9542,0xDCA79694A51D06CB,0xB020EBB67DA1E725, + 0xBA0F0563696DAA34,0xE4F1A480D5F76CA7,0xC438E34E9510EAF7,0x939E81243B64F2FC, + 0x8DEFAE46072D25CF,0x2C08F3A3586FF04E,0xD7A56375B3CF3A56,0x20C947CE40E78650, + 0x43F8A3DD86F18229,0x568B795EAC6A6987,0x8003011F1DBB225D,0xF53612D3F7145E03, + 0x189F75DA300DEC3C,0x9570DB9C3720C9F3,0xBB221E576B73DBB8,0x72F65240E4F536DD, + 0x443BE25188ABC8AA,0xE21FFE38D9B357A8,0xFD43CA6EE7E4F117,0xCAA3614B89A47EEC, + 0xFE34E732E1C6629E,0x83742C431B99B1D4,0xCF3A16AF83C2D66A,0xAAE5A8044990E91C, + 0x26271D764CA3BD5F,0x91C4B74C3F5810F9,0x7C6DD045F841A2C6,0x7F1AFD19FE63314F, + 0xC8F957238D989CE9,0xA709075D5306EE8E,0x55FC5402AA48FA0E,0x48FA563C9023BEB4, + 
0x65DFBEABCA523F76,0x6C877D22D8BCE1EE,0xCC4D3BF385E045E3,0xBEBB69B36115733E, + 0x10EAAD6720FD4328,0xB6CEB10E71E5DC2A,0xBDCC44EF6737E0B7,0x523F158EA412B08D, + 0x989C74C52DB6CE61,0x9BEB59992B945DE8,0x8A2CEFCA09776F4C,0xA3BD6B8D5B7E3784, + 0xEB473DB1CB5D8930,0xC3FBA2C29B4AA074,0x9C28181525CE176B,0x683311F2D0C438E4, + 0x5FD3BAD7BE84B71F,0xFC6ED15AE5FA809B,0x36CDB0116C5EFE77,0x29918447520958C8, + 0xA29070B959604608,0x53120EBAA60CC101,0x3A0C047C74D68869,0x691E0AC6D2DA4968, + 0x73DB4974E6EB4751,0x7A838AFDF40599C9,0x5A4ACD33B4E21F99,0x6046C94FC03497F0, + 0xE6AB92E8D1CB8EA2,0x3354C7F5663856F1,0xD93EE170AF7BAE4D,0x616BD27BC22AE67C, + 0x92B39A10397A8370,0xABC8B3304B8E9890,0xBF967287630B02B2,0x5B67D607B6FC6E15 + }, + { + 0xD031C397CE553FE6,0x16BA5B01B006B525,0xA89BADE6296E70C8,0x6A1F525D77D3435B, + 0x6E103570573DFA0B,0x660EFB2A17FC95AB,0x76327A9E97634BF6,0x4BAD9D6462458BF5, + 0xF1830CAEDBC3F748,0xC5C8F542669131FF,0x95044A1CDC48B0CB,0x892962DF3CF8B866, + 0xB0B9E208E930C135,0xA14FB3F0611A767C,0x8D2605F21C160136,0xD6B71922FECC549E, + 0x37089438A5907D8B,0x0B5DA38E5803D49C,0x5A5BCC9CEA6F3CBC,0xEDAE246D3B73FFE5, + 0xD2B87E0FDE22EDCE,0x5E54ABB1CA8185EC,0x1DE7F88FE80561B9,0xAD5E1A870135A08C, + 0x2F2ADBD665CECC76,0x5780B5A782F58358,0x3EDC8A2EEDE47B3F,0xC9D95C3506BEE70F, + 0x83BE111D6C4E05EE,0xA603B90959367410,0x103C81B4809FDE5D,0x2C69B6027D0C774A, + 0x399080D7D5C87953,0x09D41E16487406B4,0xCDD63B1826505E5F,0xF99DC2F49B0298E8, + 0x9CD0540A943CB67F,0xBCA84B7F891F17C5,0x723D1DB3B78DF2A6,0x78AA6E71E73B4F2E, + 0x1433E699A071670D,0x84F21BE454620782,0x98DF3327B4D20F2F,0xF049DCE2D3769E5C, + 0xDB6C60199656EB7A,0x648746B2078B4783,0x32CD23598DCBADCF,0x1EA4955BF0C7DA85, + 0xE9A143401B9D46B5,0xFD92A5D9BBEC21B8,0xC8138C790E0B8E1B,0x2EE00B9A6D7BA562, + 0xF85712B893B7F1FC,0xEB28FED80BEA949D,0x564A65EB8A40EA4C,0x6C9988E8474A2823, + 0x4535898B121D8F2D,0xABD8C03231ACCBF4,0xBA2E91CAB9867CBD,0x7960BE3DEF8E263A, + 0x0C11A977602FD6F0,0xCB50E1AD16C93527,0xEAE22E94035FFD89,0x2866D12F5DE2CE1A, + 
0xFF1B1841AB9BF390,0x9F9339DE8CFE0D43,0x964727C8C48A0BF7,0x524502C6AAAE531C, + 0x9B9C5EF3AC10B413,0x4FA2FA4942AB32A5,0x3F165A62E551122B,0xC74148DA76E6E3D7, + 0x924840E5E464B2A7,0xD372AE43D69784DA,0x233B72A105E11A86,0xA48A04914941A638, + 0xB4B68525C9DE7865,0xDDEABAACA6CF8002,0x0A9773C250B6BD88,0xC284FFBB5EBD3393, + 0x8BA0DF472C8F6A4E,0x2AEF6CB74D951C32,0x427983722A318D41,0x73F7CDFFBF389BB2, + 0x074C0AF9382C026C,0x8A6A0F0B243A035A,0x6FDAE53C5F88931F,0xC68B98967E538AC3, + 0x44FF59C71AA8E639,0xE2FCE0CE439E9229,0xA20CDE2479D8CD40,0x19E89FA2C8EBD8E9, + 0xF446BBCFF398270C,0x43B3533E2284E455,0xD82F0DCD8E945046,0x51066F12B26CE820, + 0xE73957AF6BC5426D,0x081ECE5A40C16FA0,0x3B193D4FC5BFAB7B,0x7FE66488DF174D42, + 0x0E9814EF705804D8,0x8137AC857C39D7C6,0xB1733244E185A821,0x695C3F896F11F867, + 0xF6CF0657E3EFF524,0x1AABF276D02963D5,0x2DA3664E75B91E5E,0x0289BD981077D228, + 0x90C1FD7DF413608F,0x3C5537B6FD93A917,0xAA12107E3919A2E0,0x0686DAB530996B78, + 0xDAA6B0559EE3826E,0xC34E2FF756085A87,0x6D5358A44FFF4137,0xFC587595B35948AC, + 0x7CA5095CC7D5F67E,0xFB147F6C8B754AC0,0xBFEB26AB91DDACF9,0x6896EFC567A49173, + 0xCA9A31E11E7C5C33,0xBBE44186B13315A9,0x0DDB793B689ABFE4,0x70B4A02BA7FA208E, + 0xE47A3A7B7307F951,0x8CECD5BE14A36822,0xEEED49B923B144D9,0x17708B4DB8B3DC31, + 0x6088219F2765FED3,0xB3FA8FDCF1F27A09,0x910B2D31FCA6099B,0x0F52C4A378ED6DCC, + 0x50CCBF5EBAD98134,0x6BD582117F662A4F,0x94CE9A50D4FDD9DF,0x2B25BCFB45207526, + 0x67C42B661F49FCBF,0x492420FC723259DD,0x03436DD418C2BB3C,0x1F6E4517F872B391, + 0xA08563BC69AF1F68,0xD43EA4BAEEBB86B6,0x01CAD04C08B56914,0xAC94CACB0980C998, + 0x54C3D8739A373864,0x26FEC5C02DBACAC2,0xDEA9D778BE0D3B3E,0x040F672D20EEB950, + 0xE5B0EA377BB29045,0xF30AB136CBB42560,0x62019C0737122CFB,0xE86B930C13282FA1, + 0xCC1CEB542EE5374B,0x538FD28AA21B3A08,0x1B61223AD89C0AC1,0x36C24474AD25149F, + 0x7A23D3E9F74C9D06,0xBE21F6E79968C5ED,0xCF5F868036278C77,0xF705D61BEB5A9C30, + 0x4D2B47D152DCE08D,0x5F9E7BFDC234ECF8,0x247778583DCD18EA,0x867BA67C4415D5AA, + 
0x4CE1979D5A698999,0x0000000000000000,0xEC64F42133C696F1,0xB57C5569C16B1171, + 0xC1C7926F467F88AF,0x654D96FE0F3E2E97,0x15F936D5A8C40E19,0xB8A72C52A9F1AE95, + 0xA9517DAA21DB19DC,0x58D27104FA18EE94,0x5918A148F2AD8780,0x5CDD1629DAF657C4, + 0x8274C15164FB6CFA,0xD1FB13DBC6E056F2,0x7D6FD910CF609F6A,0xB63F38BDD9A9AA4D, + 0x3D9FE7FAF526C003,0x74BBC706871499DE,0xDF630734B6B8522A,0x3AD3ED03CD0AC26F, + 0xFADEAF2083C023D4,0xC00D42234ECAE1BB,0x8538CBA85CD76E96,0xC402250E6E2458EB, + 0x47BC3413026A5D05,0xAFD7A71F114272A4,0x978DF784CC3F62E3,0xB96DFC1EA144C781, + 0x21B2CF391596C8AE,0x318E4E8D950916F3,0xCE9556CC3E92E563,0x385A509BDD7D1047, + 0x358129A0B5E7AFA3,0xE6F387E363702B79,0xE0755D5653E94001,0x7BE903A5FFF9F412, + 0x12B53C2C90E80C75,0x3307F315857EC4DB,0x8FAFB86A0C61D31E,0xD9E5DD8186213952, + 0x77F8AAD29FD622E2,0x25BDA814357871FE,0x7571174A8FA1F0CA,0x137FEC60985D6561, + 0x30449EC19DBC7FE7,0xA540D4DD41F4CF2C,0xDC206AE0AE7AE916,0x5B911CD0E2DA55A8, + 0xB2305F90F947131D,0x344BF9ECBD52C6B7,0x5D17C665D2433ED0,0x18224FEEC05EB1FD, + 0x9E59E992844B6457,0x9A568EBFA4A5DD07,0xA3C60E68716DA454,0x7E2CB4C4D7A22456, + 0x87B176304CA0BCBE,0x413AEEA632F3367D,0x9915E36BBC67663B,0x40F03EEA3A465F69, + 0x1C2D28C3E0B008AD,0x4E682A054A1E5BB1,0x05C5B761285BD044,0xE1BF8D1A5B5C2915, + 0xF2C0617AC3014C74,0xB7F5E8F1D11CC359,0x63CB4C4B3FA745EF,0x9D1A84469C89DF6B, + 0xE33630824B2BFB3D,0xD5F474F6E60EEFA2,0xF58C6B83FB2D4E18,0x4676E45F0ADF3411, + 0x20781F751D23A1BA,0xBD629B3381AA7ED1,0xAE1D775319F71BB0,0xFED1C80DA32E9A84, + 0x5509083F92825170,0x29AC01635557A70E,0xA7C9694551831D04,0x8E65682604D4BA0A, + 0x11F651F8882AB749,0xD77DC96EF6793D8A,0xEF2799F52B042DCD,0x48EEF0B07A8730C9, + 0x22F1A2ED0D547392,0x6142F1D32FD097C7,0x4A674D286AF0E2E1,0x80FD7CC9748CBED2, + 0x717E7067AF4F499A,0x938290A9ECD1DBB3,0x88E3B293344DD172,0x2734158C250FA3D6 + } +}; + +// Constant values for KeySchedule function +const unsigned char C[12][64] = { + { + 0xB1,0x08,0x5B,0xDA,0x1E,0xCA,0xDA,0xE9,0xEB,0xCB,0x2F,0x81,0xC0,0x65,0x7C,0x1F, + 
0x2F,0x6A,0x76,0x43,0x2E,0x45,0xD0,0x16,0x71,0x4E,0xB8,0x8D,0x75,0x85,0xC4,0xFC, + 0x4B,0x7C,0xE0,0x91,0x92,0x67,0x69,0x01,0xA2,0x42,0x2A,0x08,0xA4,0x60,0xD3,0x15, + 0x05,0x76,0x74,0x36,0xCC,0x74,0x4D,0x23,0xDD,0x80,0x65,0x59,0xF2,0xA6,0x45,0x07 + }, + { + 0x6F,0xA3,0xB5,0x8A,0xA9,0x9D,0x2F,0x1A,0x4F,0xE3,0x9D,0x46,0x0F,0x70,0xB5,0xD7, + 0xF3,0xFE,0xEA,0x72,0x0A,0x23,0x2B,0x98,0x61,0xD5,0x5E,0x0F,0x16,0xB5,0x01,0x31, + 0x9A,0xB5,0x17,0x6B,0x12,0xD6,0x99,0x58,0x5C,0xB5,0x61,0xC2,0xDB,0x0A,0xA7,0xCA, + 0x55,0xDD,0xA2,0x1B,0xD7,0xCB,0xCD,0x56,0xE6,0x79,0x04,0x70,0x21,0xB1,0x9B,0xB7 + }, + { + 0xF5,0x74,0xDC,0xAC,0x2B,0xCE,0x2F,0xC7,0x0A,0x39,0xFC,0x28,0x6A,0x3D,0x84,0x35, + 0x06,0xF1,0x5E,0x5F,0x52,0x9C,0x1F,0x8B,0xF2,0xEA,0x75,0x14,0xB1,0x29,0x7B,0x7B, + 0xD3,0xE2,0x0F,0xE4,0x90,0x35,0x9E,0xB1,0xC1,0xC9,0x3A,0x37,0x60,0x62,0xDB,0x09, + 0xC2,0xB6,0xF4,0x43,0x86,0x7A,0xDB,0x31,0x99,0x1E,0x96,0xF5,0x0A,0xBA,0x0A,0xB2 + }, + { + 0xEF,0x1F,0xDF,0xB3,0xE8,0x15,0x66,0xD2,0xF9,0x48,0xE1,0xA0,0x5D,0x71,0xE4,0xDD, + 0x48,0x8E,0x85,0x7E,0x33,0x5C,0x3C,0x7D,0x9D,0x72,0x1C,0xAD,0x68,0x5E,0x35,0x3F, + 0xA9,0xD7,0x2C,0x82,0xED,0x03,0xD6,0x75,0xD8,0xB7,0x13,0x33,0x93,0x52,0x03,0xBE, + 0x34,0x53,0xEA,0xA1,0x93,0xE8,0x37,0xF1,0x22,0x0C,0xBE,0xBC,0x84,0xE3,0xD1,0x2E + }, + { + 0x4B,0xEA,0x6B,0xAC,0xAD,0x47,0x47,0x99,0x9A,0x3F,0x41,0x0C,0x6C,0xA9,0x23,0x63, + 0x7F,0x15,0x1C,0x1F,0x16,0x86,0x10,0x4A,0x35,0x9E,0x35,0xD7,0x80,0x0F,0xFF,0xBD, + 0xBF,0xCD,0x17,0x47,0x25,0x3A,0xF5,0xA3,0xDF,0xFF,0x00,0xB7,0x23,0x27,0x1A,0x16, + 0x7A,0x56,0xA2,0x7E,0xA9,0xEA,0x63,0xF5,0x60,0x17,0x58,0xFD,0x7C,0x6C,0xFE,0x57 + }, + { + 0xAE,0x4F,0xAE,0xAE,0x1D,0x3A,0xD3,0xD9,0x6F,0xA4,0xC3,0x3B,0x7A,0x30,0x39,0xC0, + 0x2D,0x66,0xC4,0xF9,0x51,0x42,0xA4,0x6C,0x18,0x7F,0x9A,0xB4,0x9A,0xF0,0x8E,0xC6, + 0xCF,0xFA,0xA6,0xB7,0x1C,0x9A,0xB7,0xB4,0x0A,0xF2,0x1F,0x66,0xC2,0xBE,0xC6,0xB6, + 0xBF,0x71,0xC5,0x72,0x36,0x90,0x4F,0x35,0xFA,0x68,0x40,0x7A,0x46,0x64,0x7D,0x6E + }, + { + 
0xF4,0xC7,0x0E,0x16,0xEE,0xAA,0xC5,0xEC,0x51,0xAC,0x86,0xFE,0xBF,0x24,0x09,0x54, + 0x39,0x9E,0xC6,0xC7,0xE6,0xBF,0x87,0xC9,0xD3,0x47,0x3E,0x33,0x19,0x7A,0x93,0xC9, + 0x09,0x92,0xAB,0xC5,0x2D,0x82,0x2C,0x37,0x06,0x47,0x69,0x83,0x28,0x4A,0x05,0x04, + 0x35,0x17,0x45,0x4C,0xA2,0x3C,0x4A,0xF3,0x88,0x86,0x56,0x4D,0x3A,0x14,0xD4,0x93 + }, + { + 0x9B,0x1F,0x5B,0x42,0x4D,0x93,0xC9,0xA7,0x03,0xE7,0xAA,0x02,0x0C,0x6E,0x41,0x41, + 0x4E,0xB7,0xF8,0x71,0x9C,0x36,0xDE,0x1E,0x89,0xB4,0x44,0x3B,0x4D,0xDB,0xC4,0x9A, + 0xF4,0x89,0x2B,0xCB,0x92,0x9B,0x06,0x90,0x69,0xD1,0x8D,0x2B,0xD1,0xA5,0xC4,0x2F, + 0x36,0xAC,0xC2,0x35,0x59,0x51,0xA8,0xD9,0xA4,0x7F,0x0D,0xD4,0xBF,0x02,0xE7,0x1E + }, + { + 0x37,0x8F,0x5A,0x54,0x16,0x31,0x22,0x9B,0x94,0x4C,0x9A,0xD8,0xEC,0x16,0x5F,0xDE, + 0x3A,0x7D,0x3A,0x1B,0x25,0x89,0x42,0x24,0x3C,0xD9,0x55,0xB7,0xE0,0x0D,0x09,0x84, + 0x80,0x0A,0x44,0x0B,0xDB,0xB2,0xCE,0xB1,0x7B,0x2B,0x8A,0x9A,0xA6,0x07,0x9C,0x54, + 0x0E,0x38,0xDC,0x92,0xCB,0x1F,0x2A,0x60,0x72,0x61,0x44,0x51,0x83,0x23,0x5A,0xDB + }, + { + 0xAB,0xBE,0xDE,0xA6,0x80,0x05,0x6F,0x52,0x38,0x2A,0xE5,0x48,0xB2,0xE4,0xF3,0xF3, + 0x89,0x41,0xE7,0x1C,0xFF,0x8A,0x78,0xDB,0x1F,0xFF,0xE1,0x8A,0x1B,0x33,0x61,0x03, + 0x9F,0xE7,0x67,0x02,0xAF,0x69,0x33,0x4B,0x7A,0x1E,0x6C,0x30,0x3B,0x76,0x52,0xF4, + 0x36,0x98,0xFA,0xD1,0x15,0x3B,0xB6,0xC3,0x74,0xB4,0xC7,0xFB,0x98,0x45,0x9C,0xED + }, + { + 0x7B,0xCD,0x9E,0xD0,0xEF,0xC8,0x89,0xFB,0x30,0x02,0xC6,0xCD,0x63,0x5A,0xFE,0x94, + 0xD8,0xFA,0x6B,0xBB,0xEB,0xAB,0x07,0x61,0x20,0x01,0x80,0x21,0x14,0x84,0x66,0x79, + 0x8A,0x1D,0x71,0xEF,0xEA,0x48,0xB9,0xCA,0xEF,0xBA,0xCD,0x1D,0x7D,0x47,0x6E,0x98, + 0xDE,0xA2,0x59,0x4A,0xC0,0x6F,0xD8,0x5D,0x6B,0xCA,0xA4,0xCD,0x81,0xF3,0x2D,0x1B + }, + { + 0x37,0x8E,0xE7,0x67,0xF1,0x16,0x31,0xBA,0xD2,0x13,0x80,0xB0,0x04,0x49,0xB1,0x7A, + 0xCD,0xA4,0x3C,0x32,0xBC,0xDF,0x1D,0x77,0xF8,0x20,0x12,0xD4,0x30,0x21,0x9F,0x9B, + 0x5D,0x80,0xEF,0x9D,0x18,0x91,0xCC,0x86,0xE7,0x1D,0xA4,0xAA,0x88,0xE1,0x28,0x52, + 
0xFA,0xF4,0x17,0xD5,0xD9,0xB2,0x1B,0x99,0x48,0xBC,0x92,0x4A,0xF1,0x1B,0xD7,0x20 + } +}; + + +void AddModulo512(const void *a,const void *b,void *c) +{ + const unsigned char *A=a, *B=b; + unsigned char *C=c; + int t = 0; +#ifdef FULL_UNROLL +#define ADDBYTE_8(i) t = A[i] + B[i] + (t >> 8); C[i] = t & 0xFF; + + ADDBYTE_8(63) + ADDBYTE_8(62) + ADDBYTE_8(61) + ADDBYTE_8(60) + ADDBYTE_8(59) + ADDBYTE_8(58) + ADDBYTE_8(57) + ADDBYTE_8(56) + ADDBYTE_8(55) + ADDBYTE_8(54) + ADDBYTE_8(53) + ADDBYTE_8(52) + ADDBYTE_8(51) + ADDBYTE_8(50) + ADDBYTE_8(49) + ADDBYTE_8(48) + ADDBYTE_8(47) + ADDBYTE_8(46) + ADDBYTE_8(45) + ADDBYTE_8(44) + ADDBYTE_8(43) + ADDBYTE_8(42) + ADDBYTE_8(41) + ADDBYTE_8(40) + ADDBYTE_8(39) + ADDBYTE_8(38) + ADDBYTE_8(37) + ADDBYTE_8(36) + ADDBYTE_8(35) + ADDBYTE_8(34) + ADDBYTE_8(33) + ADDBYTE_8(32) + ADDBYTE_8(31) + ADDBYTE_8(30) + ADDBYTE_8(29) + ADDBYTE_8(28) + ADDBYTE_8(27) + ADDBYTE_8(26) + ADDBYTE_8(25) + ADDBYTE_8(24) + ADDBYTE_8(23) + ADDBYTE_8(22) + ADDBYTE_8(21) + ADDBYTE_8(20) + ADDBYTE_8(19) + ADDBYTE_8(18) + ADDBYTE_8(17) + ADDBYTE_8(16) + ADDBYTE_8(15) + ADDBYTE_8(14) + ADDBYTE_8(13) + ADDBYTE_8(12) + ADDBYTE_8(11) + ADDBYTE_8(10) + ADDBYTE_8(9) + ADDBYTE_8(8) + ADDBYTE_8(7) + ADDBYTE_8(6) + ADDBYTE_8(5) + ADDBYTE_8(4) + ADDBYTE_8(3) + ADDBYTE_8(2) + ADDBYTE_8(1) + ADDBYTE_8(0) + +#else + int i = 0; + + for(i=63;i>=0;i--) + { + t = A[i] + B[i] + (t >> 8); + C[i] = t & 0xFF; + } +#endif +} + +void AddXor512(const void *a,const void *b,void *c) +{ + const unsigned long long *A=a, *B=b; + unsigned long long *C=c; +#ifdef FULL_UNROLL + C[0] = A[0] ^ B[0]; + C[1] = A[1] ^ B[1]; + C[2] = A[2] ^ B[2]; + C[3] = A[3] ^ B[3]; + C[4] = A[4] ^ B[4]; + C[5] = A[5] ^ B[5]; + C[6] = A[6] ^ B[6]; + C[7] = A[7] ^ B[7]; +#else + int i = 0; + + for(i=0;i<8;i++) + { + C[i] = A[i] ^ B[i]; + } +#endif +} + +void F(unsigned char *state) +{ + unsigned long long return_state[8]; + register unsigned long long r = 0; + r ^= T[0][state[56]]; + r ^= T[1][state[48]]; + r 
^= T[2][state[40]]; + r ^= T[3][state[32]]; + r ^= T[4][state[24]]; + r ^= T[5][state[16]]; + r ^= T[6][state[8]]; + r ^= T[7][state[0]]; + return_state[0] = r; + r = 0; + + r ^= T[0][state[57]]; + r ^= T[1][state[49]]; + r ^= T[2][state[41]]; + r ^= T[3][state[33]]; + r ^= T[4][state[25]]; + r ^= T[5][state[17]]; + r ^= T[6][state[9]]; + r ^= T[7][state[1]]; + return_state[1] = r; + r = 0; + + r ^= T[0][state[58]]; + r ^= T[1][state[50]]; + r ^= T[2][state[42]]; + r ^= T[3][state[34]]; + r ^= T[4][state[26]]; + r ^= T[5][state[18]]; + r ^= T[6][state[10]]; + r ^= T[7][state[2]]; + return_state[2] = r; + r = 0; + + r ^= T[0][state[59]]; + r ^= T[1][state[51]]; + r ^= T[2][state[43]]; + r ^= T[3][state[35]]; + r ^= T[4][state[27]]; + r ^= T[5][state[19]]; + r ^= T[6][state[11]]; + r ^= T[7][state[3]]; + return_state[3] = r; + r = 0; + + r ^= T[0][state[60]]; + r ^= T[1][state[52]]; + r ^= T[2][state[44]]; + r ^= T[3][state[36]]; + r ^= T[4][state[28]]; + r ^= T[5][state[20]]; + r ^= T[6][state[12]]; + r ^= T[7][state[4]]; + return_state[4] = r; + r = 0; + + r ^= T[0][state[61]]; + r ^= T[1][state[53]]; + r ^= T[2][state[45]]; + r ^= T[3][state[37]]; + r ^= T[4][state[29]]; + r ^= T[5][state[21]]; + r ^= T[6][state[13]]; + r ^= T[7][state[5]]; + return_state[5] = r; + r = 0; + + r ^= T[0][state[62]]; + r ^= T[1][state[54]]; + r ^= T[2][state[46]]; + r ^= T[3][state[38]]; + r ^= T[4][state[30]]; + r ^= T[5][state[22]]; + r ^= T[6][state[14]]; + r ^= T[7][state[6]]; + return_state[6] = r; + r = 0; + + r ^= T[0][state[63]]; + r ^= T[1][state[55]]; + r ^= T[2][state[47]]; + r ^= T[3][state[39]]; + r ^= T[4][state[31]]; + r ^= T[5][state[23]]; + r ^= T[6][state[15]]; + r ^= T[7][state[7]]; + return_state[7] = r; + + memcpy(state,(unsigned char*)return_state,64); +} + +#define KeySchedule(K,i) AddXor512(K,C[i],K); F(K); + +void E(unsigned char *K,const unsigned char *m, unsigned char *state) +{ +#ifdef FULL_UNROLL + AddXor512(m,K,state); + + F(state); + KeySchedule(K,0); + 
AddXor512(state,K,state); + + F(state); + KeySchedule(K,1); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,2); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,3); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,4); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,5); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,6); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,7); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,8); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,9); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,10); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,11); + AddXor512(state,K,state); +#else + int i = 0; + + AddXor512(m,K,state); + + for(i=0;i<12;i++) + { + F(state); + KeySchedule(K,i); + AddXor512(state,K,state); + } +#endif +} + +static void g_N(const unsigned char *N,unsigned char *h,const unsigned char *m) +{ + unsigned char t[64], K[64]; + + AddXor512(N,h,K); + + F(K); + + E(K,m,t); + + AddXor512(t,h,t); + AddXor512(t,m,h); +} + +static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long long length,unsigned char *out) +{ + unsigned char v512[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00 + }; + unsigned char v0[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + }; + unsigned char Sigma[64] = { + 
/*
 * Core hashing routine shared by the 256- and 512-bit variants.
 *
 *   IV       64-byte initial chaining value; it is used as the working
 *            chaining value and is therefore overwritten.
 *   message  input buffer
 *   length   input length in BITS
 *   out      receives the final 64-byte chaining value
 *
 * Full 512-bit blocks are consumed starting from the END of the buffer and
 * moving towards its start; the remaining head bytes form the last, padded
 * block, which is right-aligned inside a zeroed 64-byte buffer.
 */
static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long long length,unsigned char *out)
{
    unsigned char block_bits[64] = {0};   /* bit count added to the counter per block */
    unsigned char zero[64] = {0};         /* all-zero N used by the two final compressions */
    unsigned char sigma[64] = {0};        /* running sum of all message blocks */
    unsigned char counter[64] = {0};      /* running count of processed bits */
    unsigned char block[64];
    unsigned char *chain = IV;
    unsigned long long remaining = length;
    int byte_aligned;

    /* 0x0200 == 512, stored in the last two bytes */
    block_bits[62] = 0x02;

    /* Stage 2: every complete 512-bit block */
    for (; remaining >= 512; remaining -= 512)
    {
        byte_aligned = ((remaining & 0x7) == 0);
        memcpy(block, message + remaining/8 - 63 - byte_aligned, 64);

        g_N(counter, chain, block);
        AddModulo512(counter, block_bits, counter);
        AddModulo512(sigma, block, sigma);
    }

    /* Build the final block from whatever is left at the head of the message */
    byte_aligned = ((remaining & 0x7) == 0);
    memset(block, 0, 64);
    memcpy(block + 63 - remaining/8 + byte_aligned, message, remaining/8 + 1 - byte_aligned);

    /* Stage 3: padding bit, then fold in the last block, the counter and the sum */
    block[ 63 - remaining/8 ] |= (1 << (remaining & 0x7));

    g_N(counter, chain, block);
    block_bits[63] = remaining & 0xFF;
    block_bits[62] = remaining >> 8;
    AddModulo512(counter, block_bits, counter);

    AddModulo512(sigma, block, sigma);

    g_N(zero, chain, counter);
    g_N(zero, chain, sigma);

    memcpy(out, chain, 64);
}

/*
 * 512-bit digest: hash_X started from an all-zero IV.
 * `length` is in bits; `out` receives 64 bytes.
 */
static void hash_512(const unsigned char *message,unsigned long long length,unsigned char *out)
{
    unsigned char IV[64] = {0};

    hash_X(IV,message,length,out);
}
/*
 * 256-bit digest: hash_X started from an IV whose 64 bytes are all 0x01;
 * the first 32 bytes of the 64-byte result are returned.
 * `length` is in bits; `out` receives 32 bytes.
 */
static void hash_256(const unsigned char *message,unsigned long long length,unsigned char *out)
{
    unsigned char IV[64];
    unsigned char digest[64];

    memset(IV, 0x01, sizeof(IV));

    hash_X(IV,message,length,digest);

    memcpy(out,digest,32);
}

/* see sph_gost.h -- nothing to set up: the context only ever holds a digest */
void sph_gost256_init(void *cc)
{
    (void)cc;
}

/*
 * see sph_gost.h -- hashes `len` bytes of `data` in one shot and stores the
 * 32-byte digest at the start of the context.
 */
void sph_gost256(void *cc, const void *data, size_t len)
{
    size_t bit_count = 8*len;

    hash_256(data, bit_count, cc);
}

/* see sph_gost.h -- copies the digest stored by sph_gost256() to dst */
void sph_gost256_close(void *cc, void *dst)
{
    memcpy(dst, cc, 32);
}

/* see sph_gost.h -- not implemented: dst is left untouched */
void sph_gost256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    (void)cc;
    (void)ub;
    (void)n;
    (void)dst;
}

/* see sph_gost.h -- nothing to set up: the context only ever holds a digest */
void sph_gost512_init(void *cc)
{
    (void)cc;
}

/*
 * see sph_gost.h -- hashes `len` bytes of `data` in one shot and stores the
 * 64-byte digest at the start of the context.
 */
void sph_gost512(void *cc, const void *data, size_t len)
{
    size_t bit_count = 8*len;

    hash_512(data, bit_count, cc);
}

/* see sph_gost.h -- copies the digest stored by sph_gost512() to dst */
void sph_gost512_close(void *cc, void *dst)
{
    memcpy(dst, cc, 64);
}

/* see sph_gost.h -- not implemented: dst is left untouched */
void sph_gost512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    (void)cc;
    (void)ub;
    (void)n;
    (void)dst;
}


#ifdef __cplusplus
}
#endif
/* $Id: sph_gost.h 216 2010-06-08 09:46:57Z tp $ */
/**
 * GOST interface. This is the interface for the GOST R 12 hash function,
 * with output lengths 256 and 512 bits.
 *
 * NOTE(review): this header was derived from the sphlib Keccak header; the
 * comments below have been adjusted to describe what sph/gost.c actually
 * does, which is a one-shot (non-streaming) computation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @file     sph_gost.h
 * @author   Thomas Pornin
 */

#ifndef SPH_GOST_H__
#define SPH_GOST_H__

#ifdef __cplusplus
extern "C"{
#endif

#include <stddef.h>
#include "sph_types.h"

/**
 * Output size (in bits) for GOST-256.
 */
#define SPH_SIZE_gost256   256

/**
 * Output size (in bits) for GOST-512.
 */
#define SPH_SIZE_gost512   512

/**
 * The structures below are contexts for GOST computations. Once a
 * GOST computation has been performed, the context can be reused for
 * another computation.
 *
 * The contents of these structures are private. A context can be
 * cloned by copying it (e.g. with a simple memcpy()).
 *
 * NOTE(review): sph/gost.c only ever writes the finished digest to the
 * first bytes of the context (i.e. into buf); ptr and V are not
 * referenced by the functions visible in that file.
 */

/**
 * This structure is a context for GOST-256 computations.
 */

typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[32];    /* first field, for alignment; receives the 32-byte digest */
	size_t ptr;
	sph_u32 V[3][8];
#endif
} sph_gost256_context;

/**
 * This structure is a context for GOST-512 computations.
 */
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[64];    /* first field, for alignment; receives the 64-byte digest */
	size_t ptr;
	sph_u32 V[5][8];
#endif
} sph_gost512_context;


/**
 * Initialize a GOST-256 context. This process performs no memory allocation.
 *
 * NOTE(review): the implementation in sph/gost.c is an empty function.
 *
 * @param cc   the GOST-256 context (pointer to a
 *             sph_gost256_context)
 */
void sph_gost256_init(void *cc);

/**
 * Hash some data bytes.
 *
 * NOTE(review): sph/gost.c hashes the whole of data in a single
 * shot and stores the 32-byte digest at the start of the context. Data is
 * not accumulated across calls: each call replaces the previous digest,
 * and a call with len equal to zero stores the digest of the empty message.
 *
 * @param cc     the GOST-256 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
void sph_gost256(void *cc, const void *data, size_t len);

/**
 * Terminate the current GOST-256 computation and output the result into
 * the provided buffer. The destination buffer must be wide enough to
 * accommodate the result (32 bytes).
 *
 * NOTE(review): sph/gost.c copies the first 32 bytes of the context to
 * dst and does not reinitialize the context.
 *
 * @param cc    the GOST-256 context
 * @param dst   the destination buffer
 */
void sph_gost256_close(void *cc, void *dst);

/**
 * Add a few additional bits (0 to 7) to the current computation, then
 * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (32 bytes). If bit number i
 * in ub has value 2^i, then the extra bits are those
 * numbered 7 downto 8-n (this is the big-endian convention at the byte
 * level).
 *
 * NOTE(review): sph/gost.c leaves this function as an empty stub; dst is
 * not written. Use sph_gost256_close() instead.
 *
 * @param cc    the GOST-256 context
 * @param ub    the extra bits
 * @param n     the number of extra bits (0 to 7)
 * @param dst   the destination buffer
 */
void sph_gost256_addbits_and_close(
	void *cc, unsigned ub, unsigned n, void *dst);

/**
 * Initialize a GOST-512 context. This process performs no memory allocation.
 *
 * NOTE(review): the implementation in sph/gost.c is an empty function.
 *
 * @param cc   the GOST-512 context (pointer to a
 *             sph_gost512_context)
 */
void sph_gost512_init(void *cc);

/**
 * Hash some data bytes.
 *
 * NOTE(review): sph/gost.c hashes the whole of data in a single
 * shot and stores the 64-byte digest at the start of the context. Data is
 * not accumulated across calls: each call replaces the previous digest,
 * and a call with len equal to zero stores the digest of the empty message.
 *
 * @param cc     the GOST-512 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
void sph_gost512(void *cc, const void *data, size_t len);

/**
 * Terminate the current GOST-512 computation and output the result into
 * the provided buffer. The destination buffer must be wide enough to
 * accommodate the result (64 bytes).
 *
 * NOTE(review): sph/gost.c copies the first 64 bytes of the context to
 * dst and does not reinitialize the context.
 *
 * @param cc    the GOST-512 context
 * @param dst   the destination buffer
 */
void sph_gost512_close(void *cc, void *dst);

/**
 * Add a few additional bits (0 to 7) to the current computation, then
 * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (64 bytes). If bit number i
 * in ub has value 2^i, then the extra bits are those
 * numbered 7 downto 8-n (this is the big-endian convention at the byte
 * level).
 *
 * NOTE(review): sph/gost.c leaves this function as an empty stub; dst is
 * not written. Use sph_gost512_close() instead.
 *
 * @param cc    the GOST-512 context
 * @param ub    the extra bits
 * @param n     the number of extra bits (0 to 7)
 * @param dst   the destination buffer
 */
void sph_gost512_addbits_and_close(
	void *cc, unsigned ub, unsigned n, void *dst);

#ifdef __cplusplus
}
#endif

#endif
"sibcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 11, 1 * 4 * 4194304, 0, sibcoin_regenhash, NULL, NULL, queue_sibcoin_mod_kernel, gen_hash, append_x11_compiler_options }, { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x13_compiler_options }, { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, NULL, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options }, From f26cf3f546baa07437da538439e5dad838dd474d Mon Sep 17 00:00:00 2001 From: ivan Date: Mon, 7 Nov 2016 10:26:19 +0100 Subject: [PATCH 58/63] reduce buffer because I can't find any reason for such big one --- algorithm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algorithm.c b/algorithm.c index 6d3a2b946..b1ffa3277 100644 --- a/algorithm.c +++ b/algorithm.c @@ -1169,7 +1169,7 @@ static algorithm_settings_t algos[] = { { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, NULL, NULL, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options }, - { "sibcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 11, 8 * 16 * 4194304, 0, sibcoin_regenhash, NULL, NULL, queue_sibcoin_mod_kernel, gen_hash, append_x11_compiler_options }, + { "sibcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 11, 2 * 16 * 4194304, 0, sibcoin_regenhash, NULL, NULL, queue_sibcoin_mod_kernel, gen_hash, append_x11_compiler_options }, { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x13_compiler_options }, { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, 
/*
 * Upload the work header and bind the arguments of the "pascal" kernel.
 *
 * The first 196 bytes of blk->work->data are word-swapped with flip196()
 * into clState->cldata and written (blocking) to CLbuffer0.  The kernel
 * arguments are then bound in the order input buffer, output buffer,
 * target, midstate, which is the parameter order of search() in
 * kernel/pascal.cl.
 *
 * Returns the status of the buffer write combined with whatever
 * CL_SET_ARG folds into `status`.
 * NOTE(review): CL_SET_ARG is defined elsewhere; it presumably uses the
 * locals `kernel`, `num` and `status` by name -- do not rename them.
 */
static cl_int queue_pascal_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
  cl_kernel *kernel = &clState->kernel;
  unsigned int num = 0;
  cl_ulong le_target;
  cl_int status = 0;

  /* 64-bit target = bytes 24..31 of the device target */
  le_target = *(cl_ulong *)(blk->work->device_target + 24);
  flip196(clState->cldata, blk->work->data);
  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 196, clState->cldata, 0, NULL, NULL);

  CL_SET_ARG(clState->CLbuffer0);
  CL_SET_ARG(clState->outputBuffer);
  CL_SET_ARG(le_target);
  /* midstate is filled in by pascal_midstate() from the first 192 header bytes */
  CL_SET_ARG(blk->work->midstate);

  return status;
}
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + * + * Modified for more speed by BlueDragon747 for the Blakecoin project + */ + +#include +#include +#include +#include + +#include "sph/sph_sha2.h" +#include "algorithm/pascal.h" + +static const uint32_t diff1targ_pascal = 0x000000ff; + +void pascalhash(void *state, const void *input) +{ + sph_sha256_context ctx_sha; + uint32_t hash[16]; + + sph_sha256_init(&ctx_sha); + sph_sha256(&ctx_sha, input, 200); + sph_sha256_close(&ctx_sha, hash); + + sph_sha256_init(&ctx_sha); + sph_sha256(&ctx_sha, hash, 32); + sph_sha256_close(&ctx_sha, hash); + + memcpy(state, hash, 32); + +} + +void pascal_midstate(struct work *work) +{ + sph_sha256_context ctx_sha; + uint32_t data[48]; + + memcpy(data, work->data, 192); + + sph_sha256_init(&ctx_sha); + sph_sha256 (&ctx_sha, (unsigned char *)data, 192); + + memcpy(work->midstate, ctx_sha.val, 32); + endian_flip32(work->midstate, work->midstate); + +/* + char *strdata, *strmidstate; + strdata = bin2hex(work->data, 192); + strmidstate = bin2hex(work->midstate, 32); + applog(LOG_DEBUG, "data %s midstate %s", strdata, strmidstate); +*/ +} + +static const uint32_t diff1targ = 0x0000ffff; + +/* Used externally as confirmation of correct OCL code */ +int pascal_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[50], ohash[8]; + + memcpy(data, pdata, 200); + data[49] = htobe32(nonce); + pascalhash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void pascal_regenhash(struct work *work) +{ + uint32_t data[64]; + uint32_t hash[16]; + uint32_t *nonce = (uint32_t *)(work->data + 
196); + uint32_t *ohash = (uint32_t *)(work->hash); + + memcpy(data, work->data, 200); + data[49] = htole32(*nonce); + pascalhash(hash, data); + swab256(ohash, hash); +} diff --git a/algorithm/pascal.h b/algorithm/pascal.h new file mode 100644 index 000000000..b213ac566 --- /dev/null +++ b/algorithm/pascal.h @@ -0,0 +1,11 @@ +#ifndef PASCAL_H +#define PASCAL_H + +#include "miner.h" + +extern int pascal_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void pascal_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata); +extern void pascal_midstate(struct work *work); +extern void pascal_regenhash(struct work *work); + +#endif /* PASCAL_H */ \ No newline at end of file diff --git a/kernel/pascal.cl b/kernel/pascal.cl new file mode 100644 index 000000000..ca4a618ea --- /dev/null +++ b/kernel/pascal.cl @@ -0,0 +1,229 @@ +/* +* "pascal" kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* +* Copyright (c) 2015 djm34 +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#if !defined(cl_khr_byte_addressable_store)
#error "Device does not support unaligned stores"
#endif


/* rotate() is a LEFT rotate, so ROL32(x, n) equals a right rotate by 32 - n */
#define ROL32(x, n)  rotate(x, (uint) n)
/* reverse the four bytes of a 32-bit word */
#define SWAP32(a)    (as_uint(as_uchar4(a).wzyx))
/* byte-swaps each 32-bit half of a 64-bit value separately (halves keep their order) */
#define SWAP64(x)    as_ulong(as_uchar8(x).s32107654)

#define SHR(x, n)    ((x) >> n)

/* message-schedule functions: S0 = rotr7 ^ rotr18 ^ shr3, S1 = rotr17 ^ rotr19 ^ shr10 */
#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^  SHR(x, 3))
#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^  SHR(x, 10))

/* round functions: S2 = rotr2 ^ rotr13 ^ rotr22, S3 = rotr6 ^ rotr11 ^ rotr25 */
#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10))
#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7))

/*
 * One SHA-256 round.  Instead of shuffling eight working variables, every
 * call site passes them rotated by one position; only d and h are written.
 * Uses the local `temp1` of the enclosing function.
 */
#define P(a,b,c,d,e,f,g,h,x,K)                  \
{                                               \
    temp1 = h + S3(e) + F1(e,f,g) + (K + x);      \
    d += temp1; h = temp1 + S2(a) + F0(a,b,c);  \
}

/* F0: bitwise majority of its three arguments; F1: per bit, y where x is set, else z */
#define F0(y, x, z) bitselect(z, y, z ^ x)
#define F1(x, y, z) bitselect(z, y, x)

/*
 * Message-schedule expansion.  Rn overwrites Wn in place with the word
 * sixteen positions later and yields that value, so the sixteen W variables
 * act as a rolling window over the schedule.
 */
#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)

/* same values as R14 / R15 without the store: used for the last two rounds */
#define RD14 (S1(W12) + W7 + S0(W15) + W14)
#define RD15 (S1(W13) + W8 + S0(W0) + W15)

/*
 * Generic SHA-256 compression function.
 *
 *   data   one 64-byte block as sixteen 32-bit words
 *   state  the eight-word chaining value going in
 *
 * Returns the new chaining value (working variables + state).
 */
inline uint8 sha256_Transform(uint16 data, uint8 state)
{
	uint temp1;
	uint8 res = state;
	uint W0 = data.s0;
	uint W1 = data.s1;
	uint W2 = data.s2;
	uint W3 = data.s3;
	uint W4 = data.s4;
	uint W5 = data.s5;
	uint W6 = data.s6;
	uint W7 = data.s7;
	uint W8 = data.s8;
	uint W9 = data.s9;
	uint W10 = data.sA;
	uint W11 = data.sB;
	uint W12 = data.sC;
	uint W13 = data.sD;
	uint W14 = data.sE;
	uint W15 = data.sF;

/* short aliases for the eight working variables, removed again below */
#define v0  res.s0
#define v1  res.s1
#define v2  res.s2
#define v3  res.s3
#define v4  res.s4
#define v5  res.s5
#define v6  res.s6
#define v7  res.s7

	/* rounds 0-15: message words used as supplied */
	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);

	/* rounds 16-31: schedule expanded on the fly */
	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);

	/* rounds 32-47 */
	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);

	/* rounds 48-63; the last two schedule words are not stored back (RD14/RD15) */
	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
#undef v0
#undef v1
#undef v2
#undef v3
#undef v4
#undef v5
#undef v6
#undef v7
	return (res + state);
}



/* SHA-256 initial hash value, used as the starting state of the second hash */
static __constant uint8 H256 = {
	0x6A09E667, 0xBB67AE85, 0x3C6EF372,
	0xA54FF53A, 0x510E527F, 0x9B05688C,
	0x1F83D9AB, 0x5BE0CD19
};


/*
 * Template for the last block of the first hash.  Words 0 and 1 are filled
 * in by search(); word 2 carries the 0x80 padding byte and the last word is
 * the message length in bits (0x640 = 1600 bits = 200 bytes).
 */
static __constant uint16 pad_data =
{
	0x00000000, 0x00000000, 0x80000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000640
};

/*
 * Upper half of the single block of the second hash: padding byte followed
 * by the length in bits (0x100 = 256 bits, i.e. the 32-byte first digest).
 */
static __constant uint8 pad_state =
{
	0x80000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000100
};



/*
 * Pascal search kernel: one nonce per work item.
 *
 *   input     header buffer; only 32-bit word 48 is read here, the preceding
 *             192 bytes are represented by `midstate`
 *   output    result buffer; output[0xFF] is an atomically incremented
 *             counter that also selects the slot a found nonce is stored in
 *   target    64-bit threshold compared against the first two digest words
 *   midstate  SHA-256 chaining value after the first 192 header bytes
 *
 * The nonce is the global work-item id and is stored byte-swapped.
 * NOTE(review): as_ulong(state1.s10) presumably places digest word 0 in the
 * high half on little-endian devices -- confirm for the targets in use.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uchar* restrict input, __global uint* restrict output,const ulong target, uint8 midstate )
{
	uint nonce = get_global_id(0);
	uint16 in;
	uint8 state1;
	in = pad_data;
	in.s0 = ((__global const uint *)input)[48];
	in.s1 = nonce;
	/* first hash: finish from the midstate with the final block */
	state1 = sha256_Transform(in, midstate);
	/* second hash: digest in the low half, padding/length in the high half */
	in.lo = state1;
	in.hi = pad_state;
	state1 = sha256_Transform(in, H256);

	if (as_ulong(state1.s10) <= target) {
		output[atomic_inc(output + 0xFF)] = SWAP32(nonce);
	}
}
@@ -788,6 +798,10 @@ static inline void endian_flip180(void *dest_p, const void *src_p) { flip180(dest_p, src_p); } +static inline void endian_flip196(void *dest_p, const void *src_p) /* swapping variant: forwards both pointers to flip196() */ +{ + flip196(dest_p, src_p); +} #else static inline void @@ -807,6 +821,10 @@ static inline void endian_flip180(void __maybe_unused *dest_p, const void __maybe_unused *src_p) { } +static inline void +endian_flip196(void __maybe_unused *dest_p, const void __maybe_unused *src_p) /* #else variant: deliberately empty, performs no swap; presumably selected on hosts that need none - confirm against the #if outside this hunk */ +{ +} #endif diff --git a/ocl.c b/ocl.c index 1dd74ee46..44920d2ea 100644 --- a/ocl.c +++ b/ocl.c @@ -762,6 +762,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg if (algorithm->type == ALGO_CRE) readbufsize = 168; else if (algorithm->type == ALGO_DECRED) readbufsize = 192; else if (algorithm->type == ALGO_LBRY) readbufsize = 112; + else if (algorithm->type == ALGO_PASCAL) readbufsize = 196; /* same value as the Pascal nonce byte offset used in sgminer.c */ if (algorithm->rw_buffer_size < 0) { // calc buffer size for neoscrypt diff --git a/sgminer.c b/sgminer.c index 3f8446f4e..79503abe9 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5633,6 +5633,9 @@ static void *stratum_sthread(void *userdata) else if (pool->algorithm.type == ALGO_SIA) { nonce = *((uint32_t *)(work->data + 32)); } + else if (pool->algorithm.type == ALGO_PASCAL) { + nonce = htobe32(*((uint32_t *)(work->data + 196))); /* Pascal: 32-bit nonce read at byte offset 196 of work->data, then passed through htobe32 */ + } else { nonce = *((uint32_t *)(work->data + 76)); } @@ -6118,6 +6121,15 @@ static void gen_stratum_work(struct pool *pool, struct work *work) cg_wlock(&pool->data_lock); + if (pool->algorithm.type == ALGO_PASCAL) { +/* TODO: refactor this. What it does: keeps every byte of nonce2 within 0x2d..0xfe. For bytes 0-6 (low to high), a byte below 0x2d is set to 0x2d together with all lower bytes; a byte equal to 0xff is reset the same way and 1 is added to the next higher byte. If the top byte is outside the range, the whole nonce2 restarts at 0x2d2d2d2d2d2d2d2d. */ + for (i = 0; i < 56; i += 8) { + if (((pool->nonce2 >> i) & 0xff) < 0x2d) pool->nonce2 = (pool->nonce2 & (0xffffffffffffff00 << i)) + (0x002d2d2d2d2d2d2d >> (48 - i)); + if (((pool->nonce2 >> i) & 0xff) > 0xfe) pool->nonce2 = (pool->nonce2 & (0xffffffffffffff00 << i)) + (0x012d2d2d2d2d2d2d >> (48 - i)); + } + if (((pool->nonce2 >> 56) & 0xff) < 0x2d) pool->nonce2 = 0x2d2d2d2d2d2d2d2d; + if (((pool->nonce2 >> 56) & 0xff) > 
0xfe) pool->nonce2 = 0x2d2d2d2d2d2d2d2d; + } nonce2le = htole64(pool->nonce2); if (pool->algorithm.type != ALGO_DECRED && pool->algorithm.type != ALGO_SIA) { /* Update coinbase. Always use an LE encoded nonce2 to fill in values @@ -6130,7 +6142,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) /* Downgrade to a read lock to read off the pool variables */ cg_dwlock(&pool->data_lock); - if (pool->algorithm.type != ALGO_DECRED && pool->algorithm.type != ALGO_SIA) { + if (pool->algorithm.type != ALGO_DECRED && pool->algorithm.type != ALGO_SIA && pool->algorithm.type != ALGO_PASCAL) { /* Generate merkle root */ pool->algorithm.gen_hash(pool->coinbase, pool->swork.cb_len, merkle_root); memcpy(merkle_sha, merkle_root, 32); @@ -6192,6 +6204,14 @@ static void gen_stratum_work(struct pool *pool, struct work *work) memcpy(work->data + 32 + 8, pool->header_bin + 68, 4); // timestamp flip32(work->data + 32 + 8 + 8, pool->coinbase); // merkleroot } + else if (pool->algorithm.type == ALGO_PASCAL) { + uint32_t temp; + memcpy(work->data, pool->coinbase, pool->swork.cb_len); + hex2bin((unsigned char *)&temp, pool->swork.ntime, 4); + /* Add the ntime decoded above (big endianness). 
*/ + ((uint32_t *)work->data)[48] = be32toh(temp); /* 32-bit word 48 = byte offset 192 of work->data */ + ((uint32_t *)work->data)[49] = 0; /* word 49 = byte offset 196, the Pascal nonce position used by rebuild_nonce; cleared here */ + } else { data32 = (uint32_t *)merkle_sha; swap32 = (uint32_t *)merkle_root; @@ -6216,9 +6236,10 @@ static void gen_stratum_work(struct pool *pool, struct work *work) char *header, *merkle_hash; int datasize = 128; if (pool->algorithm.type == ALGO_DECRED) datasize = 180; + else if (pool->algorithm.type == ALGO_PASCAL) datasize = 256; /* Pascal: the bin2hex call below covers 256 bytes of work->data */ header = bin2hex(work->data, datasize); - if (pool->algorithm.type != ALGO_DECRED) { + if (pool->algorithm.type != ALGO_DECRED && pool->algorithm.type != ALGO_SIA && pool->algorithm.type != ALGO_PASCAL) { merkle_hash = bin2hex((const unsigned char *)merkle_root, 32); applog(LOG_DEBUG, "[THR%d] Generated stratum merkle %s", work->thr_id, merkle_hash); free(merkle_hash); @@ -7165,6 +7186,7 @@ static void rebuild_nonce(struct work *work, uint32_t nonce) else if (work->pool->algorithm.type == ALGO_DECRED) nonce_pos = 140; else if (work->pool->algorithm.type == ALGO_LBRY) nonce_pos = 108; else if (work->pool->algorithm.type == ALGO_SIA) nonce_pos = 32; + else if (work->pool->algorithm.type == ALGO_PASCAL) nonce_pos = 196; /* same byte offset stratum_sthread reads the Pascal nonce from */ uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos); From 13e0210bfc95f31432bbe095f27b66a3b59ca2dc Mon Sep 17 00:00:00 2001 From: elbandi Date: Wed, 1 Feb 2017 03:03:29 +0100 Subject: [PATCH 60/63] Add missing sibcoin algo string --- algorithm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/algorithm.c b/algorithm.c index 36f6e456b..9d2f458e8 100644 --- a/algorithm.c +++ b/algorithm.c @@ -79,7 +79,8 @@ const char *algorithm_type_str[] = { "Sia", "Decred", "Vanilla", - "Lbry" + "Lbry", + "Sibcoin" }; void sha256(const unsigned char *message, unsigned int len, unsigned char *digest) From c72853fa408dcf30bda01371cd577bd6e10e49de Mon Sep 17 00:00:00 2001 From: elbandi Date: Wed, 1 Feb 2017 03:04:30 +0100 Subject: [PATCH 61/63] Version bump to 5.6.0 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/configure.ac b/configure.ac index c1c67d1b7..e4cc47d19 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [5]) -m4_define([v_min], [5]) +m4_define([v_min], [6]) m4_define([v_mic], [0]) m4_define([v_rev], [nicehash]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## From 47938c38b29798c8683fc7496aa3c8eb9018c50e Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 23 Apr 2017 16:03:58 +0000 Subject: [PATCH 62/63] Handle decred new stake version field --- sgminer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sgminer.c b/sgminer.c index 79503abe9..8a79d8bf7 100644 --- a/sgminer.c +++ b/sgminer.c @@ -6195,6 +6195,8 @@ static void gen_stratum_work(struct pool *pool, struct work *work) ((uint32_t *)work->data)[i] = 0; memcpy(work->data + 144, pool->nonce1bin, nonce2_offset); memcpy(work->data + 144 + nonce2_offset, &nonce2le, pool->n2size); + size_t extranonce_len = MAX((int)pool->swork.cb_len - pool->nonce2_offset - pool->n2size, 0); /* coinbase bytes left after nonce2, clamped at 0. NOTE(review): the clamp relies on this subtraction being evaluated as signed - confirm the types of nonce2_offset and n2size */ + memcpy(work->data + 180 - extranonce_len, pool->coinbase + pool->nonce2_offset + pool->n2size, extranonce_len); /* copies that coinbase tail so that it ends at byte offset 180 of work->data */ } else if (pool->algorithm.type == ALGO_SIA) { size_t nonce2_offset = MIN(pool->n1_len, 4); From 002021095feb64065c902863cf717af7f94854bd Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 23 Apr 2017 23:36:51 +0200 Subject: [PATCH 63/63] Version bump to 5.6.1 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e4cc47d19..12dbfc151 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [5]) m4_define([v_min], [6]) -m4_define([v_mic], [0]) +m4_define([v_mic], [1]) m4_define([v_rev], [nicehash]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## 
m4_ifdef([v_rev], [m4_define([v_ver], [v_maj.v_min.v_mic-v_rev])], [m4_define([v_ver], [v_maj.v_min.v_mic])])