From 82d28ffa25af2eddc927cde89202b76f1f7cbf8e Mon Sep 17 00:00:00 2001 From: Biryuzovye Kleshni Date: Sat, 23 Jun 2018 08:40:09 +0000 Subject: [PATCH] New POW calculation module --- src/workprover/Readme.md | 42 +++++ src/workprover/__init__.py | 238 ++++++++++++++++++++++++++ src/workprover/dumbsolver.py | 70 ++++++++ src/workprover/fastsolver.py | 71 ++++++++ src/workprover/fastsolver/common.c | 100 +++++++++++ src/workprover/fastsolver/common.h | 13 ++ src/workprover/fastsolver/main.map | 4 + src/workprover/fastsolver/makefile | 11 ++ src/workprover/fastsolver/options.txt | 1 + src/workprover/fastsolver/pthread.c | 214 +++++++++++++++++++++++ src/workprover/fastsolver/winapi.c | 160 +++++++++++++++++ src/workprover/forkingsolver.py | 106 ++++++++++++ src/workprover/gpusolver.cl | 136 +++++++++++++++ src/workprover/gpusolver.py | 103 +++++++++++ src/workprover/test.py | 205 ++++++++++++++++++++++ src/workprover/utils.py | 51 ++++++ 16 files changed, 1525 insertions(+) create mode 100644 src/workprover/Readme.md create mode 100644 src/workprover/__init__.py create mode 100644 src/workprover/dumbsolver.py create mode 100644 src/workprover/fastsolver.py create mode 100644 src/workprover/fastsolver/common.c create mode 100644 src/workprover/fastsolver/common.h create mode 100644 src/workprover/fastsolver/main.map create mode 100644 src/workprover/fastsolver/makefile create mode 100644 src/workprover/fastsolver/options.txt create mode 100644 src/workprover/fastsolver/pthread.c create mode 100644 src/workprover/fastsolver/winapi.c create mode 100644 src/workprover/forkingsolver.py create mode 100644 src/workprover/gpusolver.cl create mode 100644 src/workprover/gpusolver.py create mode 100755 src/workprover/test.py create mode 100644 src/workprover/utils.py diff --git a/src/workprover/Readme.md b/src/workprover/Readme.md new file mode 100644 index 00000000..105346be --- /dev/null +++ b/src/workprover/Readme.md @@ -0,0 +1,42 @@ +Please keep this module independent from the outside code, so that it can be reused in other applications. + +If you are going to use it, you should wrap your program's main file in this: + +```python +import workprover.dumbsolver + +workprover.dumbsolver.libcrypto = ... + +if __name__ == "__main__": + import multiprocessing + + multiprocessing.freeze_support() + + ... +``` + +See the `multiprocessing` module documentation for explaination. + +Build fast solver +----------------- + +On Linux, BSDs or MacOS: `make -C fastsolver`. + +On Windows: + +- Install OpenSSL. Build it yourself or install [third-party](https://wiki.openssl.org/index.php/Binaries) prebuilt binaries. + +- Install MSVC as part of Visual Studio or standalone. Official offline installer: https://aka.ms/vcpython27. + +- Open its command line and go to the `fastsolver` directory. + +- Add OpenSSL paths to environment variables: + +```bat +set INCLUDE=C:\OpenSSL-Win64\include;%INCLUDE% +set LIB=C:\OpenSSL-Win64\lib;%LIB% +``` + +- Do `cl @options.txt`. + +- Append the `-32` or `-64` suffix to the DLL file name. diff --git a/src/workprover/__init__.py b/src/workprover/__init__.py new file mode 100644 index 00000000..d7cd34ac --- /dev/null +++ b/src/workprover/__init__.py @@ -0,0 +1,238 @@ +import sys +import multiprocessing +import time +import struct +import threading +import Queue +import collections + +import utils +import dumbsolver +import forkingsolver +import fastsolver +import gpusolver + +timeout = .5 + +class Stop(Exception): + pass + +class Task(object): + previous = None + next = None + + def __init__(self, headlessPayload, TTL, expiryTime, target): + self.headlessPayload = headlessPayload + self.TTL = TTL + self.expiryTime = expiryTime + self.target = target + +class WorkProver(threading.Thread): + def __init__(self, codePath, GPUVendors, seed, statusUpdated): + super(self.__class__, self).__init__() + + self.availableSolvers = { + "dumb": dumbsolver.DumbSolver(codePath) + } + + # Comment from the previous version: + + # on my (Peter Surda) Windows 10, Windows Defender + # does not like this and fights with PyBitmessage + # over CPU, resulting in very slow PoW + # added on 2015-11-29: multiprocesing.freeze_support() doesn't help + + if not hasattr(sys, "frozen") or sys.frozen == "macosx_app": + self.availableSolvers["forking"] = forkingsolver.ForkingSolver(codePath) + + try: + self.availableSolvers["fast"] = fastsolver.FastSolver(codePath) + except: + pass + + try: + self.availableSolvers["gpu"] = gpusolver.GPUSolver(codePath, GPUVendors) + except: + pass + + try: + self.defaultParallelism = multiprocessing.cpu_count() + except NotImplementedError: + self.defaultParallelism = 1 + + self.seed = seed + self.roundsCounter = 0 + self.statusUpdated = statusUpdated + + self.commandsQueue = Queue.Queue() + self.resultsQueue = Queue.Queue() + + self.solverName = None + self.solver = None + + self.lastTime = utils.getTimePoint() + self.timedIntervals = collections.deque() + self.speed = 0 + + self.tasks = {} + self.currentTaskID = None + + def notifyStatus(self): + if self.statusUpdated is None: + return + + if self.solver is None: + parallelism = 0 + else: + parallelism = self.solver.parallelism + + self.statusUpdated((self.solverName, parallelism, self.speed)) + + def setSolver(self, name, parallelism): + if name is None and self.solverName is None: + pass + elif name == self.solverName: + if self.solver.parallelism != parallelism: + self.solver.setParallelism(parallelism) + else: + if self.solver is not None: + self.solver.setParallelism(0) + self.solverName = None + self.solver = None + + if name is not None: + if name not in self.availableSolvers: + name, parallelism = "dumb", 1 + + self.solverName = name + self.solver = self.availableSolvers[name] + self.solver.setParallelism(parallelism) + + self.notifyStatus() + + def updateSpeed(self, iterationsCount): + currentTime = utils.getTimePoint() + duration = currentTime - self.lastTime + self.lastTime = currentTime + + self.timedIntervals.append((currentTime, iterationsCount, duration)) + + for i in xrange(len(self.timedIntervals)): + time, iterationsCount, duration = self.timedIntervals[0] + + if time + duration < currentTime - 3: + self.timedIntervals.popleft() + + totalDuration = 0 + totalIterationsCount = 0 + + for time, iterationsCount, duration in self.timedIntervals: + totalIterationsCount += iterationsCount + totalDuration += duration + + if totalDuration < .25: + self.speed = 0 + else: + self.speed = totalIterationsCount / totalDuration + + self.notifyStatus() + + def addTask(self, ID, headlessPayload, TTL, expiryTime, byteDifficulty, lengthExtension): + target = utils.calculateTarget(8 + 8 + len(headlessPayload), TTL, byteDifficulty, lengthExtension) + + task = Task(headlessPayload, TTL, expiryTime, target) + + self.tasks[ID] = task + + if self.currentTaskID is None: + task.previous = ID + task.next = ID + + self.currentTaskID = ID + else: + task.previous = self.currentTaskID + task.next = self.tasks[self.currentTaskID].next + + self.tasks[task.previous].next = ID + self.tasks[task.next].previous = ID + + def cancelTask(self, ID): + if ID not in self.tasks: + return + + task = self.tasks.pop(ID) + + if len(self.tasks) == 0: + self.currentTaskID = None + else: + self.tasks[task.previous].next = task.next + self.tasks[task.next].previous = task.previous + + if self.currentTaskID == ID: + self.currentTaskID = task.next + + def nextTask(self): + self.currentTaskID = self.tasks[self.currentTaskID].next + + def shutdown(self): + self.setSolver(None, 0) + + for i in self.tasks.keys(): + self.cancelTask(i) + + raise Stop() + + def processCommand(self, command, *arguments): + getattr(self, command)(*arguments) + + def round(self): + while True: + try: + self.processCommand(*self.commandsQueue.get_nowait()) + except Queue.Empty: + break + + while self.solver is None or self.currentTaskID is None: + try: + self.processCommand(*self.commandsQueue.get(True, timeout)) + except Queue.Empty: + self.updateSpeed(0) + + task = self.tasks[self.currentTaskID] + + if task.expiryTime is None: + expiryTime = int(time.time() + task.TTL) + else: + expiryTime = task.expiryTime + + initialPayload = struct.pack(">Q", expiryTime) + task.headlessPayload + initialHash = utils.calculateInitialHash(initialPayload) + + appendedSeed = self.seed + struct.pack(">Q", self.roundsCounter) + self.roundsCounter += 1 + + try: + nonce, iterationsCount = self.solver.search(initialHash, task.target, appendedSeed, timeout) + except gpusolver.GPUSolverError: + self.setSolver("dumb", 1) + self.availableSolvers.pop("gpu") + + nonce, iterationsCount = None, 0 + + self.updateSpeed(iterationsCount) + + if nonce is None: + self.nextTask() + else: + self.resultsQueue.put(("taskDone", self.currentTaskID, nonce, expiryTime)) + + self.cancelTask(self.currentTaskID) + + def run(self): + try: + while True: + self.round() + except Stop: + return + except Exception as exception: + self.resultsQueue.put(exception) diff --git a/src/workprover/dumbsolver.py b/src/workprover/dumbsolver.py new file mode 100644 index 00000000..f3c974cb --- /dev/null +++ b/src/workprover/dumbsolver.py @@ -0,0 +1,70 @@ +import ctypes +import struct +import hashlib + +import utils + +libcrypto = None + +class DumbSolver(object): + def __init__(self, codePath): + libcrypto.SHA512.restype = ctypes.c_void_p + + self.prefixes = [chr(i) for i in xrange(256)] + + if ctypes.c_size_t is ctypes.c_uint: + self.proofLength = 8 + 64 + self.hashLength = 64 + else: + # Using the wrapper instead of a clear number slows the work down, but otherwise seems to be unsafe + + self.proofLength = ctypes.c_size_t(8 + 64) + self.hashLength = ctypes.c_size_t(64) + + self.firstHash = ctypes.create_string_buffer(64) + self.secondHash = ctypes.create_string_buffer(64) + + self.parallelism = 1 + + def search(self, initialHash, target, seed, timeout): + startTime = utils.getTimePoint() + + sha512 = libcrypto.SHA512 + + prefixes = self.prefixes + proofLength = self.proofLength + hashLength = self.hashLength + firstHash = self.firstHash + secondHash = self.secondHash + + encodedTarget = struct.pack(">Q", target) + + solutions = [] + i = 0 + + while True: + randomness = hashlib.sha512(seed + struct.pack(">Q", i)).digest() + i += 1 + + suffix = randomness[: 7] + initialHash + + for j in prefixes: + proof = j + suffix + + sha512(j + suffix, proofLength, firstHash) + sha512(firstHash, hashLength, secondHash) + + if secondHash[: 8] <= encodedTarget: + solutions.append(proof[: 8]) + + if len(solutions) != 0: + index, = struct.unpack(">Q", randomness[7: 15]) + nonce = solutions[index % len(solutions)] + + return nonce, 256 * i + + if utils.getTimePoint() - startTime >= timeout: + return None, 256 * i + + def setParallelism(self, parallelism): + pass diff --git a/src/workprover/fastsolver.py b/src/workprover/fastsolver.py new file mode 100644 index 00000000..b443aa0c --- /dev/null +++ b/src/workprover/fastsolver.py @@ -0,0 +1,71 @@ +import sys +import os.path +import platform +import subprocess +import ctypes + +def loadFastSolver(codePath): + if hasattr(sys, "winver"): + suffix = "-32" + + if platform.architecture()[0] == "64bit": + suffix = "-64" + + path = os.path.join(codePath, "fastsolver/libfastsolver{}.dll".format(suffix)) + + return ctypes.WinDLL(path) + + makePath = os.path.join(codePath, "fastsolver") + path = os.path.join(codePath, "fastsolver/libfastsolver.so") + + try: + return ctypes.CDLL(path) + except: + if not hasattr(sys, "frozen"): + subprocess.call(["make", "-C", makePath]) + + return ctypes.CDLL(path) + else: + raise Exception() + +class FastSolver(object): + def __init__(self, codePath): + self.libfastsolver = loadFastSolver(codePath) + + self.libfastsolver.fastsolver_add.restype = ctypes.c_size_t + self.libfastsolver.fastsolver_add.argtypes = [] + + self.libfastsolver.fastsolver_remove.restype = ctypes.c_size_t + self.libfastsolver.fastsolver_remove.argtypes = [ctypes.c_size_t] + + self.libfastsolver.fastsolver_search.restype = ctypes.c_int + + self.libfastsolver.fastsolver_search.argtypes = [ + ctypes.c_void_p, ctypes.c_void_p, + ctypes.c_void_p, ctypes.c_ulonglong, ctypes.c_void_p, ctypes.c_ulonglong + ] + + self.nonce = ctypes.create_string_buffer(8) + self.iterationsCount = ctypes.c_ulonglong() + + self.parallelism = 0 + + def search(self, initialHash, target, seed, timeout): + found = self.libfastsolver.fastsolver_search( + self.nonce, ctypes.byref(self.iterationsCount), + initialHash, target, seed, long(1000000000 * timeout) + ) + + if found == 1: + return self.nonce.raw, self.iterationsCount.value + else: + return None, self.iterationsCount.value + + def setParallelism(self, parallelism): + parallelism = min(4096, parallelism) + + for i in xrange(self.parallelism, parallelism): + self.parallelism = self.libfastsolver.fastsolver_add() + + if parallelism < self.parallelism: + self.parallelism = self.libfastsolver.fastsolver_remove(self.parallelism - parallelism) diff --git a/src/workprover/fastsolver/common.c b/src/workprover/fastsolver/common.c new file mode 100644 index 00000000..91477154 --- /dev/null +++ b/src/workprover/fastsolver/common.c @@ -0,0 +1,100 @@ +#include + +#include + +#include "common.h" + +volatile int run; + +const char *initial_hash; +unsigned long long target; +const char *seed; + +static void encode_big_endian(char *result, unsigned long long number) { + result[0] = number >> 56; + result[1] = number >> 48 & 0xff; + result[2] = number >> 40 & 0xff; + result[3] = number >> 32 & 0xff; + result[4] = number >> 24 & 0xff; + result[5] = number >> 16 & 0xff; + result[6] = number >> 8 & 0xff; + result[7] = number & 0xff; +} + +static unsigned long long decode_big_endian(const char *encoded) { + return ( + (unsigned long long) encoded[0] << 56 | + (unsigned long long) encoded[1] << 48 | + (unsigned long long) encoded[2] << 40 | + (unsigned long long) encoded[3] << 32 | + (unsigned long long) encoded[4] << 24 | + (unsigned long long) encoded[5] << 16 | + (unsigned long long) encoded[6] << 8 | + (unsigned long long) encoded[7] + ); +} + +int work(char *nonce, unsigned long long *iterations_count, size_t thread_number) { + unsigned long long i; + + char proof[8 + 64]; + char appended_seed[SEED_LENGTH + 8 + 8]; + + memcpy(proof + 8, initial_hash, 64); + memcpy(appended_seed, seed, SEED_LENGTH); + encode_big_endian(appended_seed + SEED_LENGTH, thread_number); + + for (i = 0; run; ++i) { + char randomness[64]; + + size_t solutions_count = 0; + char solutions[256]; + + size_t j; + + encode_big_endian(appended_seed + SEED_LENGTH + 8, i); + + SHA512((unsigned char *) appended_seed, SEED_LENGTH + 8 + 8, (unsigned char *) randomness); + + memcpy(proof + 1, randomness, 7); + + for (j = 0; j < 256; ++j) { + unsigned long long trial; + + SHA512_CTX context; + + char first_hash[64]; + char second_hash[64]; + + proof[0] = j; + + SHA512_Init(&context); + SHA512_Update(&context, (unsigned char *) proof, 8 + 64); + SHA512_Final((unsigned char *) first_hash, &context); + + SHA512_Init(&context); + SHA512_Update(&context, (unsigned char *) first_hash, 64); + SHA512_Final((unsigned char *) second_hash, &context); + + trial = decode_big_endian(second_hash); + + if (trial <= target) { + solutions[solutions_count] = j; + ++solutions_count; + } + + ++*iterations_count; + } + + if (solutions_count != 0) { + unsigned long long index = decode_big_endian(randomness + 7); + + nonce[0] = solutions[index % solutions_count]; + memcpy(nonce + 1, proof + 1, 7); + + return 1; + } + } + + return 0; +} diff --git a/src/workprover/fastsolver/common.h b/src/workprover/fastsolver/common.h new file mode 100644 index 00000000..bdca9232 --- /dev/null +++ b/src/workprover/fastsolver/common.h @@ -0,0 +1,13 @@ +#ifndef COMMON_H + #define COMMON_H + + extern volatile int run; + + #define SEED_LENGTH (32 + 8) + + extern const char *initial_hash; + extern unsigned long long target; + extern const char *seed; + + int work(char *nonce, unsigned long long *iterations_count, size_t thread_number); +#endif diff --git a/src/workprover/fastsolver/main.map b/src/workprover/fastsolver/main.map new file mode 100644 index 00000000..175c370d --- /dev/null +++ b/src/workprover/fastsolver/main.map @@ -0,0 +1,4 @@ +{ + global: fastsolver_*; + local: *; +}; diff --git a/src/workprover/fastsolver/makefile b/src/workprover/fastsolver/makefile new file mode 100644 index 00000000..e4b08034 --- /dev/null +++ b/src/workprover/fastsolver/makefile @@ -0,0 +1,11 @@ +CFLAGS += -std=gnu99 -Wall -Wextra -pedantic -O3 -fPIC +LDFLAGS += -shared -lpthread -lcrypto -Wl,-version-script=main.map + +libfastsolver.so: main.map common.o pthread.o + $(CC) $(LDFLAGS) -o $@ common.o pthread.o + +common.o: common.h common.c +pthread.o: common.h pthread.c + +clean: + rm -f common.o pthread.o libfastsolver.so diff --git a/src/workprover/fastsolver/options.txt b/src/workprover/fastsolver/options.txt new file mode 100644 index 00000000..cd8fe863 --- /dev/null +++ b/src/workprover/fastsolver/options.txt @@ -0,0 +1 @@ +/Ox /MD common.c winapi.c /link /DLL /OUT:libfastsolver.dll /EXPORT:fastsolver_add /EXPORT:fastsolver_remove /EXPORT:fastsolver_search libcrypto.lib diff --git a/src/workprover/fastsolver/pthread.c b/src/workprover/fastsolver/pthread.c new file mode 100644 index 00000000..fc62b98a --- /dev/null +++ b/src/workprover/fastsolver/pthread.c @@ -0,0 +1,214 @@ +#include +#include + +#include + +#include "common.h" + +static int initialized; + +#define MAXIMUM_THREADS_COUNT 4096 + +static size_t threads_count; +static pthread_t threads[MAXIMUM_THREADS_COUNT]; + +static pthread_mutex_t lock; +static pthread_cond_t start; +static pthread_cond_t done; + +static size_t running_threads_count; + +static int found; +static char best_nonce[8]; +static unsigned long long total_iterations_count; + +static void *thread_function(void *argument) { + size_t thread_number = (pthread_t *) argument - threads; + + while (1) { + char nonce[8]; + unsigned long long iterations_count = 0; + int result; + + pthread_mutex_lock(&lock); + + while (!run && threads_count > thread_number) { + pthread_cond_wait(&start, &lock); + } + + if (threads_count <= thread_number) { + pthread_mutex_unlock(&lock); + + return NULL; + } + + ++running_threads_count; + + pthread_mutex_unlock(&lock); + + result = work(nonce, &iterations_count, thread_number); + + pthread_mutex_lock(&lock); + + if (result == 1) { + found = 1; + memcpy(best_nonce, nonce, 8); + } + + total_iterations_count += iterations_count; + + run = 0; + --running_threads_count; + + pthread_cond_signal(&done); + pthread_mutex_unlock(&lock); + } +} + +static int initialize(void) { + pthread_condattr_t done_attributes; + + if (initialized == 1) { + return 1; + } + + if (pthread_mutex_init(&lock, NULL) != 0) { + goto error_lock; + } + + if (pthread_cond_init(&start, NULL) != 0) { + goto error_start; + } + + if (pthread_condattr_init(&done_attributes) != 0) { + goto error_done_attributes; + } + + #ifndef __APPLE__ + pthread_condattr_setclock(&done_attributes, CLOCK_MONOTONIC); + #endif + + if (pthread_cond_init(&done, &done_attributes) != 0) { + goto error_done; + } + + pthread_condattr_destroy(&done_attributes); + + initialized = 1; + + return 1; + + error_done: pthread_condattr_destroy(&done_attributes); + error_done_attributes: pthread_cond_destroy(&start); + error_start: pthread_mutex_destroy(&lock); + error_lock: return 0; +} + +size_t fastsolver_add(void) { + #ifdef SCHED_IDLE + int policy = SCHED_IDLE; + #else + int policy = SCHED_OTHER; + #endif + + struct sched_param parameters; + + if (initialize() == 0) { + return threads_count; + } + + pthread_mutex_lock(&lock); + + if (pthread_create(&threads[threads_count], NULL, thread_function, &threads[threads_count]) != 0) { + pthread_mutex_unlock(&lock); + + return threads_count; + } + + parameters.sched_priority = sched_get_priority_min(policy); + pthread_setschedparam(threads[threads_count], policy, ¶meters); + + ++threads_count; + + pthread_mutex_unlock(&lock); + + return threads_count; +} + +size_t fastsolver_remove(size_t count) { + size_t i; + + pthread_mutex_lock(&lock); + + threads_count -= count; + + pthread_cond_broadcast(&start); + pthread_mutex_unlock(&lock); + + for (i = 0; i < count; ++i) { + void *result; + + pthread_join(threads[threads_count + i], &result); + } + + return threads_count; +} + +int fastsolver_search( + char *local_nonce, + unsigned long long *local_iterations_count, + const char *local_initial_hash, + unsigned long long local_target, + const char *local_seed, + unsigned long long timeout +) { + struct timespec wait_time; + unsigned long long nanoseconds; + + initial_hash = local_initial_hash; + target = local_target; + seed = local_seed; + + found = 0; + total_iterations_count = 0; + + #ifdef __APPLE__ + wait_time.tv_sec = 0; + wait_time.tv_nsec = 0; + #else + clock_gettime(CLOCK_MONOTONIC, &wait_time); + #endif + + nanoseconds = wait_time.tv_nsec + timeout; + + wait_time.tv_sec += nanoseconds / 1000000000; + wait_time.tv_nsec = nanoseconds % 1000000000; + + pthread_mutex_lock(&lock); + + run = 1; + + pthread_cond_broadcast(&start); + + #ifdef __APPLE__ + pthread_cond_timedwait_relative_np(&done, &lock, &wait_time); + #else + pthread_cond_timedwait(&done, &lock, &wait_time); + #endif + + run = 0; + + while (running_threads_count != 0) { + pthread_cond_wait(&done, &lock); + } + + pthread_mutex_unlock(&lock); + + if (found) { + memcpy(local_nonce, best_nonce, 8); + } + + *local_iterations_count = total_iterations_count; + + return found; +} diff --git a/src/workprover/fastsolver/winapi.c b/src/workprover/fastsolver/winapi.c new file mode 100644 index 00000000..b620aeb8 --- /dev/null +++ b/src/workprover/fastsolver/winapi.c @@ -0,0 +1,160 @@ +#include + +#include + +#include "common.h" + +static int initialized; + +#define MAXIMUM_THREADS_COUNT 4096 + +static size_t threads_count; +static HANDLE threads[MAXIMUM_THREADS_COUNT]; + +static CRITICAL_SECTION lock; +static CONDITION_VARIABLE start = CONDITION_VARIABLE_INIT; +static CONDITION_VARIABLE done = CONDITION_VARIABLE_INIT; + +static size_t running_threads_count; + +static int found; +static char best_nonce[8]; +static unsigned long long total_iterations_count; + +DWORD WINAPI thread_function(LPVOID argument) { + size_t thread_number = (HANDLE *) argument - threads; + + while (1) { + char nonce[8]; + unsigned long long iterations_count = 0; + int result; + + EnterCriticalSection(&lock); + + while (!run && threads_count > thread_number) { + SleepConditionVariableCS(&start, &lock, INFINITE); + } + + if (threads_count <= thread_number) { + LeaveCriticalSection(&lock); + + return 0; + } + + ++running_threads_count; + + LeaveCriticalSection(&lock); + + result = work(nonce, &iterations_count, thread_number); + + EnterCriticalSection(&lock); + + if (result == 1) { + found = 1; + memcpy(best_nonce, nonce, 8); + } + + total_iterations_count += iterations_count; + + run = 0; + --running_threads_count; + + WakeConditionVariable(&done); + LeaveCriticalSection(&lock); + } +} + +static int initialize(void) { + if (initialized == 1) { + return 1; + } + + InitializeCriticalSection(&lock); + + initialized = 1; + + return 1; +} + +size_t fastsolver_add(void) { + if (initialize() == 0) { + return threads_count; + } + + EnterCriticalSection(&lock); + + threads[threads_count] = CreateThread(NULL, 0, thread_function, &threads[threads_count], 0, NULL); + + if (threads[threads_count] == NULL) { + LeaveCriticalSection(&lock); + + return threads_count; + } + + SetThreadPriority(threads[threads_count], THREAD_PRIORITY_IDLE); + + ++threads_count; + + LeaveCriticalSection(&lock); + + return threads_count; +} + +size_t fastsolver_remove(size_t count) { + size_t i; + + EnterCriticalSection(&lock); + + threads_count -= count; + + WakeAllConditionVariable(&start); + LeaveCriticalSection(&lock); + + WaitForMultipleObjects(count, threads + threads_count, TRUE, INFINITE); + + for (i = 0; i < count; ++i) { + CloseHandle(threads[threads_count + i]); + } + + return threads_count; +} + +int fastsolver_search( + char *local_nonce, + unsigned long long *local_iterations_count, + const char *local_initial_hash, + unsigned long long local_target, + const char *local_seed, + unsigned long long timeout +) { + initial_hash = local_initial_hash; + target = local_target; + seed = local_seed; + + found = 0; + total_iterations_count = 0; + + EnterCriticalSection(&lock); + + run = 1; + + WakeAllConditionVariable(&start); + + SleepConditionVariableCS(&done, &lock, timeout / 1000); + + run = 0; + + while (running_threads_count != 0) { + SleepConditionVariableCS(&done, &lock, INFINITE); + } + + LeaveCriticalSection(&lock); + + if (found) { + memcpy(local_nonce, best_nonce, 8); + } + + *local_iterations_count = total_iterations_count; + + return found; +} diff --git a/src/workprover/forkingsolver.py b/src/workprover/forkingsolver.py new file mode 100644 index 00000000..b31d2b15 --- /dev/null +++ b/src/workprover/forkingsolver.py @@ -0,0 +1,106 @@ +import os +import multiprocessing +import struct + +import dumbsolver + +def setIdle(): + try: + import psutil + + psutil.Process().nice(psutil.IDLE_PRIORITY_CLASS) + + return + except: + pass + + try: + import win32api + import win32con + import win32process + + PID = win32api.GetCurrentProcessId() + handle = win32api.OpenProcess(win32con.PROCESS_ALL_ACCESS, True, PID) + + win32process.SetPriorityClass(handle, win32process.IDLE_PRIORITY_CLASS) + + return + except: + pass + + if hasattr(os, "nice"): + os.nice(40) + +def threadFunction(local, remote, codePath, threadNumber): + remote.close() + setIdle() + + solver = dumbsolver.DumbSolver(codePath) + + while True: + received = local.recv() + + command = received[0] + arguments = received[1: ] + + if command == "search": + initialHash, target, seed, timeout = arguments + appendedSeed = seed + struct.pack(">Q", threadNumber) + + nonce, iterationsCount = solver.search(initialHash, target, appendedSeed, timeout) + + local.send(("done", nonce, iterationsCount)) + elif command == "shutdown": + local.close() + + return + +class ForkingSolver(object): + def __init__(self, codePath): + self.pipes = [] + self.processes = [] + + self.parallelism = 0 + + self.codePath = codePath + + def search(self, initialHash, target, seed, timeout): + for i in self.pipes: + i.send(("search", initialHash, target, seed, timeout)) + + bestNonce, totalIterationsCount = None, 0 + + for i in self.pipes: + event, nonce, iterationsCount = i.recv() + + if nonce is not None: + bestNonce = nonce + + totalIterationsCount += iterationsCount + + return bestNonce, totalIterationsCount + + def setParallelism(self, parallelism): + parallelism = min(4096, parallelism) + + for i in xrange(self.parallelism, parallelism): + local, remote = multiprocessing.Pipe() + + process = multiprocessing.Process(target = threadFunction, args = (remote, local, self.codePath, i)) + process.start() + + remote.close() + + self.pipes.append(local) + self.processes.append(process) + + for i in xrange(parallelism, self.parallelism): + pipe = self.pipes.pop() + + pipe.send(("shutdown", )) + pipe.close() + + for i in xrange(parallelism, self.parallelism): + self.processes.pop().join() + + self.parallelism = parallelism diff --git a/src/workprover/gpusolver.cl b/src/workprover/gpusolver.cl new file mode 100644 index 00000000..6bd6d010 --- /dev/null +++ b/src/workprover/gpusolver.cl @@ -0,0 +1,136 @@ +constant ulong k[80] = { + 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, + 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, + 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, + 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, + 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, + 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, + 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, + 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, + 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, + 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, + 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, + 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, + 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, + 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, + 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, + 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, + 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, + 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, + 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, + 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 +}; + +constant ulong h[8] = { + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 +}; + +#define ROTATE(x, n) ((x) >> (n) | (x) << 64 - (n)) + +#define C(x, y, z) ((x) & (y) ^ ~(x) & (z)) +#define M(x, y, z) ((x) & (y) ^ (x) & (z) ^ (y) & (z)) +#define S0(x) (ROTATE((x), 28) ^ ROTATE((x), 34) ^ ROTATE((x), 39)) +#define S1(x) (ROTATE((x), 14) ^ ROTATE((x), 18) ^ ROTATE((x), 41)) +#define s0(x) (ROTATE((x), 1) ^ ROTATE((x), 8) ^ (x) >> 7) +#define s1(x) (ROTATE((x), 19) ^ ROTATE((x), 61) ^ (x) >> 6) + +void sha512_process_block(ulong *state, ulong *block) { + ulong a = state[0]; + ulong b = state[1]; + ulong c = state[2]; + ulong d = state[3]; + ulong e = state[4]; + ulong f = state[5]; + ulong g = state[6]; + ulong h = state[7]; + + ulong *w = block; + + #pragma unroll + + for (size_t i = 0; i < 16; i++) { + ulong t = k[i] + w[i & 15] + h + S1(e) + C(e, f, g); + + h = g; + g = f; + f = e; + e = d + t; + t += M(a, b, c) + S0(a); + d = c; + c = b; + b = a; + a = t; + } + + #pragma unroll 16 + + for (size_t i = 16; i < 80; i++) { + w[i & 15] += s0(w[i + 1 & 15]) + s1(w[i + 14 & 15]) + w[i + 9 & 15]; + + ulong t = k[i] + w[i & 15] + h + S1(e) + C(e, f, g); + + h = g; + g = f; + f = e; + e = d + t; + t += M(a, b, c) + S0(a); + d = c; + c = b; + b = a; + a = t; + } + + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; +} + +ulong compute_trial(ulong nonce, global const ulong *initial_hash) { + ulong fisrt_block[16] = { + nonce, + initial_hash[0], initial_hash[1], initial_hash[2], initial_hash[3], + initial_hash[4], initial_hash[5], initial_hash[6], initial_hash[7], + 0x8000000000000000, 0, 0, 0, 0, 0, 8 * (8 + 64) + }; + + ulong second_block[16] = { + h[0], h[1], h[2], h[3], + h[4], h[5], h[6], h[7], + 0x8000000000000000, 0, 0, 0, 0, 0, 0, 8 * 64 + }; + + ulong double_hash[8] = { + h[0], h[1], h[2], h[3], + h[4], h[5], h[6], h[7] + }; + + sha512_process_block(second_block, fisrt_block); + sha512_process_block(double_hash, second_block); + + return double_hash[0]; +} + +kernel void search(global unsigned int *output, global ulong *input) { + size_t thread_number = get_global_id(0); + + global unsigned int *solutions_count = output; + global unsigned int *solutions = output + 1; + + global ulong *nonce = input; + global ulong *initial_hash = input + 1; + global ulong *target = input + 9; + + ulong trial = compute_trial(*nonce + thread_number, initial_hash); + + if (trial <= *target) { + unsigned int index = atom_inc(solutions_count); + + solutions[index] = thread_number; + } +} diff --git a/src/workprover/gpusolver.py b/src/workprover/gpusolver.py new file mode 100644 index 00000000..59f56b04 --- /dev/null +++ b/src/workprover/gpusolver.py @@ -0,0 +1,103 @@ +import hashlib +import struct +import os.path + +import utils + +pyopencl = None +numpy = None + +class GPUSolverError(Exception): + pass + +class GPUSolver(object): + def __init__(self, codePath, vendors = None): + global pyopencl, numpy + + import pyopencl + import numpy + + device = None + + for i in pyopencl.get_platforms(): + if vendors is not None and i.vendor not in vendors: + continue + + devices = i.get_devices(device_type = pyopencl.device_type.GPU) + + if len(devices) != 0: + device = devices[0] + + break + else: + raise Exception() + + context = pyopencl.Context(devices = [device]) + + computeUnitsCount = device.get_info(pyopencl.device_info.MAX_COMPUTE_UNITS) + workGroupSize = device.get_info(pyopencl.device_info.MAX_WORK_GROUP_SIZE) + + self.parallelism = workGroupSize * computeUnitsCount + self.batchSize = self.parallelism * 256 + + self.queue = pyopencl.CommandQueue(context, device) + + with open(os.path.join(codePath, "gpusolver.cl")) as file: + source = file.read() + + program = pyopencl.Program(context, source).build() + + self.hostOutput = numpy.zeros(1 + self.batchSize, numpy.uint32) + self.hostInput = numpy.zeros(1 + 8 + 1, numpy.uint64) + + self.output = pyopencl.Buffer(context, pyopencl.mem_flags.READ_WRITE, 4 * (1 + self.batchSize)) + self.input = pyopencl.Buffer(context, pyopencl.mem_flags.READ_ONLY, 8 * (1 + 8 + 1)) + + self.kernel = program.search + self.kernel.set_args(self.output, self.input) + + def search(self, initialHash, target, seed, timeout): + startTime = utils.getTimePoint() + + self.hostOutput[0] = 0 + + for i in xrange(8): + self.hostInput[1 + i], = struct.unpack(">Q", initialHash[8 * i: 8 * (i + 1)]) + + self.hostInput[9] = target + + pyopencl.enqueue_copy(self.queue, self.output, self.hostOutput[: 1]) + + i = 0 + + while True: + randomness = hashlib.sha512(seed + struct.pack(">Q", i)).digest() + i += 1 + + self.hostInput[0], = struct.unpack(">Q", randomness[: 8]) + + pyopencl.enqueue_copy(self.queue, self.input, self.hostInput) + pyopencl.enqueue_nd_range_kernel(self.queue, self.kernel, (self.batchSize, ), None) + self.queue.finish() + pyopencl.enqueue_copy(self.queue, self.hostOutput[: 1], self.output) + + solutionsCount = long(self.hostOutput[0]) + + if solutionsCount != 0: + pyopencl.enqueue_copy(self.queue, self.hostOutput[0: 1 + solutionsCount], self.output) + + index, = struct.unpack(">Q", randomness[8: 16]) + threadNumber = self.hostOutput[1 + index % solutionsCount] + + nonce = struct.pack(">Q", long(self.hostInput[0]) + threadNumber) + + if not utils.checkProof(nonce, initialHash, target): + raise GPUSolverError() + + return nonce, self.batchSize * i + + if utils.getTimePoint() - startTime >= timeout: + return None, self.batchSize * i + + def setParallelism(self, parallelism): + pass diff --git a/src/workprover/test.py b/src/workprover/test.py new file mode 100755 index 00000000..c27ca38c --- /dev/null +++ b/src/workprover/test.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python2.7 + +import unittest +import os.path +import binascii +import struct +import ctypes +import ctypes.util +import sys + +import __init__ +import utils +import dumbsolver +import forkingsolver +import fastsolver +import gpusolver + +codePath = os.path.dirname(__file__) + +if hasattr(sys, "winver"): + dumbsolver.libcrypto = ctypes.WinDLL("libcrypto.dll") +else: + dumbsolver.libcrypto = ctypes.CDLL(ctypes.util.find_library("crypto")) + +nonce = binascii.unhexlify("9ca6790a249679f8") +expiryTime = 1525845600 + +headlessPayload = binascii.unhexlify("000000000001") +initialPayload = struct.pack(">Q", expiryTime) + headlessPayload +payload = nonce + initialPayload + +initialHash = binascii.unhexlify( + "1e87a288a10454dea0d3a9b606cc538db1b8b47fe8a21a37b8e57da3db6928eb" + "d854fd22aed3e1849c4a1c596fe0bfec266c05900a862c5b356a6b7e51a4b510" +) + +doubleHash = binascii.unhexlify( + "16cdf04b739412bea1bf58d6c5a53ec92e7d4aab180213405bf10d615354d417" + "00f8b1510d0844a4b7c7b7434e6c115b52fcec5c591e96c31f4b8769ee683552" +) + +TTL = 3600 +byteDifficulty = 1000 +lengthExtension = 1000 + +target = 0x00000f903320b7f6 + +seed = binascii.unhexlify("3941c24a1256660a8f65d962954c406dab7bc449317fa087c4a3f1a3ca7d95fd") +timeout = .5 + +class TestUtils(unittest.TestCase): + def testCalculateInitialHash(self): + self.assertEqual(utils.calculateInitialHash(initialPayload), initialHash) + + def testCalculateDoubleHash(self): + self.assertEqual(utils.calculateDoubleHash(payload), doubleHash) + + def testCalculateTarget(self): + self.assertEqual(utils.calculateTarget(1000, 1015, 1000, 1000), 0x00000843bf57fed2) + self.assertEqual(utils.calculateTarget(1000, 1016, 1000, 1000), 0x00000842b4a960c2) + + def testCheckProof(self): + self.assertFalse(utils.checkProof(nonce, initialHash, 0x000002fe91eba355)) + self.assertTrue(utils.checkProof(nonce, initialHash, 0x000002fe91eba356)) + + def testCheckWorkSufficient(self): + originalTime = utils.time.time + + utils.time.time = lambda: expiryTime - 293757.5 + self.assertFalse(utils.checkWorkSufficient(payload, byteDifficulty, lengthExtension)) + + utils.time.time = lambda: expiryTime - 293757 + self.assertTrue(utils.checkWorkSufficient(payload, byteDifficulty, lengthExtension)) + + utils.time.time = originalTime + + def testEstimateMaximumIterationsCount(self): + self.assertEqual(utils.estimateMaximumIterationsCount(0x000fffffffffffff, .1), 512) + self.assertEqual(utils.estimateMaximumIterationsCount(target, .8), 1735168) + +class TestSolver(unittest.TestCase): + def setUp(self): + self.solver = self.Solver(codePath) + self.solver.setParallelism(1) + + def testSearch(self): + nonce = None + + i = 0 + + while nonce is None: + appendedSeed = seed + struct.pack(">Q", i) + i += 1 + + nonce, iterationsCount = self.solver.search(initialHash, target, appendedSeed, timeout) + + self.assertTrue(utils.checkProof(nonce, initialHash, target)) + + def tearDown(self): + self.solver.setParallelism(0) + +class TestDumbSolver(TestSolver): + Solver = dumbsolver.DumbSolver + +class TestForkingSolver(TestSolver): + Solver = forkingsolver.ForkingSolver + +class TestFastSolver(TestSolver): + Solver = fastsolver.FastSolver + +class TestGPUSolver(TestSolver): + Solver = gpusolver.GPUSolver + +class TestWorkProver(unittest.TestCase): + def setUp(self): + self.thread = __init__.WorkProver(codePath, None, seed, None) + self.thread.start() + + def checkTaskLinks(self): + IDs = set(self.thread.tasks.keys()) + + if len(IDs) == 0: + return + + self.assertIn(self.thread.currentTaskID, IDs) + + linkID = next(iter(IDs)) + + for i in xrange(len(IDs)): + self.assertIn(linkID, IDs) + + IDs.remove(linkID) + + nextLinkID = self.thread.tasks[linkID].next + + self.assertEqual(self.thread.tasks[nextLinkID].previous, linkID) + + linkID = nextLinkID + + def testTasks(self): + self.thread.addTask(0, headlessPayload, TTL, None, byteDifficulty, lengthExtension) + + self.checkTaskLinks() + + self.thread.addTask(1, headlessPayload, TTL, None, byteDifficulty, lengthExtension) + self.thread.addTask(2, headlessPayload, TTL, None, byteDifficulty, lengthExtension) + + self.checkTaskLinks() + + self.thread.cancelTask(self.thread.currentTaskID) + self.thread.nextTask() + self.thread.nextTask() + self.thread.nextTask() + self.thread.addTask(3, headlessPayload, TTL, None, byteDifficulty, lengthExtension) + + self.checkTaskLinks() + + def testSearch(self): + self.thread.commandsQueue.put(( + "addTask", 0, + headlessPayload, TTL, None, byteDifficulty, lengthExtension + )) + + self.thread.commandsQueue.put(( + "addTask", 1, + headlessPayload, TTL, None, byteDifficulty, lengthExtension + )) + + self.thread.commandsQueue.put(( + "addTask", 2, + headlessPayload, TTL * 100, expiryTime, byteDifficulty, lengthExtension + )) + + self.thread.commandsQueue.put(("setSolver", "dumb", 1)) + + for i in xrange(3): + event, ID, nonce, localExpiryTime = self.thread.resultsQueue.get() + + initialPayload = struct.pack(">Q", localExpiryTime) + headlessPayload + initialHash = utils.calculateInitialHash(initialPayload) + + self.assertTrue(utils.checkProof(nonce, initialHash, target)) + + def tearDown(self): + self.thread.commandsQueue.put(("shutdown", )) + self.thread.join() + +loader = unittest.TestLoader() + +suite = unittest.TestSuite([ + loader.loadTestsFromTestCase(TestUtils), + loader.loadTestsFromTestCase(TestDumbSolver), + loader.loadTestsFromTestCase(TestForkingSolver), + loader.loadTestsFromTestCase(TestFastSolver), + loader.loadTestsFromTestCase(TestGPUSolver), + loader.loadTestsFromTestCase(TestWorkProver) +]) + +if __name__ == "__main__": + import multiprocessing + + multiprocessing.freeze_support() + + runner = unittest.TextTestRunner() + runner.run(suite) diff --git a/src/workprover/utils.py b/src/workprover/utils.py new file mode 100644 index 00000000..696181cb --- /dev/null +++ b/src/workprover/utils.py @@ -0,0 +1,51 @@ +import hashlib +import struct +import time +import math +import sys +import os + +def calculateInitialHash(initialPayload): + return hashlib.sha512(initialPayload).digest() + +def calculateDoubleHash(data): + return hashlib.sha512(hashlib.sha512(data).digest()).digest() + +# Length including nonce + +def calculateTarget(length, TTL, byteDifficulty, lengthExtension): + adjustedLength = length + lengthExtension + timeEquivalent = TTL * adjustedLength / 2 ** 16 + + difficulty = byteDifficulty * (adjustedLength + timeEquivalent) + + return 2 ** 64 / difficulty + +def checkProof(nonce, initialHash, target): + proof = nonce + initialHash + trial, = struct.unpack(">Q", calculateDoubleHash(proof)[: 8]) + + return trial <= target + +def checkWorkSufficient(payload, byteDifficulty, lengthExtension): + expiryTime, = struct.unpack(">Q", payload[8: 16]) + minimumTTL = max(300, expiryTime - int(time.time())) + + nonce = payload[: 8] + initialHash = calculateInitialHash(payload[8: ]) + + target = calculateTarget(len(payload), minimumTTL, byteDifficulty, lengthExtension) + + return checkProof(nonce, initialHash, target) + +def estimateMaximumIterationsCount(target, probability): + coefficient = -math.log(1 - probability) + difficulty = 2. ** 64 / target + + return int(coefficient * difficulty + 255) / 256 * 256 + +if hasattr(sys, "winver"): + getTimePoint = time.clock +else: + def getTimePoint(): + return os.times()[4]