New POW calculation module

This commit is contained in:
Biryuzovye Kleshni 2018-06-23 08:57:34 +00:00
parent 86a7311a78
commit a181da3632
16 changed files with 1525 additions and 0 deletions

42
src/workprover/Readme.md Normal file
View File

@ -0,0 +1,42 @@
Please keep this module independent from the outside code, so that it can be reused in other applications.
If you are going to use it, you should wrap your program's main file in this:
```python
import workprover.dumbsolver
workprover.dumbsolver.libcrypto = ...
if __name__ == "__main__":
import multiprocessing
multiprocessing.freeze_support()
...
```
See the `multiprocessing` module documentation for explaination.
Build fast solver
-----------------
On Linux, BSDs or MacOS: `make -C fastsolver`.
On Windows:
- Install OpenSSL. Build it yourself or install [third-party](https://wiki.openssl.org/index.php/Binaries) prebuilt binaries.
- Install MSVC as part of Visual Studio or standalone. Official offline installer: https://aka.ms/vcpython27.
- Open its command line and go to the `fastsolver` directory.
- Add OpenSSL paths to environment variables:
```bat
set INCLUDE=C:\OpenSSL-Win64\include;%INCLUDE%
set LIB=C:\OpenSSL-Win64\lib;%LIB%
```
- Do `cl @options.txt`.
- Append the `-32` or `-64` suffix to the DLL file name.

238
src/workprover/__init__.py Normal file
View File

@ -0,0 +1,238 @@
import sys
import multiprocessing
import time
import struct
import threading
import Queue
import collections
import utils
import dumbsolver
import forkingsolver
import fastsolver
import gpusolver
timeout = .5
class Stop(Exception):
pass
class Task(object):
previous = None
next = None
def __init__(self, headlessPayload, TTL, expiryTime, target):
self.headlessPayload = headlessPayload
self.TTL = TTL
self.expiryTime = expiryTime
self.target = target
class WorkProver(threading.Thread):
def __init__(self, codePath, GPUVendors, seed, statusUpdated):
super(self.__class__, self).__init__()
self.availableSolvers = {
"dumb": dumbsolver.DumbSolver(codePath)
}
# Comment from the previous version:
# on my (Peter Surda) Windows 10, Windows Defender
# does not like this and fights with PyBitmessage
# over CPU, resulting in very slow PoW
# added on 2015-11-29: multiprocesing.freeze_support() doesn't help
if not hasattr(sys, "frozen") or sys.frozen == "macosx_app":
self.availableSolvers["forking"] = forkingsolver.ForkingSolver(codePath)
try:
self.availableSolvers["fast"] = fastsolver.FastSolver(codePath)
except:
pass
try:
self.availableSolvers["gpu"] = gpusolver.GPUSolver(codePath, GPUVendors)
except:
pass
try:
self.defaultParallelism = multiprocessing.cpu_count()
except NotImplementedError:
self.defaultParallelism = 1
self.seed = seed
self.roundsCounter = 0
self.statusUpdated = statusUpdated
self.commandsQueue = Queue.Queue()
self.resultsQueue = Queue.Queue()
self.solverName = None
self.solver = None
self.lastTime = utils.getTimePoint()
self.timedIntervals = collections.deque()
self.speed = 0
self.tasks = {}
self.currentTaskID = None
def notifyStatus(self):
if self.statusUpdated is None:
return
if self.solver is None:
parallelism = 0
else:
parallelism = self.solver.parallelism
self.statusUpdated((self.solverName, parallelism, self.speed))
def setSolver(self, name, parallelism):
if name is None and self.solverName is None:
pass
elif name == self.solverName:
if self.solver.parallelism != parallelism:
self.solver.setParallelism(parallelism)
else:
if self.solver is not None:
self.solver.setParallelism(0)
self.solverName = None
self.solver = None
if name is not None:
if name not in self.availableSolvers:
name, parallelism = "dumb", 1
self.solverName = name
self.solver = self.availableSolvers[name]
self.solver.setParallelism(parallelism)
self.notifyStatus()
def updateSpeed(self, iterationsCount):
currentTime = utils.getTimePoint()
duration = currentTime - self.lastTime
self.lastTime = currentTime
self.timedIntervals.append((currentTime, iterationsCount, duration))
for i in xrange(len(self.timedIntervals)):
time, iterationsCount, duration = self.timedIntervals[0]
if time + duration < currentTime - 3:
self.timedIntervals.popleft()
totalDuration = 0
totalIterationsCount = 0
for time, iterationsCount, duration in self.timedIntervals:
totalIterationsCount += iterationsCount
totalDuration += duration
if totalDuration < .25:
self.speed = 0
else:
self.speed = totalIterationsCount / totalDuration
self.notifyStatus()
def addTask(self, ID, headlessPayload, TTL, expiryTime, byteDifficulty, lengthExtension):
target = utils.calculateTarget(8 + 8 + len(headlessPayload), TTL, byteDifficulty, lengthExtension)
task = Task(headlessPayload, TTL, expiryTime, target)
self.tasks[ID] = task
if self.currentTaskID is None:
task.previous = ID
task.next = ID
self.currentTaskID = ID
else:
task.previous = self.currentTaskID
task.next = self.tasks[self.currentTaskID].next
self.tasks[task.previous].next = ID
self.tasks[task.next].previous = ID
def cancelTask(self, ID):
if ID not in self.tasks:
return
task = self.tasks.pop(ID)
if len(self.tasks) == 0:
self.currentTaskID = None
else:
self.tasks[task.previous].next = task.next
self.tasks[task.next].previous = task.previous
if self.currentTaskID == ID:
self.currentTaskID = task.next
def nextTask(self):
self.currentTaskID = self.tasks[self.currentTaskID].next
def shutdown(self):
self.setSolver(None, 0)
for i in self.tasks.keys():
self.cancelTask(i)
raise Stop()
def processCommand(self, command, *arguments):
getattr(self, command)(*arguments)
def round(self):
while True:
try:
self.processCommand(*self.commandsQueue.get_nowait())
except Queue.Empty:
break
while self.solver is None or self.currentTaskID is None:
try:
self.processCommand(*self.commandsQueue.get(True, timeout))
except Queue.Empty:
self.updateSpeed(0)
task = self.tasks[self.currentTaskID]
if task.expiryTime is None:
expiryTime = int(time.time() + task.TTL)
else:
expiryTime = task.expiryTime
initialPayload = struct.pack(">Q", expiryTime) + task.headlessPayload
initialHash = utils.calculateInitialHash(initialPayload)
appendedSeed = self.seed + struct.pack(">Q", self.roundsCounter)
self.roundsCounter += 1
try:
nonce, iterationsCount = self.solver.search(initialHash, task.target, appendedSeed, timeout)
except gpusolver.GPUSolverError:
self.setSolver("dumb", 1)
self.availableSolvers.pop("gpu")
nonce, iterationsCount = None, 0
self.updateSpeed(iterationsCount)
if nonce is None:
self.nextTask()
else:
self.resultsQueue.put(("taskDone", self.currentTaskID, nonce, expiryTime))
self.cancelTask(self.currentTaskID)
def run(self):
try:
while True:
self.round()
except Stop:
return
except Exception as exception:
self.resultsQueue.put(exception)

View File

@ -0,0 +1,70 @@
import ctypes
import struct
import hashlib
import utils
libcrypto = None
class DumbSolver(object):
def __init__(self, codePath):
libcrypto.SHA512.restype = ctypes.c_void_p
self.prefixes = [chr(i) for i in xrange(256)]
if ctypes.c_size_t is ctypes.c_uint:
self.proofLength = 8 + 64
self.hashLength = 64
else:
# Using the wrapper instead of a clear number slows the work down, but otherwise seems to be unsafe
self.proofLength = ctypes.c_size_t(8 + 64)
self.hashLength = ctypes.c_size_t(64)
self.firstHash = ctypes.create_string_buffer(64)
self.secondHash = ctypes.create_string_buffer(64)
self.parallelism = 1
def search(self, initialHash, target, seed, timeout):
startTime = utils.getTimePoint()
sha512 = libcrypto.SHA512
prefixes = self.prefixes
proofLength = self.proofLength
hashLength = self.hashLength
firstHash = self.firstHash
secondHash = self.secondHash
encodedTarget = struct.pack(">Q", target)
solutions = []
i = 0
while True:
randomness = hashlib.sha512(seed + struct.pack(">Q", i)).digest()
i += 1
suffix = randomness[: 7] + initialHash
for j in prefixes:
proof = j + suffix
sha512(j + suffix, proofLength, firstHash)
sha512(firstHash, hashLength, secondHash)
if secondHash[: 8] <= encodedTarget:
solutions.append(proof[: 8])
if len(solutions) != 0:
index, = struct.unpack(">Q", randomness[7: 15])
nonce = solutions[index % len(solutions)]
return nonce, 256 * i
if utils.getTimePoint() - startTime >= timeout:
return None, 256 * i
def setParallelism(self, parallelism):
pass

View File

@ -0,0 +1,71 @@
import sys
import os.path
import platform
import subprocess
import ctypes
def loadFastSolver(codePath):
if hasattr(sys, "winver"):
suffix = "-32"
if platform.architecture()[0] == "64bit":
suffix = "-64"
path = os.path.join(codePath, "fastsolver/libfastsolver{}.dll".format(suffix))
return ctypes.WinDLL(path)
makePath = os.path.join(codePath, "fastsolver")
path = os.path.join(codePath, "fastsolver/libfastsolver.so")
try:
return ctypes.CDLL(path)
except:
if not hasattr(sys, "frozen"):
subprocess.call(["make", "-C", makePath])
return ctypes.CDLL(path)
else:
raise Exception()
class FastSolver(object):
def __init__(self, codePath):
self.libfastsolver = loadFastSolver(codePath)
self.libfastsolver.fastsolver_add.restype = ctypes.c_size_t
self.libfastsolver.fastsolver_add.argtypes = []
self.libfastsolver.fastsolver_remove.restype = ctypes.c_size_t
self.libfastsolver.fastsolver_remove.argtypes = [ctypes.c_size_t]
self.libfastsolver.fastsolver_search.restype = ctypes.c_int
self.libfastsolver.fastsolver_search.argtypes = [
ctypes.c_void_p, ctypes.c_void_p,
ctypes.c_void_p, ctypes.c_ulonglong, ctypes.c_void_p, ctypes.c_ulonglong
]
self.nonce = ctypes.create_string_buffer(8)
self.iterationsCount = ctypes.c_ulonglong()
self.parallelism = 0
def search(self, initialHash, target, seed, timeout):
found = self.libfastsolver.fastsolver_search(
self.nonce, ctypes.byref(self.iterationsCount),
initialHash, target, seed, long(1000000000 * timeout)
)
if found == 1:
return self.nonce.raw, self.iterationsCount.value
else:
return None, self.iterationsCount.value
def setParallelism(self, parallelism):
parallelism = min(4096, parallelism)
for i in xrange(self.parallelism, parallelism):
self.parallelism = self.libfastsolver.fastsolver_add()
if parallelism < self.parallelism:
self.parallelism = self.libfastsolver.fastsolver_remove(self.parallelism - parallelism)

View File

@ -0,0 +1,100 @@
#include <string.h>
#include <openssl/sha.h>
#include "common.h"
volatile int run;
const char *initial_hash;
unsigned long long target;
const char *seed;
static void encode_big_endian(char *result, unsigned long long number) {
result[0] = number >> 56;
result[1] = number >> 48 & 0xff;
result[2] = number >> 40 & 0xff;
result[3] = number >> 32 & 0xff;
result[4] = number >> 24 & 0xff;
result[5] = number >> 16 & 0xff;
result[6] = number >> 8 & 0xff;
result[7] = number & 0xff;
}
static unsigned long long decode_big_endian(const char *encoded) {
return (
(unsigned long long) encoded[0] << 56 |
(unsigned long long) encoded[1] << 48 |
(unsigned long long) encoded[2] << 40 |
(unsigned long long) encoded[3] << 32 |
(unsigned long long) encoded[4] << 24 |
(unsigned long long) encoded[5] << 16 |
(unsigned long long) encoded[6] << 8 |
(unsigned long long) encoded[7]
);
}
int work(char *nonce, unsigned long long *iterations_count, size_t thread_number) {
unsigned long long i;
char proof[8 + 64];
char appended_seed[SEED_LENGTH + 8 + 8];
memcpy(proof + 8, initial_hash, 64);
memcpy(appended_seed, seed, SEED_LENGTH);
encode_big_endian(appended_seed + SEED_LENGTH, thread_number);
for (i = 0; run; ++i) {
char randomness[64];
size_t solutions_count = 0;
char solutions[256];
size_t j;
encode_big_endian(appended_seed + SEED_LENGTH + 8, i);
SHA512((unsigned char *) appended_seed, SEED_LENGTH + 8 + 8, (unsigned char *) randomness);
memcpy(proof + 1, randomness, 7);
for (j = 0; j < 256; ++j) {
unsigned long long trial;
SHA512_CTX context;
char first_hash[64];
char second_hash[64];
proof[0] = j;
SHA512_Init(&context);
SHA512_Update(&context, (unsigned char *) proof, 8 + 64);
SHA512_Final((unsigned char *) first_hash, &context);
SHA512_Init(&context);
SHA512_Update(&context, (unsigned char *) first_hash, 64);
SHA512_Final((unsigned char *) second_hash, &context);
trial = decode_big_endian(second_hash);
if (trial <= target) {
solutions[solutions_count] = j;
++solutions_count;
}
++*iterations_count;
}
if (solutions_count != 0) {
unsigned long long index = decode_big_endian(randomness + 7);
nonce[0] = solutions[index % solutions_count];
memcpy(nonce + 1, proof + 1, 7);
return 1;
}
}
return 0;
}

View File

@ -0,0 +1,13 @@
#ifndef COMMON_H
#define COMMON_H
extern volatile int run;
#define SEED_LENGTH (32 + 8)
extern const char *initial_hash;
extern unsigned long long target;
extern const char *seed;
int work(char *nonce, unsigned long long *iterations_count, size_t thread_number);
#endif

View File

@ -0,0 +1,4 @@
{
global: fastsolver_*;
local: *;
};

View File

@ -0,0 +1,11 @@
CFLAGS += -std=gnu99 -Wall -Wextra -pedantic -O3 -fPIC
LDFLAGS += -shared -lpthread -lcrypto -Wl,-version-script=main.map
libfastsolver.so: main.map common.o pthread.o
$(CC) $(LDFLAGS) -o $@ common.o pthread.o
common.o: common.h common.c
pthread.o: common.h pthread.c
clean:
rm -f common.o pthread.o libfastsolver.so

View File

@ -0,0 +1 @@
/Ox /MD common.c winapi.c /link /DLL /OUT:libfastsolver.dll /EXPORT:fastsolver_add /EXPORT:fastsolver_remove /EXPORT:fastsolver_search libcrypto.lib

View File

@ -0,0 +1,214 @@
#include <string.h>
#include <time.h>
#include <pthread.h>
#include "common.h"
static int initialized;
#define MAXIMUM_THREADS_COUNT 4096
static size_t threads_count;
static pthread_t threads[MAXIMUM_THREADS_COUNT];
static pthread_mutex_t lock;
static pthread_cond_t start;
static pthread_cond_t done;
static size_t running_threads_count;
static int found;
static char best_nonce[8];
static unsigned long long total_iterations_count;
static void *thread_function(void *argument) {
size_t thread_number = (pthread_t *) argument - threads;
while (1) {
char nonce[8];
unsigned long long iterations_count = 0;
int result;
pthread_mutex_lock(&lock);
while (!run && threads_count > thread_number) {
pthread_cond_wait(&start, &lock);
}
if (threads_count <= thread_number) {
pthread_mutex_unlock(&lock);
return NULL;
}
++running_threads_count;
pthread_mutex_unlock(&lock);
result = work(nonce, &iterations_count, thread_number);
pthread_mutex_lock(&lock);
if (result == 1) {
found = 1;
memcpy(best_nonce, nonce, 8);
}
total_iterations_count += iterations_count;
run = 0;
--running_threads_count;
pthread_cond_signal(&done);
pthread_mutex_unlock(&lock);
}
}
static int initialize(void) {
pthread_condattr_t done_attributes;
if (initialized == 1) {
return 1;
}
if (pthread_mutex_init(&lock, NULL) != 0) {
goto error_lock;
}
if (pthread_cond_init(&start, NULL) != 0) {
goto error_start;
}
if (pthread_condattr_init(&done_attributes) != 0) {
goto error_done_attributes;
}
#ifndef __APPLE__
pthread_condattr_setclock(&done_attributes, CLOCK_MONOTONIC);
#endif
if (pthread_cond_init(&done, &done_attributes) != 0) {
goto error_done;
}
pthread_condattr_destroy(&done_attributes);
initialized = 1;
return 1;
error_done: pthread_condattr_destroy(&done_attributes);
error_done_attributes: pthread_cond_destroy(&start);
error_start: pthread_mutex_destroy(&lock);
error_lock: return 0;
}
size_t fastsolver_add(void) {
#ifdef SCHED_IDLE
int policy = SCHED_IDLE;
#else
int policy = SCHED_OTHER;
#endif
struct sched_param parameters;
if (initialize() == 0) {
return threads_count;
}
pthread_mutex_lock(&lock);
if (pthread_create(&threads[threads_count], NULL, thread_function, &threads[threads_count]) != 0) {
pthread_mutex_unlock(&lock);
return threads_count;
}
parameters.sched_priority = sched_get_priority_min(policy);
pthread_setschedparam(threads[threads_count], policy, &parameters);
++threads_count;
pthread_mutex_unlock(&lock);
return threads_count;
}
size_t fastsolver_remove(size_t count) {
size_t i;
pthread_mutex_lock(&lock);
threads_count -= count;
pthread_cond_broadcast(&start);
pthread_mutex_unlock(&lock);
for (i = 0; i < count; ++i) {
void *result;
pthread_join(threads[threads_count + i], &result);
}
return threads_count;
}
int fastsolver_search(
char *local_nonce,
unsigned long long *local_iterations_count,
const char *local_initial_hash,
unsigned long long local_target,
const char *local_seed,
unsigned long long timeout
) {
struct timespec wait_time;
unsigned long long nanoseconds;
initial_hash = local_initial_hash;
target = local_target;
seed = local_seed;
found = 0;
total_iterations_count = 0;
#ifdef __APPLE__
wait_time.tv_sec = 0;
wait_time.tv_nsec = 0;
#else
clock_gettime(CLOCK_MONOTONIC, &wait_time);
#endif
nanoseconds = wait_time.tv_nsec + timeout;
wait_time.tv_sec += nanoseconds / 1000000000;
wait_time.tv_nsec = nanoseconds % 1000000000;
pthread_mutex_lock(&lock);
run = 1;
pthread_cond_broadcast(&start);
#ifdef __APPLE__
pthread_cond_timedwait_relative_np(&done, &lock, &wait_time);
#else
pthread_cond_timedwait(&done, &lock, &wait_time);
#endif
run = 0;
while (running_threads_count != 0) {
pthread_cond_wait(&done, &lock);
}
pthread_mutex_unlock(&lock);
if (found) {
memcpy(local_nonce, best_nonce, 8);
}
*local_iterations_count = total_iterations_count;
return found;
}

View File

@ -0,0 +1,160 @@
#include <string.h>
#include <Windows.h>
#include "common.h"
static int initialized;
#define MAXIMUM_THREADS_COUNT 4096
static size_t threads_count;
static HANDLE threads[MAXIMUM_THREADS_COUNT];
static CRITICAL_SECTION lock;
static CONDITION_VARIABLE start = CONDITION_VARIABLE_INIT;
static CONDITION_VARIABLE done = CONDITION_VARIABLE_INIT;
static size_t running_threads_count;
static int found;
static char best_nonce[8];
static unsigned long long total_iterations_count;
DWORD WINAPI thread_function(LPVOID argument) {
size_t thread_number = (HANDLE *) argument - threads;
while (1) {
char nonce[8];
unsigned long long iterations_count = 0;
int result;
EnterCriticalSection(&lock);
while (!run && threads_count > thread_number) {
SleepConditionVariableCS(&start, &lock, INFINITE);
}
if (threads_count <= thread_number) {
LeaveCriticalSection(&lock);
return 0;
}
++running_threads_count;
LeaveCriticalSection(&lock);
result = work(nonce, &iterations_count, thread_number);
EnterCriticalSection(&lock);
if (result == 1) {
found = 1;
memcpy(best_nonce, nonce, 8);
}
total_iterations_count += iterations_count;
run = 0;
--running_threads_count;
WakeConditionVariable(&done);
LeaveCriticalSection(&lock);
}
}
static int initialize(void) {
if (initialized == 1) {
return 1;
}
InitializeCriticalSection(&lock);
initialized = 1;
return 1;
}
size_t fastsolver_add(void) {
if (initialize() == 0) {
return threads_count;
}
EnterCriticalSection(&lock);
threads[threads_count] = CreateThread(NULL, 0, thread_function, &threads[threads_count], 0, NULL);
if (threads[threads_count] == NULL) {
LeaveCriticalSection(&lock);
return threads_count;
}
SetThreadPriority(threads[threads_count], THREAD_PRIORITY_IDLE);
++threads_count;
LeaveCriticalSection(&lock);
return threads_count;
}
size_t fastsolver_remove(size_t count) {
size_t i;
EnterCriticalSection(&lock);
threads_count -= count;
WakeAllConditionVariable(&start);
LeaveCriticalSection(&lock);
WaitForMultipleObjects(count, threads + threads_count, TRUE, INFINITE);
for (i = 0; i < count; ++i) {
CloseHandle(threads[threads_count + i]);
}
return threads_count;
}
int fastsolver_search(
char *local_nonce,
unsigned long long *local_iterations_count,
const char *local_initial_hash,
unsigned long long local_target,
const char *local_seed,
unsigned long long timeout
) {
initial_hash = local_initial_hash;
target = local_target;
seed = local_seed;
found = 0;
total_iterations_count = 0;
EnterCriticalSection(&lock);
run = 1;
WakeAllConditionVariable(&start);
SleepConditionVariableCS(&done, &lock, timeout / 1000);
run = 0;
while (running_threads_count != 0) {
SleepConditionVariableCS(&done, &lock, INFINITE);
}
LeaveCriticalSection(&lock);
if (found) {
memcpy(local_nonce, best_nonce, 8);
}
*local_iterations_count = total_iterations_count;
return found;
}

View File

@ -0,0 +1,106 @@
import os
import multiprocessing
import struct
import dumbsolver
def setIdle():
try:
import psutil
psutil.Process().nice(psutil.IDLE_PRIORITY_CLASS)
return
except:
pass
try:
import win32api
import win32con
import win32process
PID = win32api.GetCurrentProcessId()
handle = win32api.OpenProcess(win32con.PROCESS_ALL_ACCESS, True, PID)
win32process.SetPriorityClass(handle, win32process.IDLE_PRIORITY_CLASS)
return
except:
pass
if hasattr(os, "nice"):
os.nice(40)
def threadFunction(local, remote, codePath, threadNumber):
remote.close()
setIdle()
solver = dumbsolver.DumbSolver(codePath)
while True:
received = local.recv()
command = received[0]
arguments = received[1: ]
if command == "search":
initialHash, target, seed, timeout = arguments
appendedSeed = seed + struct.pack(">Q", threadNumber)
nonce, iterationsCount = solver.search(initialHash, target, appendedSeed, timeout)
local.send(("done", nonce, iterationsCount))
elif command == "shutdown":
local.close()
return
class ForkingSolver(object):
def __init__(self, codePath):
self.pipes = []
self.processes = []
self.parallelism = 0
self.codePath = codePath
def search(self, initialHash, target, seed, timeout):
for i in self.pipes:
i.send(("search", initialHash, target, seed, timeout))
bestNonce, totalIterationsCount = None, 0
for i in self.pipes:
event, nonce, iterationsCount = i.recv()
if nonce is not None:
bestNonce = nonce
totalIterationsCount += iterationsCount
return bestNonce, totalIterationsCount
def setParallelism(self, parallelism):
parallelism = min(4096, parallelism)
for i in xrange(self.parallelism, parallelism):
local, remote = multiprocessing.Pipe()
process = multiprocessing.Process(target = threadFunction, args = (remote, local, self.codePath, i))
process.start()
remote.close()
self.pipes.append(local)
self.processes.append(process)
for i in xrange(parallelism, self.parallelism):
pipe = self.pipes.pop()
pipe.send(("shutdown", ))
pipe.close()
for i in xrange(parallelism, self.parallelism):
self.processes.pop().join()
self.parallelism = parallelism

136
src/workprover/gpusolver.cl Normal file
View File

@ -0,0 +1,136 @@
constant ulong k[80] = {
0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
};
constant ulong h[8] = {
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
};
#define ROTATE(x, n) ((x) >> (n) | (x) << 64 - (n))
#define C(x, y, z) ((x) & (y) ^ ~(x) & (z))
#define M(x, y, z) ((x) & (y) ^ (x) & (z) ^ (y) & (z))
#define S0(x) (ROTATE((x), 28) ^ ROTATE((x), 34) ^ ROTATE((x), 39))
#define S1(x) (ROTATE((x), 14) ^ ROTATE((x), 18) ^ ROTATE((x), 41))
#define s0(x) (ROTATE((x), 1) ^ ROTATE((x), 8) ^ (x) >> 7)
#define s1(x) (ROTATE((x), 19) ^ ROTATE((x), 61) ^ (x) >> 6)
void sha512_process_block(ulong *state, ulong *block) {
ulong a = state[0];
ulong b = state[1];
ulong c = state[2];
ulong d = state[3];
ulong e = state[4];
ulong f = state[5];
ulong g = state[6];
ulong h = state[7];
ulong *w = block;
#pragma unroll
for (size_t i = 0; i < 16; i++) {
ulong t = k[i] + w[i & 15] + h + S1(e) + C(e, f, g);
h = g;
g = f;
f = e;
e = d + t;
t += M(a, b, c) + S0(a);
d = c;
c = b;
b = a;
a = t;
}
#pragma unroll 16
for (size_t i = 16; i < 80; i++) {
w[i & 15] += s0(w[i + 1 & 15]) + s1(w[i + 14 & 15]) + w[i + 9 & 15];
ulong t = k[i] + w[i & 15] + h + S1(e) + C(e, f, g);
h = g;
g = f;
f = e;
e = d + t;
t += M(a, b, c) + S0(a);
d = c;
c = b;
b = a;
a = t;
}
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
}
ulong compute_trial(ulong nonce, global const ulong *initial_hash) {
ulong fisrt_block[16] = {
nonce,
initial_hash[0], initial_hash[1], initial_hash[2], initial_hash[3],
initial_hash[4], initial_hash[5], initial_hash[6], initial_hash[7],
0x8000000000000000, 0, 0, 0, 0, 0, 8 * (8 + 64)
};
ulong second_block[16] = {
h[0], h[1], h[2], h[3],
h[4], h[5], h[6], h[7],
0x8000000000000000, 0, 0, 0, 0, 0, 0, 8 * 64
};
ulong double_hash[8] = {
h[0], h[1], h[2], h[3],
h[4], h[5], h[6], h[7]
};
sha512_process_block(second_block, fisrt_block);
sha512_process_block(double_hash, second_block);
return double_hash[0];
}
kernel void search(global unsigned int *output, global ulong *input) {
size_t thread_number = get_global_id(0);
global unsigned int *solutions_count = output;
global unsigned int *solutions = output + 1;
global ulong *nonce = input;
global ulong *initial_hash = input + 1;
global ulong *target = input + 9;
ulong trial = compute_trial(*nonce + thread_number, initial_hash);
if (trial <= *target) {
unsigned int index = atom_inc(solutions_count);
solutions[index] = thread_number;
}
}

103
src/workprover/gpusolver.py Normal file
View File

@ -0,0 +1,103 @@
import hashlib
import struct
import os.path
import utils
pyopencl = None
numpy = None
class GPUSolverError(Exception):
pass
class GPUSolver(object):
def __init__(self, codePath, vendors = None):
global pyopencl, numpy
import pyopencl
import numpy
device = None
for i in pyopencl.get_platforms():
if vendors is not None and i.vendor not in vendors:
continue
devices = i.get_devices(device_type = pyopencl.device_type.GPU)
if len(devices) != 0:
device = devices[0]
break
else:
raise Exception()
context = pyopencl.Context(devices = [device])
computeUnitsCount = device.get_info(pyopencl.device_info.MAX_COMPUTE_UNITS)
workGroupSize = device.get_info(pyopencl.device_info.MAX_WORK_GROUP_SIZE)
self.parallelism = workGroupSize * computeUnitsCount
self.batchSize = self.parallelism * 256
self.queue = pyopencl.CommandQueue(context, device)
with open(os.path.join(codePath, "gpusolver.cl")) as file:
source = file.read()
program = pyopencl.Program(context, source).build()
self.hostOutput = numpy.zeros(1 + self.batchSize, numpy.uint32)
self.hostInput = numpy.zeros(1 + 8 + 1, numpy.uint64)
self.output = pyopencl.Buffer(context, pyopencl.mem_flags.READ_WRITE, 4 * (1 + self.batchSize))
self.input = pyopencl.Buffer(context, pyopencl.mem_flags.READ_ONLY, 8 * (1 + 8 + 1))
self.kernel = program.search
self.kernel.set_args(self.output, self.input)
def search(self, initialHash, target, seed, timeout):
startTime = utils.getTimePoint()
self.hostOutput[0] = 0
for i in xrange(8):
self.hostInput[1 + i], = struct.unpack(">Q", initialHash[8 * i: 8 * (i + 1)])
self.hostInput[9] = target
pyopencl.enqueue_copy(self.queue, self.output, self.hostOutput[: 1])
i = 0
while True:
randomness = hashlib.sha512(seed + struct.pack(">Q", i)).digest()
i += 1
self.hostInput[0], = struct.unpack(">Q", randomness[: 8])
pyopencl.enqueue_copy(self.queue, self.input, self.hostInput)
pyopencl.enqueue_nd_range_kernel(self.queue, self.kernel, (self.batchSize, ), None)
self.queue.finish()
pyopencl.enqueue_copy(self.queue, self.hostOutput[: 1], self.output)
solutionsCount = long(self.hostOutput[0])
if solutionsCount != 0:
pyopencl.enqueue_copy(self.queue, self.hostOutput[0: 1 + solutionsCount], self.output)
index, = struct.unpack(">Q", randomness[8: 16])
threadNumber = self.hostOutput[1 + index % solutionsCount]
nonce = struct.pack(">Q", long(self.hostInput[0]) + threadNumber)
if not utils.checkProof(nonce, initialHash, target):
raise GPUSolverError()
return nonce, self.batchSize * i
if utils.getTimePoint() - startTime >= timeout:
return None, self.batchSize * i
def setParallelism(self, parallelism):
pass

205
src/workprover/test.py Executable file
View File

@ -0,0 +1,205 @@
#!/usr/bin/env python2.7
import unittest
import os.path
import binascii
import struct
import ctypes
import ctypes.util
import sys
import __init__
import utils
import dumbsolver
import forkingsolver
import fastsolver
import gpusolver
codePath = os.path.dirname(__file__)
if hasattr(sys, "winver"):
dumbsolver.libcrypto = ctypes.WinDLL("libcrypto.dll")
else:
dumbsolver.libcrypto = ctypes.CDLL(ctypes.util.find_library("crypto"))
nonce = binascii.unhexlify("9ca6790a249679f8")
expiryTime = 1525845600
headlessPayload = binascii.unhexlify("000000000001")
initialPayload = struct.pack(">Q", expiryTime) + headlessPayload
payload = nonce + initialPayload
initialHash = binascii.unhexlify(
"1e87a288a10454dea0d3a9b606cc538db1b8b47fe8a21a37b8e57da3db6928eb"
"d854fd22aed3e1849c4a1c596fe0bfec266c05900a862c5b356a6b7e51a4b510"
)
doubleHash = binascii.unhexlify(
"16cdf04b739412bea1bf58d6c5a53ec92e7d4aab180213405bf10d615354d417"
"00f8b1510d0844a4b7c7b7434e6c115b52fcec5c591e96c31f4b8769ee683552"
)
TTL = 3600
byteDifficulty = 1000
lengthExtension = 1000
target = 0x00000f903320b7f6
seed = binascii.unhexlify("3941c24a1256660a8f65d962954c406dab7bc449317fa087c4a3f1a3ca7d95fd")
timeout = .5
class TestUtils(unittest.TestCase):
def testCalculateInitialHash(self):
self.assertEqual(utils.calculateInitialHash(initialPayload), initialHash)
def testCalculateDoubleHash(self):
self.assertEqual(utils.calculateDoubleHash(payload), doubleHash)
def testCalculateTarget(self):
self.assertEqual(utils.calculateTarget(1000, 1015, 1000, 1000), 0x00000843bf57fed2)
self.assertEqual(utils.calculateTarget(1000, 1016, 1000, 1000), 0x00000842b4a960c2)
def testCheckProof(self):
self.assertFalse(utils.checkProof(nonce, initialHash, 0x000002fe91eba355))
self.assertTrue(utils.checkProof(nonce, initialHash, 0x000002fe91eba356))
def testCheckWorkSufficient(self):
originalTime = utils.time.time
utils.time.time = lambda: expiryTime - 293757.5
self.assertFalse(utils.checkWorkSufficient(payload, byteDifficulty, lengthExtension))
utils.time.time = lambda: expiryTime - 293757
self.assertTrue(utils.checkWorkSufficient(payload, byteDifficulty, lengthExtension))
utils.time.time = originalTime
def testEstimateMaximumIterationsCount(self):
self.assertEqual(utils.estimateMaximumIterationsCount(0x000fffffffffffff, .1), 512)
self.assertEqual(utils.estimateMaximumIterationsCount(target, .8), 1735168)
class TestSolver(unittest.TestCase):
def setUp(self):
self.solver = self.Solver(codePath)
self.solver.setParallelism(1)
def testSearch(self):
nonce = None
i = 0
while nonce is None:
appendedSeed = seed + struct.pack(">Q", i)
i += 1
nonce, iterationsCount = self.solver.search(initialHash, target, appendedSeed, timeout)
self.assertTrue(utils.checkProof(nonce, initialHash, target))
def tearDown(self):
self.solver.setParallelism(0)
class TestDumbSolver(TestSolver):
Solver = dumbsolver.DumbSolver
class TestForkingSolver(TestSolver):
Solver = forkingsolver.ForkingSolver
class TestFastSolver(TestSolver):
Solver = fastsolver.FastSolver
class TestGPUSolver(TestSolver):
Solver = gpusolver.GPUSolver
class TestWorkProver(unittest.TestCase):
def setUp(self):
self.thread = __init__.WorkProver(codePath, None, seed, None)
self.thread.start()
def checkTaskLinks(self):
IDs = set(self.thread.tasks.keys())
if len(IDs) == 0:
return
self.assertIn(self.thread.currentTaskID, IDs)
linkID = next(iter(IDs))
for i in xrange(len(IDs)):
self.assertIn(linkID, IDs)
IDs.remove(linkID)
nextLinkID = self.thread.tasks[linkID].next
self.assertEqual(self.thread.tasks[nextLinkID].previous, linkID)
linkID = nextLinkID
def testTasks(self):
self.thread.addTask(0, headlessPayload, TTL, None, byteDifficulty, lengthExtension)
self.checkTaskLinks()
self.thread.addTask(1, headlessPayload, TTL, None, byteDifficulty, lengthExtension)
self.thread.addTask(2, headlessPayload, TTL, None, byteDifficulty, lengthExtension)
self.checkTaskLinks()
self.thread.cancelTask(self.thread.currentTaskID)
self.thread.nextTask()
self.thread.nextTask()
self.thread.nextTask()
self.thread.addTask(3, headlessPayload, TTL, None, byteDifficulty, lengthExtension)
self.checkTaskLinks()
def testSearch(self):
self.thread.commandsQueue.put((
"addTask", 0,
headlessPayload, TTL, None, byteDifficulty, lengthExtension
))
self.thread.commandsQueue.put((
"addTask", 1,
headlessPayload, TTL, None, byteDifficulty, lengthExtension
))
self.thread.commandsQueue.put((
"addTask", 2,
headlessPayload, TTL * 100, expiryTime, byteDifficulty, lengthExtension
))
self.thread.commandsQueue.put(("setSolver", "dumb", 1))
for i in xrange(3):
event, ID, nonce, localExpiryTime = self.thread.resultsQueue.get()
initialPayload = struct.pack(">Q", localExpiryTime) + headlessPayload
initialHash = utils.calculateInitialHash(initialPayload)
self.assertTrue(utils.checkProof(nonce, initialHash, target))
def tearDown(self):
self.thread.commandsQueue.put(("shutdown", ))
self.thread.join()
loader = unittest.TestLoader()
suite = unittest.TestSuite([
loader.loadTestsFromTestCase(TestUtils),
loader.loadTestsFromTestCase(TestDumbSolver),
loader.loadTestsFromTestCase(TestForkingSolver),
loader.loadTestsFromTestCase(TestFastSolver),
loader.loadTestsFromTestCase(TestGPUSolver),
loader.loadTestsFromTestCase(TestWorkProver)
])
if __name__ == "__main__":
import multiprocessing
multiprocessing.freeze_support()
runner = unittest.TextTestRunner()
runner.run(suite)

51
src/workprover/utils.py Normal file
View File

@ -0,0 +1,51 @@
import hashlib
import struct
import time
import math
import sys
import os
def calculateInitialHash(initialPayload):
return hashlib.sha512(initialPayload).digest()
def calculateDoubleHash(data):
return hashlib.sha512(hashlib.sha512(data).digest()).digest()
# Length including nonce
def calculateTarget(length, TTL, byteDifficulty, lengthExtension):
adjustedLength = length + lengthExtension
timeEquivalent = TTL * adjustedLength / 2 ** 16
difficulty = byteDifficulty * (adjustedLength + timeEquivalent)
return 2 ** 64 / difficulty
def checkProof(nonce, initialHash, target):
proof = nonce + initialHash
trial, = struct.unpack(">Q", calculateDoubleHash(proof)[: 8])
return trial <= target
def checkWorkSufficient(payload, byteDifficulty, lengthExtension):
expiryTime, = struct.unpack(">Q", payload[8: 16])
minimumTTL = max(300, expiryTime - int(time.time()))
nonce = payload[: 8]
initialHash = calculateInitialHash(payload[8: ])
target = calculateTarget(len(payload), minimumTTL, byteDifficulty, lengthExtension)
return checkProof(nonce, initialHash, target)
def estimateMaximumIterationsCount(target, probability):
coefficient = -math.log(1 - probability)
difficulty = 2. ** 64 / target
return int(coefficient * difficulty + 255) / 256 * 256
if hasattr(sys, "winver"):
getTimePoint = time.clock
else:
def getTimePoint():
return os.times()[4]