diff --git a/docs/conf.py b/docs/conf.py index 3464e056..ec1b0b74 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,7 +19,7 @@ import version # noqa:E402 # -- Project information ----------------------------------------------------- project = u'PyBitmessage' -copyright = u'2019, The Bitmessage Team' # pylint: disable=redefined-builtin +copyright = u'2019-2021, The Bitmessage Team' # pylint: disable=redefined-builtin author = u'The Bitmessage Team' # The short X.Y version diff --git a/docs/encrypted_payload.rst b/docs/encrypted_payload.rst new file mode 100644 index 00000000..346d370d --- /dev/null +++ b/docs/encrypted_payload.rst @@ -0,0 +1,19 @@ ++------------+-------------+-----------+--------------------------------------------+ +| Field Size | Description | Data type | Comments | ++============+=============+===========+============================================+ +| 16 | IV | uchar[] | Initialization Vector used for AES-256-CBC | ++------------+-------------+-----------+--------------------------------------------+ +| 2 | Curve type | uint16_t | Elliptic Curve type 0x02CA (714) | ++------------+-------------+-----------+--------------------------------------------+ +| 2 | X length | uint16_t | Length of X component of public key R | ++------------+-------------+-----------+--------------------------------------------+ +| X length | X | uchar[] | X component of public key R | ++------------+-------------+-----------+--------------------------------------------+ +| 2 | Y length | uint16_t | Length of Y component of public key R | ++------------+-------------+-----------+--------------------------------------------+ +| Y length | Y | uchar[] | Y component of public key R | ++------------+-------------+-----------+--------------------------------------------+ +| ? 
| encrypted | uchar[] | Cipher text | ++------------+-------------+-----------+--------------------------------------------+ +| 32 | MAC | uchar[] | HMACSHA256 Message Authentication Code | ++------------+-------------+-----------+--------------------------------------------+ diff --git a/docs/encryption.rst b/docs/encryption.rst new file mode 100644 index 00000000..925dd001 --- /dev/null +++ b/docs/encryption.rst @@ -0,0 +1,232 @@ +Encryption +========== + +Bitmessage uses the Elliptic Curve Integrated Encryption Scheme +`(ECIES) `_ +to encrypt the payload of the Message and Broadcast objects. + +The scheme uses Elliptic Curve Diffie-Hellman +`(ECDH) `_ to generate a shared secret used +to generate the encryption parameters for Advanced Encryption Standard with +256bit key and Cipher-Block Chaining +`(AES-256-CBC) `_. +The encrypted data will be padded to a 16 byte boundary in accordance to +`PKCS7 `_. This +means that the data is padded with N bytes of value N. + +The Key Derivation Function +`(KDF) `_ used to +generate the key material for AES is +`SHA512 `_. The Message Authentication +Code (MAC) scheme used is `HMACSHA256 `_. + +Format +------ + +(See also: :doc:`protocol`) + +.. include:: encrypted_payload.rst + +In order to reconstitute a usable (65 byte) public key (starting with 0x04), +the X and Y components need to be expanded by prepending them with 0x00 bytes +until the individual component lengths are 32 bytes. + +Encryption +---------- + + 1. The destination public key is called K. + 2. Generate 16 random bytes using a secure random number generator. + Call them IV. + 3. Generate a new random EC key pair with private key called r and public key + called R. + 4. Do an EC point multiply with public key K and private key r. This gives you + public key P. + 5. Use the X component of public key P and calculate the SHA512 hash H. + 6. The first 32 bytes of H are called key_e and the last 32 bytes are called + key_m. + 7. 
Pad the input text to a multiple of 16 bytes, in accordance to PKCS7. + 8. Encrypt the data with AES-256-CBC, using IV as initialization vector, + key_e as encryption key and the padded input text as payload. Call the + output cipher text. + 9. Calculate a 32 byte MAC with HMACSHA256, using key_m as salt and + IV + R + cipher text as data. Call the output MAC. + +The resulting data is: IV + R + cipher text + MAC + +Decryption +---------- + + 1. The private key used to decrypt is called k. + 2. Do an EC point multiply with private key k and public key R. This gives you + public key P. + 3. Use the X component of public key P and calculate the SHA512 hash H. + 4. The first 32 bytes of H are called key_e and the last 32 bytes are called + key_m. + 5. Calculate MAC' with HMACSHA256, using key_m as salt and + IV + R + cipher text as data. + 6. Compare MAC with MAC'. If not equal, decryption will fail. + 7. Decrypt the cipher text with AES-256-CBC, using IV as initialization + vector, key_e as decryption key and the cipher text as payload. The output + is the padded input text. + +.. highlight:: nasm + +Partial Example +--------------- + +.. list-table:: Public key K: + :header-rows: 1 + :widths: auto + + * - Data + - Comments + * - + + :: + + 04 09 d4 e5 c0 ab 3d 25 + fe 04 8c 64 c9 da 1a 24 + 2c 7f 19 41 7e 95 17 cd + 26 69 50 d7 2c 75 57 13 + 58 5c 61 78 e9 7f e0 92 + fc 89 7c 9a 1f 17 20 d5 + 77 0a e8 ea ad 2f a8 fc + bd 08 e9 32 4a 5d de 18 + 57 + - Public key, 0x04 prefix, then 32 bytes X and 32 bytes Y. + + +.. list-table:: Initialization Vector IV: + :header-rows: 1 + :widths: auto + + * - Data + - Comments + * - + + :: + + bd db 7c 28 29 b0 80 38 + 75 30 84 a2 f3 99 16 81 + - 16 bytes generated with a secure random number generator. + +.. 
list-table:: Randomly generated key pair with private key r and public key R: + :header-rows: 1 + :widths: auto + + * - Data + - Comments + * - + + :: + + 5b e6 fa cd 94 1b 76 e9 + d3 ea d0 30 29 fb db 6b + 6e 08 09 29 3f 7f b1 97 + d0 c5 1f 84 e9 6b 8b a4 + - Private key r + * - + + :: + + 04 02 93 21 3d cf 13 88 + b6 1c 2a e5 cf 80 fe e6 + ff ff c0 49 a2 f9 fe 73 + 65 fe 38 67 81 3c a8 12 + 92 df 94 68 6c 6a fb 56 + 5a c6 14 9b 15 3d 61 b3 + b2 87 ee 2c 7f 99 7c 14 + 23 87 96 c1 2b 43 a3 86 + 5a + - Public key R + +.. list-table:: Derived public key P (point multiply r with K): + :header-rows: 1 + :widths: auto + + * - Data + - Comments + * - + + :: + + 04 0d b8 e3 ad 8c 0c d7 + 3f a2 b3 46 71 b7 b2 47 + 72 9b 10 11 41 57 9d 19 + 9e 0d c0 bd 02 4e ae fd + 89 ca c8 f5 28 dc 90 b6 + 68 11 ab ac 51 7d 74 97 + be 52 92 93 12 29 be 0b + 74 3e 05 03 f4 43 c3 d2 + 96 + - Public key P + * - + + :: + + 0d b8 e3 ad 8c 0c d7 3f + a2 b3 46 71 b7 b2 47 72 + 9b 10 11 41 57 9d 19 9e + 0d c0 bd 02 4e ae fd 89 + - X component of public key P + +.. list-table:: SHA512 of public key P X component (H): + :header-rows: 1 + :widths: auto + + * - Data + - Comments + * - + + :: + + 17 05 43 82 82 67 86 71 + 05 26 3d 48 28 ef ff 82 + d9 d5 9c bf 08 74 3b 69 + 6b cc 5d 69 fa 18 97 b4 + - First 32 bytes of H called key_e + * - + + :: + + f8 3f 1e 9c c5 d6 b8 44 + 8d 39 dc 6a 9d 5f 5b 7f + 46 0e 4a 78 e9 28 6e e8 + d9 1c e1 66 0a 53 ea cd + - Last 32 bytes of H called key_m + +.. list-table:: Padded input: + :header-rows: 1 + :widths: auto + + * - Data + - Comments + * - + + :: + + 54 68 65 20 71 75 69 63 + 6b 20 62 72 6f 77 6e 20 + 66 6f 78 20 6a 75 6d 70 + 73 20 6f 76 65 72 20 74 + 68 65 20 6c 61 7a 79 20 + 64 6f 67 2e 04 04 04 04 + - The quick brown fox jumps over the lazy dog.0x04,0x04,0x04,0x04 + +.. 
list-table:: Cipher text: + :header-rows: 1 + :widths: auto + + * - Data + - Comments + * - + + :: + + 64 20 3d 5b 24 68 8e 25 + 47 bb a3 45 fa 13 9a 5a + 1d 96 22 20 d4 d4 8a 0c + f3 b1 57 2c 0d 95 b6 16 + 43 a6 f9 a0 d7 5a f7 ea + cc 1b d9 57 14 7b f7 23 + - 3 blocks of 16 bytes of encrypted data. diff --git a/docs/extended_encoding.rst b/docs/extended_encoding.rst new file mode 100644 index 00000000..25539ad4 --- /dev/null +++ b/docs/extended_encoding.rst @@ -0,0 +1,55 @@ +Extended encoding +================= + +Extended encoding is an attempt to create a standard for transmitting structured +data. The goals are flexibility, wide platform support and extensibility. It is +currently available in the v0.6 branch and can be enabled by holding "Shift" +while clicking on Send. It is planned that v5 addresses will have to support +this. It's a work in progress, the basic plain text message works but don't +expect anything else at this time. + +The data structure is in msgpack, then compressed with zlib. The top level is +a key/value store, and the "" key (empty string) contains the value of the type +of object, which can then have its individual format and standards. + +Text fields are encoded using UTF-8. + +Types +----- + +You can find the implementations in the ``src/messagetypes`` directory of +PyBitmessage. Each type has its own file which includes one class, and they are +dynamically loaded on startup. It's planned that this will also contain +initialisation, rendering and so on, so that developers can simply add a new +object type by adding a single file in the messagetypes directory and not have +to change any other part of the code. + +message +^^^^^^^ + +The replacement for the old messages. Mandatory keys are ``body`` and +``subject``, others are currently not implemented and not mandatory. Proposed +other keys: + +``parents``: + array of msgids referring to messages that logically precede it in a + conversation. 
Allows to create a threaded conversation view + +``files``: + array of files (which is a key/value pair): + + ``name``: + file name, mandatory + ``data``: + the binary data of the file + ``type``: + MIME content type + ``disposition``: + MIME content disposition, possible values are "inline" and "attachment" + +vote +^^^^ + +Dummy code available in the repository. Supposed to serve voting in a chan +(thumbs up/down) for decentralised moderation. Does not actually do anything at +the moment and specification can change. diff --git a/docs/index.rst b/docs/index.rst index 5e8a1c1a..4e647278 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,17 @@ .. mdinclude:: ../README.md + :end-line: 20 -Documentation -------------- +Protocol documentation +---------------------- +.. toctree:: + :maxdepth: 2 + + protocol + encryption + pow + +Code documentation +------------------ .. toctree:: :maxdepth: 3 @@ -14,3 +24,6 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` + +.. mdinclude:: ../README.md + :start-line: 21 diff --git a/docs/pow.rst b/docs/pow.rst new file mode 100644 index 00000000..3786b075 --- /dev/null +++ b/docs/pow.rst @@ -0,0 +1,77 @@ +Proof of work +============= + +This page describes Bitmessage's Proof of work ("POW") mechanism as it exists in +Protocol Version 3. In this document, hash() means SHA512(). SHA512 was chosen +as it is widely supported and so that Bitcoin POW hardware cannot trivially be +used for Bitmessage POWs. The author acknowledges that they are essentially the +same algorithm with a different key size. + +Both ``averageProofOfWorkNonceTrialsPerByte`` and ``payloadLengthExtraBytes`` +are set by the owner of a Bitmessage address. The default and minimum for each +is 1000. (This is the same as difficulty 1. If the difficulty is 2, then this +value is 2000). The purpose of ``payloadLengthExtraBytes`` is to add some extra +weight to small messages. 
+ +Do a POW +-------- + +Let us use a ``msg`` message as an example:: + + payload = embeddedTime + encodedObjectVersion + encodedStreamNumber + encrypted + +``payloadLength`` + the length of payload, in bytes, + 8 + (to account for the nonce which we will append later) +``TTL`` + the number of seconds in between now and the object expiresTime. + +.. include:: pow_formula.rst + +:: + + initialHash = hash(payload) + +start with ``trialValue = 99999999999999999999`` + +also start with ``nonce = 0`` where nonce is 8 bytes in length and can be +hashed as if it is a string. + +:: + + while trialValue > target: + nonce = nonce + 1 + resultHash = hash(hash( nonce || initialHash )) + trialValue = the first 8 bytes of resultHash, converted to an integer + +When this loop finishes, you will have your 8 byte nonce value which you can +prepend onto the front of the payload. The message is then ready to send. + +Check a POW +----------- + +Let us assume that ``payload`` contains the payload for a msg message (the nonce +down through the encrypted message data). + +``nonce`` + the first 8 bytes of payload +``dataToCheck`` + the ninth byte of payload on down (thus it is everything except the nonce) + +:: + + initialHash = hash(dataToCheck) + + resultHash = hash(hash( nonce || initialHash )) + +``POWValue`` + the first eight bytes of resultHash converted to an integer +``TTL`` + the number of seconds in between now and the object ``expiresTime``. + +.. include:: pow_formula.rst + +If ``POWValue`` is less than or equal to ``target``, then the POW check passes. + + + diff --git a/docs/pow_formula.rst b/docs/pow_formula.rst new file mode 100644 index 00000000..16c3f174 --- /dev/null +++ b/docs/pow_formula.rst @@ -0,0 +1,7 @@ + +.. 
math:: + + target = \frac{2^{64}}{{\displaystyle + nonceTrialsPerByte (payloadLength + payloadLengthExtraBytes + \frac{ + TTL (payloadLength + payloadLengthExtraBytes)}{2^{16}}) + }} diff --git a/docs/protocol.rst b/docs/protocol.rst new file mode 100644 index 00000000..06e10766 --- /dev/null +++ b/docs/protocol.rst @@ -0,0 +1,940 @@ +Protocol specification +====================== + +.. warning:: All objects sent on the network should support protocol v3 + starting on Sun, 16 Nov 2014 22:00:00 GMT. + +.. toctree:: + :maxdepth: 2 + +Common standards +---------------- + +Hashes +^^^^^^ + +Most of the time `SHA-512 `_ hashes are +used, however `RIPEMD-160 `_ is also used +when creating an address. + +A double-round of SHA-512 is used for the Proof Of Work. Example of +double-SHA-512 encoding of string "hello": + +.. highlight:: nasm + +:: + + hello + 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043(first round of sha-512) + 0592a10584ffabf96539f3d780d776828c67da1ab5b169e9e8aed838aaecc9ed36d49ff1423c55f019e050c66c6324f53588be88894fef4dcffdb74b98e2b200(second round of sha-512) + +For Bitmessage addresses (RIPEMD-160) this would give: + +:: + + hello + 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043(first round is sha-512) + 79a324faeebcbf9849f310545ed531556882487e (with ripemd-160) + + +Common structures +----------------- + +All integers are encoded in big endian. (This is different from Bitcoin). + +.. 
list-table:: Message structure + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 4 + - magic + - uint32_t + - Magic value indicating message origin network, and used to seek to next + message when stream state is unknown + * - 12 + - command + - char[12] + - ASCII string identifying the packet content, NULL padded (non-NULL + padding results in packet rejected) + * - 4 + - length + - uint32_t + - Length of payload in number of bytes. Because of other restrictions, + there is no reason why this length would ever be larger than 1600003 + bytes. Some clients include a sanity-check to avoid processing messages + which are larger than this. + * - 4 + - checksum + - uint32_t + - First 4 bytes of sha512(payload) + * - ? + - message_payload + - uchar[] + - The actual data, a :ref:`message ` or an object_. + Not to be confused with objectPayload. + +Known magic values: + ++-------------+-------------------+ +| Magic value | Sent over wire as | ++=============+===================+ +| 0xE9BEB4D9 | E9 BE B4 D9 | ++-------------+-------------------+ + +.. _varint: + +Variable length integer +^^^^^^^^^^^^^^^^^^^^^^^ + +Integer can be encoded depending on the represented value to save space. +Variable length integers always precede an array/vector of a type of data that +may vary in length. Varints MUST use the minimum possible number of bytes to +encode a value. For example, the value 6 can be encoded with one byte therefore +a varint that uses three bytes to encode the value 6 is malformed and the +decoding task must be aborted. 
+ ++---------------+----------------+------------------------------------------+ +| Value | Storage length | Format | ++===============+================+==========================================+ +| < 0xfd | 1 | uint8_t | ++---------------+----------------+------------------------------------------+ +| <= 0xffff | 3 | 0xfd followed by the integer as uint16_t | ++---------------+----------------+------------------------------------------+ +| <= 0xffffffff | 5 | 0xfe followed by the integer as uint32_t | ++---------------+----------------+------------------------------------------+ +| - | 9 | 0xff followed by the integer as uint64_t | ++---------------+----------------+------------------------------------------+ + +Variable length string +^^^^^^^^^^^^^^^^^^^^^^ + +Variable length string can be stored using a variable length integer followed by +the string itself. + ++------------+-------------+------------+----------------------------------+ +| Field Size | Description | Data type | Comments | ++============+=============+============+==================================+ +| 1+ | length | |var_int| | Length of the string | ++------------+-------------+------------+----------------------------------+ +| ? | string | char[] | The string itself (can be empty) | ++------------+-------------+------------+----------------------------------+ + +Variable length list of integers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +n integers can be stored using n+1 :ref:`variable length integers ` +where the first var_int equals n. 
+ ++------------+-------------+-----------+----------------------------+ +| Field Size | Description | Data type | Comments | ++============+=============+===========+============================+ +| 1+ | count | |var_int| | Number of var_ints below | ++------------+-------------+-----------+----------------------------+ +| 1+ | | var_int | The first value stored | ++------------+-------------+-----------+----------------------------+ +| 1+ | | var_int | The second value stored... | ++------------+-------------+-----------+----------------------------+ +| 1+ | | var_int | etc... | ++------------+-------------+-----------+----------------------------+ + +.. |var_int| replace:: :ref:`var_int ` + +Network address +^^^^^^^^^^^^^^^ + +When a network address is needed somewhere, this structure is used. Network +addresses are not prefixed with a timestamp or stream in the version_ message. + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 8 + - time + - uint64 + - the Time. + * - 4 + - stream + - uint32 + - Stream number for this node + * - 8 + - services + - uint64_t + - same service(s) listed in version_ + * - 16 + - IPv6/4 + - char[16] + - IPv6 address. IPv4 addresses are written into the message as a 16 byte + `IPv4-mapped IPv6 address `_ + (12 bytes 00 00 00 00 00 00 00 00 00 00 FF FF, followed by the 4 bytes of + the IPv4 address). + * - 2 + - port + - uint16_t + - port number + +Inventory Vectors +^^^^^^^^^^^^^^^^^ + +Inventory vectors are used for notifying other nodes about objects they have or +data which is being requested. Two rounds of SHA-512 are used, resulting in a +64 byte hash. Only the first 32 bytes are used; the later 32 bytes are ignored. 
+ +Inventory vectors consist of the following data format: + ++------------+-------------+-----------+--------------------+ +| Field Size | Description | Data type | Comments | ++============+=============+===========+====================+ +| 32 | hash | char[32] | Hash of the object | ++------------+-------------+-----------+--------------------+ + +Encrypted payload +^^^^^^^^^^^^^^^^^ + +Bitmessage uses `ECIES `_ to encrypt its messages. For more information see :doc:`encryption` + +.. include:: encrypted_payload.rst + +Unencrypted Message Data +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 1+ + - msg_version + - var_int + - Message format version. **This field is not included after the + protocol v3 upgrade period**. + * - 1+ + - address_version + - var_int + - Sender's address version number. This is needed in order to calculate + the sender's address to show in the UI, and also to allow for forwards + compatible changes to the public-key data included below. + * - 1+ + - stream + - var_int + - Sender's stream number + * - 4 + - behavior bitfield + - uint32_t + - A bitfield of optional behaviors and features that can be expected from + the node with this pubkey included in this msg message (the sender's + pubkey). + * - 64 + - public signing key + - uchar[] + - The ECC public key used for signing (uncompressed format; + normally prepended with \x04) + * - 64 + - public encryption key + - uchar[] + - The ECC public key used for encryption (uncompressed format; + normally prepended with \x04) + * - 1+ + - nonce_trials_per_byte + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is the + average number of nonce trials a node will have to perform to meet the + Proof of Work requirement. 
1000 is the network minimum so any lower + values will be automatically raised to 1000. **This field is new and is + only included when the address_version >= 3**. + * - 1+ + - extra_bytes + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is added + to the data length to make sending small messages more difficult. + 1000 is the network minimum so any lower values will be automatically + raised to 1000. **This field is new and is only included when the + address_version >= 3**. + * - 20 + - destination ripe + - uchar[] + - The ripe hash of the public key of the receiver of the message + * - 1+ + - encoding + - var_int + - :ref:`Message Encoding ` type + * - 1+ + - message_length + - var_int + - Message Length + * - message_length + - message + - uchar[] + - The message. + * - 1+ + - ack_length + - var_int + - Length of the acknowledgement data + * - ack_length + - ack_data + - uchar[] + - The acknowledgement data to be transmitted. This takes the form of a + Bitmessage protocol message, like another msg message. The POW therein + must already be completed. + * - 1+ + - sig_length + - var_int + - Length of the signature + * - sig_length + - signature + - uchar[] + - The ECDSA signature which covers the object header starting with the + time, appended with the data described in this table down to the + ack_data. + +.. _msg-encodings: + +Message Encodings +""""""""""""""""" + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Value + - Name + - Description + * - 0 + - IGNORE + - Any data with this number may be ignored. The sending node might simply + be sharing its public key with you. + * - 1 + - TRIVIAL + - UTF-8. No 'Subject' or 'Body' sections. Useful for simple strings + of data, like URIs or magnet links. + * - 2 + - SIMPLE + - UTF-8. Uses 'Subject' and 'Body' sections. 
No MIME is used. + :: + messageToTransmit = 'Subject:' + subject + '\n' + 'Body:' + message + * - 3 + - EXTENDED + - See :doc:`extended_encoding` + +Further values for the message encodings can be decided upon by the community. +Any MIME or MIME-like encoding format, should they be used, should make use of +Bitmessage's 8-bit bytes. + +.. _behavior-bitfield: + +Pubkey bitfield features +"""""""""""""""""""""""" + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Bit + - Name + - Description + * - 0 + - undefined + - The most significant bit at the beginning of the structure. Undefined + * - 1 + - undefined + - The next most significant bit. Undefined + * - ... + - ... + - ... + * - 27 + - onion_router + - (**Proposal**) Node can be used to onion-route messages. In theory any + node can onion route, but since it requires more resources, they may have + the functionality disabled. This field will be used to indicate that the + node is willing to do this. + * - 28 + - forward_secrecy + - (**Proposal**) Receiving node supports a forward secrecy encryption + extension. The exact design is pending. + * - 29 + - chat + - (**Proposal**) Address is for chatting rather than messaging. + * - 30 + - include_destination + - (**Proposal**) Receiving node expects that the RIPE hash encoded in their + address precedes the encrypted message data of msg messages bound for + them. + + .. note:: since hardly anyone implements this, this will be redesigned as + `simple recipient verification `_ + * - 31 + - does_ack + - If true, the receiving node does send acknowledgements (rather than + dropping them). + +.. _msg-types: + +Message types +------------- + +Undefined messages received on the wire must be ignored. + +version +^^^^^^^ + +When a node creates an outgoing connection, it will immediately advertise its +version. The remote node will respond with its version. No further communication +is possible until both peers have exchanged their version. + +.. 
list-table:: Payload + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 4 + - version + - int32_t + - Identifies protocol version being used by the node. Should equal 3. + Nodes should disconnect if the remote node's version is lower but + continue with the connection if it is higher. + * - 8 + - services + - uint64_t + - bitfield of features to be enabled for this connection + * - 8 + - timestamp + - int64_t + - standard UNIX timestamp in seconds + * - 26 + - addr_recv + - net_addr + - The network address of the node receiving this message (not including the + time or stream number) + * - 26 + - addr_from + - net_addr + - The network address of the node emitting this message (not including the + time or stream number and the ip itself is ignored by the receiver) + * - 8 + - nonce + - uint64_t + - Random nonce used to detect connections to self. + * - 1+ + - user_agent + - var_str + - :doc:`useragent` (0x00 if string is 0 bytes long). Sending nodes must not + include a user_agent longer than 5000 bytes. + * - 1+ + - stream_numbers + - var_int_list + - The stream numbers that the emitting node is interested in. Sending nodes + must not include more than 160000 stream numbers. + +A "verack" packet shall be sent if the version packet was accepted. Once you +have sent and received a verack message with the remote node, send an addr +message advertising up to 1000 peers of which you are aware, and one or more +inv messages advertising all of the valid objects of which you are aware. + +.. list-table:: The following services are currently assigned + :header-rows: 1 + :widths: auto + + * - Value + - Name + - Description + * - 1 + - NODE_NETWORK + - This is a normal network node. + * - 2 + - NODE_SSL + - This node supports SSL/TLS in the current connection (python < 2.7.9 only + supports a SSL client, so in that case it would only have this on when + the connection is a client). 
+ * - 3 + - NODE_POW + - (**Proposal**) This node may do PoW on behalf of some of its peers (PoW + offloading/delegating), but it doesn't have to. Clients may have to meet + additional requirements (e.g. TLS authentication) + * - 4 + - NODE_DANDELION + - Node supports `dandelion `_ + +verack +^^^^^^ + +The *verack* message is sent in reply to *version*. This message consists of +only a :ref:`message header ` with the command string +"verack". The TCP timeout starts out at 20 seconds; after verack messages are +exchanged, the timeout is raised to 10 minutes. + +If both sides announce that they support SSL, they MUST perform a SSL handshake +immediately after they both send and receive verack. During this SSL handshake, +the TCP client acts as a SSL client, and the TCP server acts as a SSL server. +The current implementation (v0.5.4 or later) requires the AECDH-AES256-SHA +cipher over TLSv1 protocol, and prefers the secp256k1 curve (but other curves +may be accepted, depending on the version of python and OpenSSL used). + +addr +^^^^ + +Provide information on known nodes of the network. Non-advertised nodes should +be forgotten after typically 3 hours. + +Payload: + ++------------+-------------+-----------+---------------------------------------+ +| Field Size | Description | Data type | Comments | ++============+=============+===========+=======================================+ +| 1+ | count | |var_int| | Number of address entries (max: 1000) | ++------------+-------------+-----------+---------------------------------------+ +| 38 | addr_list | net_addr | Address of other nodes on the network.| ++------------+-------------+-----------+---------------------------------------+ + +inv +^^^ + +Allows a node to advertise its knowledge of one or more objects. 
Payload +(maximum payload length: 50000 items): + ++------------+-------------+------------+-----------------------------+ +| Field Size | Description | Data type | Comments | ++============+=============+============+=============================+ +| ? | count | |var_int| | Number of inventory entries | ++------------+-------------+------------+-----------------------------+ +| 32x? | inventory | inv_vect[] | Inventory vectors | ++------------+-------------+------------+-----------------------------+ + + +getdata +^^^^^^^ + +getdata is used in response to an inv message to retrieve the content of a +specific object after filtering known elements. + +Payload (maximum payload length: 50000 entries): + ++------------+-------------+------------+-----------------------------+ +| Field Size | Description | Data type | Comments | ++============+=============+============+=============================+ +| ? | count | |var_int| | Number of inventory entries | ++------------+-------------+------------+-----------------------------+ +| 32x? | inventory | inv_vect[] | Inventory vectors | ++------------+-------------+------------+-----------------------------+ + + +object +^^^^^^ + +An object is a message which is shared throughout a stream. It is the only +message which propagates; all others are only between two nodes. Objects have a +type, like 'msg', or 'broadcast'. To be a valid object, the +:doc:`pow` must be done. The maximum allowable length of an object +(not to be confused with the ``objectPayload``) is |2^18| bytes. + +.. |2^18| replace:: 2\ :sup:`18`\ + +.. list-table:: Message structure + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 8 + - nonce + - uint64_t + - Random nonce used for the :doc:`pow` + * - 8 + - expiresTime + - uint64_t + - The "end of life" time of this object (be aware, in version 2 of the + protocol this was the generation time). 
Objects shall be shared with + peers until its end-of-life time has been reached. The node should store + the inventory vector of that object for some extra period of time to + avoid reloading it from another node with a small time delay. The time + may be no further than 28 days + 3 hours in the future. + * - 4 + - objectType + - uint32_t + - Four values are currently defined: 0-"getpubkey", 1-"pubkey", 2-"msg", + 3-"broadcast". All other values are reserved. Nodes should relay objects + even if they use an undefined object type. + * - 1+ + - version + - var_int + - The object's version. Note that msg objects won't contain a version + until Sun, 16 Nov 2014 22:00:00 GMT. + * - 1+ + - stream number + - var_int + - The stream number in which this object may propagate + * - ? + - objectPayload + - uchar[] + - This field varies depending on the object type; see below. + + +Object types +------------ + +Here are the payloads for various object types. + +getpubkey +^^^^^^^^^ + +When a node has the hash of a public key (from an address) but not the public +key itself, it must send out a request for the public key. + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 20 + - ripe + - uchar[] + - The ripemd hash of the public key. This field is only included when the + address version is <= 3. + * - 32 + - tag + - uchar[] + - The tag derived from the address version, stream number, and ripe. This + field is only included when the address version is >= 4. + +pubkey +^^^^^^ + +A version 2 pubkey. This is still in use and supported by current clients but +*new* v2 addresses are not generated by clients. + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 4 + - |behavior_bitfield| + - uint32_t + - A bitfield of optional behaviors and features that can be expected from + the node receiving the message. 
+ * - 64 + - public signing key + - uchar[] + - The ECC public key used for signing (uncompressed format; + normally prepended with \x04 ) + * - 64 + - public encryption key + - uchar[] + - The ECC public key used for encryption (uncompressed format; + normally prepended with \x04 ) + +.. list-table:: A version 3 pubkey + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 4 + - |behavior_bitfield| + - uint32_t + - A bitfield of optional behaviors and features that can be expected from + the node receiving the message. + * - 64 + - public signing key + - uchar[] + - The ECC public key used for signing (uncompressed format; + normally prepended with \x04 ) + * - 64 + - public encryption key + - uchar[] + - The ECC public key used for encryption (uncompressed format; + normally prepended with \x04 ) + * - 1+ + - nonce_trials_per_byte + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is the + average number of nonce trials a node will have to perform to meet the + Proof of Work requirement. 1000 is the network minimum so any lower + values will be automatically raised to 1000. + * - 1+ + - extra_bytes + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is added + to the data length to make sending small messages more difficult. + 1000 is the network minimum so any lower values will be automatically + raised to 1000. 
+ * - 1+ + - sig_length + - var_int + - Length of the signature + * - sig_length + - signature + - uchar[] + - The ECDSA signature which, as of protocol v3, covers the object + header starting with the time, appended with the data described in this + table down to the extra_bytes. + +.. list-table:: A version 4 pubkey + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 32 + - tag + - uchar[] + - The tag, made up of bytes 32-64 of the double hash of the address data + (see example python code below) + * - ? + - encrypted + - uchar[] + - Encrypted pubkey data. + +When version 4 pubkeys are created, most of the data in the pubkey is encrypted. +This is done in such a way that only someone who has the Bitmessage address +which corresponds to a pubkey can decrypt and use that pubkey. This prevents +people from gathering pubkeys sent around the network and using the data from +them to create messages to be used in spam or in flooding attacks. + +In order to encrypt the pubkey data, a double SHA-512 hash is calculated from +the address version number, stream number, and ripe hash of the Bitmessage +address that the pubkey corresponds to. The first 32 bytes of this hash are used +to create a public and private key pair with which to encrypt and decrypt the +pubkey data, using the same algorithm as message encryption +(see :doc:`encryption`). The remaining 32 bytes of this hash are added to the +unencrypted part of the pubkey and used as a tag, as above. This allows nodes to +determine which pubkey to decrypt when they wish to send a message. + +In PyBitmessage, the double hash of the address data is calculated using the +python code below: + +.. code-block:: python + + doubleHashOfAddressData = hashlib.sha512(hashlib.sha512( + encodeVarint(addressVersionNumber) + encodeVarint(streamNumber) + hash + ).digest()).digest() + + +.. 
list-table:: Encrypted data in version 4 pubkeys: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 4 + - |behavior_bitfield| + - uint32_t + - A bitfield of optional behaviors and features that can be expected from + the node receiving the message. + * - 64 + - public signing key + - uchar[] + - The ECC public key used for signing (uncompressed format; + normally prepended with \x04 ) + * - 64 + - public encryption key + - uchar[] + - The ECC public key used for encryption (uncompressed format; + normally prepended with \x04 ) + * - 1+ + - nonce_trials_per_byte + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is the + average number of nonce trials a node will have to perform to meet the + Proof of Work requirement. 1000 is the network minimum so any lower + values will be automatically raised to 1000. + * - 1+ + - extra_bytes + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is added + to the data length to make sending small messages more difficult. + 1000 is the network minimum so any lower values will be automatically + raised to 1000. + * - 1+ + - sig_length + - var_int + - Length of the signature + * - sig_length + - signature + - uchar[] + - The ECDSA signature which covers everything from the object header + starting with the time, then appended with the decrypted data down to + the extra_bytes. This was changed in protocol v3. + +msg +^^^ + +Used for person-to-person messages. Note that msg objects won't contain a +version in the object header until Sun, 16 Nov 2014 22:00:00 GMT. + +.. 
list-table:: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - ? + - encrypted + - uchar[] + - Encrypted data. See `Encrypted payload`_. + See also `Unencrypted Message Data`_ + +broadcast +^^^^^^^^^ + +Users who are subscribed to the sending address will see the message appear in +their inbox. Broadcasts are version 4 or 5. + +Pubkey objects and v5 broadcast objects are encrypted the same way: The data +encoded in the sender's Bitmessage address is hashed twice. The first 32 bytes +of the resulting hash constitute the "private" encryption key and the last +32 bytes constitute a **tag** so that anyone listening can easily decide if +this particular message is interesting. The sender calculates the public key +from the private key and then encrypts the object with this public key. Thus +anyone who knows the Bitmessage address of the sender of a broadcast or pubkey +object can decrypt it. + +The version of broadcast objects was previously 2 or 3 but was changed to 4 or +5 for protocol v3. Having a broadcast version of 5 indicates that a tag is used +which, in turn, is used when the sender's address version is >=4. + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 32 + - tag + - uchar[] + - The tag. This field is new and only included when the broadcast version + is >= 5. Changed in protocol v3 + * - ? + - encrypted + - uchar[] + - Encrypted broadcast data. The keys are derived as described in the + paragraph above. See Encrypted payload for details about the encryption + algorithm itself. + +Unencrypted data format: + +.. list-table:: + :header-rows: 1 + :widths: auto + + * - Field Size + - Description + - Data type + - Comments + * - 1+ + - broadcast version + - var_int + - The version number of this broadcast protocol message which is equal + to 2 or 3. This is included here so that it can be signed.
This is + no longer included in protocol v3 + * - 1+ + - address version + - var_int + - The sender's address version + * - 1+ + - stream number + - var_int + - The sender's stream number + * - 4 + - |behavior_bitfield| + - uint32_t + - A bitfield of optional behaviors and features that can be expected from + the owner of this pubkey. + * - 64 + - public signing key + - uchar[] + - The ECC public key used for signing (uncompressed format; + normally prepended with \x04) + * - 64 + - public encryption key + - uchar[] + - The ECC public key used for encryption (uncompressed format; + normally prepended with \x04) + * - 1+ + - nonce_trials_per_byte + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is the + average number of nonce trials a node will have to perform to meet the + Proof of Work requirement. 1000 is the network minimum so any lower + values will be automatically raised to 1000. This field is new and is + only included when the address_version >= 3. + * - 1+ + - extra_bytes + - var_int + - Used to calculate the difficulty target of messages accepted by this + node. The higher this value, the more difficult the Proof of Work must + be before this individual will accept the message. This number is added + to the data length to make sending small messages more difficult. + 1000 is the network minimum so any lower values will be automatically + raised to 1000. This field is new and is only included when the + address_version >= 3. 
+ * - 1+ + - encoding + - var_int + - The encoding type of the message + * - 1+ + - messageLength + - var_int + - The message length in bytes + * - messageLength + - message + - uchar[] + - The message + * - 1+ + - sig_length + - var_int + - Length of the signature + * - sig_length + - signature + - uchar[] + - The signature which did cover the unencrypted data from the broadcast + version down through the message. In protocol v3, it covers the + unencrypted object header starting with the time, all appended with + the decrypted data. + +.. |behavior_bitfield| replace:: :ref:`behavior bitfield ` diff --git a/docs/useragent.rst b/docs/useragent.rst new file mode 100644 index 00000000..3523a274 --- /dev/null +++ b/docs/useragent.rst @@ -0,0 +1,53 @@ +User Agent +========== + +Bitmessage user agents are a modified browser user agent with more structure +to aid parsers and provide some coherence. The user agent strings are arranged +in a stack with the most underlying software listed first. + +Basic format:: + + /Name:Version/Name:Version/.../ + +Example:: + + /PyBitmessage:0.2.2/Corporate Mail System:0.8/ + /Surdo:5.64/surdo-qt:0.4/ + +The version numbers are not defined to any strict format, although this guide +recommends: + + * Version numbers in the form of Major.Minor.Revision (2.6.41) + * Repository builds using a date in the format of YYYYMMDD (20110128) + +For git repository builds, implementations are free to use the git commitish. +However the issue lies in that it is not immediately obvious without the +repository which version precedes another. For this reason, we lightly +recommend dates in the format specified above, although this is by no means +a requirement. + +Optional ``-r1``, ``-r2``, ... can be appended to user agent version numbers. +This is another light recommendation, but not a requirement.
Implementations +are free to specify version numbers in whatever format needed insofar as it +does not include ``(``, ``)``, ``:`` or ``/`` to interfere with the user agent +syntax. + +An optional comments field after the version number is also allowed. Comments +should be delimited by parentheses ``(...)``. The contents of comments are +entirely implementation-defined although this document recommends the use of +semi-colons ``;`` as a delimiter between pieces of information. + +Example:: + + /cBitmessage:0.2(iPad; U; CPU OS 3_2_1)/AndroidBuild:0.8/ + +Reserved symbols are therefore: ``/ : ( )`` + +They should not be misused beyond what is specified in this section. + +``/`` + separates the code-stack +``:`` + specifies the implementation version of the particular stack +``( and )`` + delimits a comment which optionally separates data using ``;``