diff --git a/bigchaindb/common/transaction.py b/bigchaindb/common/transaction.py index 13af253c..6ba7ab80 100644 --- a/bigchaindb/common/transaction.py +++ b/bigchaindb/common/transaction.py @@ -26,6 +26,8 @@ from bigchaindb.common.utils import serialize UnspentOutput = namedtuple( 'UnspentOutput', ( + # TODO 'utxo_hash': sha3_256(f'{txid}{output_index}'.encode()) + # 'utxo_hash', # noqa 'transaction_id', 'output_index', 'amount', diff --git a/bigchaindb/tendermint/core.py b/bigchaindb/tendermint/core.py index 704dd592..80d40859 100644 --- a/bigchaindb/tendermint/core.py +++ b/bigchaindb/tendermint/core.py @@ -20,9 +20,7 @@ class App(BaseApplication): State Machine.""" def __init__(self, bigchaindb=None): - if not bigchaindb: - bigchaindb = BigchainDB() - self.bigchaindb = bigchaindb + self.bigchaindb = bigchaindb or BigchainDB() self.block_txn_ids = [] self.block_txn_hash = '' self.block_transactions = [] diff --git a/bigchaindb/tendermint/lib.py b/bigchaindb/tendermint/lib.py index 8ab806ac..0175ebbf 100644 --- a/bigchaindb/tendermint/lib.py +++ b/bigchaindb/tendermint/lib.py @@ -8,13 +8,19 @@ from copy import deepcopy from os import getenv from uuid import uuid4 +try: + from hashlib import sha3_256 +except ImportError: + # NOTE: neeeded for Python < 3.6 + from sha3 import sha3_256 + import requests from bigchaindb import backend from bigchaindb import Bigchain from bigchaindb.models import Transaction from bigchaindb.common.exceptions import SchemaValidationError, ValidationError -from bigchaindb.tendermint.utils import encode_transaction +from bigchaindb.tendermint.utils import encode_transaction, merkleroot from bigchaindb.tendermint import fastquery from bigchaindb import exceptions as core_exceptions @@ -130,6 +136,40 @@ class BigchainDB(Bigchain): return backend.query.store_unspent_outputs( self.connection, *unspent_outputs) + def get_utxoset_merkle_root(self): + """Returns the merkle root of the utxoset. 
def merkleroot(hashes):
    """Compute the merkle root for a given list of leaf hashes.

    The tree is built bottom-up: adjacent nodes are concatenated and
    hashed with SHA3-256 to form their parent. When a level holds an odd
    number of nodes, its last node is paired with itself. The list passed
    in by the caller is never modified.

    Args:
        hashes (:obj:`list` of :obj:`bytes`): The leaves of the tree.

    Returns:
        str: Merkle root in hexadecimal form.

    """
    # XXX TEMPORARY -- MUST REVIEW and possibly CHANGE
    # With no leaves at all (e.g. an empty utxo set) there is nothing to
    # hash, so the hash of the empty byte string is returned for now.
    if not hashes:
        return sha3_256(b'').hexdigest()
    # XXX END TEMPORARY -- MUST REVIEW ...

    # Work on a copy: padding an odd-sized level must not append to the
    # list owned by the caller.
    level = list(hashes)
    while len(level) > 1:
        if len(level) % 2 == 1:
            level.append(level[-1])
        level = [
            sha3_256(level[i] + level[i + 1]).digest()
            for i in range(0, len(level), 2)
        ]
    # A single remaining node is the root (a lone leaf is returned as is).
    return hexlify(level[0]).decode()
# TODO test for the case of an empty list of hashes, and possibly other cases.
def test_merkleroot():
    """Check the merkle root computed over three single-character leaves."""
    from bigchaindb.tendermint.utils import merkleroot

    expected_root = (
        '78c7c394d3158c218916b7ae0ebdea502e0f4e85c08e3b371e3dfd824d389fa3')
    # Leaves are the SHA3-256 digests of the characters 'a', 'b' and 'c'.
    leaves = []
    for char in 'abc':
        leaves.append(sha3_256(char.encode()).digest())
    assert merkleroot(leaves) == expected_root