Problem: represent utxoset state as a hash

Solution: create a merkle tree out of the utxoset and compute its merkle
root
This commit is contained in:
Sylvain Bellemare
2018-02-22 21:27:31 +01:00
parent 2d2182dd19
commit 56dfd9bab0
6 changed files with 117 additions and 6 deletions

View File

@@ -26,6 +26,8 @@ from bigchaindb.common.utils import serialize
UnspentOutput = namedtuple(
'UnspentOutput', (
# TODO 'utxo_hash': sha3_256(f'{txid}{output_index}'.encode())
# 'utxo_hash', # noqa
'transaction_id',
'output_index',
'amount',

View File

@@ -20,9 +20,7 @@ class App(BaseApplication):
State Machine."""
def __init__(self, bigchaindb=None):
if not bigchaindb:
bigchaindb = BigchainDB()
self.bigchaindb = bigchaindb
self.bigchaindb = bigchaindb or BigchainDB()
self.block_txn_ids = []
self.block_txn_hash = ''
self.block_transactions = []

View File

@@ -8,13 +8,19 @@ from copy import deepcopy
from os import getenv
from uuid import uuid4
try:
from hashlib import sha3_256
except ImportError:
# NOTE: needed for Python < 3.6
from sha3 import sha3_256
import requests
from bigchaindb import backend
from bigchaindb import Bigchain
from bigchaindb.models import Transaction
from bigchaindb.common.exceptions import SchemaValidationError, ValidationError
from bigchaindb.tendermint.utils import encode_transaction
from bigchaindb.tendermint.utils import encode_transaction, merkleroot
from bigchaindb.tendermint import fastquery
from bigchaindb import exceptions as core_exceptions
@@ -130,6 +136,40 @@ class BigchainDB(Bigchain):
return backend.query.store_unspent_outputs(
self.connection, *unspent_outputs)
def get_utxoset_merkle_root(self):
    """Return the merkle root of the current utxoset.

    Every unspent output fetched from the backend is turned into a
    leaf: the SHA3-256 digest of its transaction id immediately
    followed by its output index. The transaction id and the output
    index are taken to be sufficient to uniquely identify a utxo, so
    no other field of the record goes into the leaf.

    The leaves are sorted (as raw digests) before the root is
    computed, so that the order in which the backend yields the
    records does not influence the result.

    The whole tree is recomputed on every call. This is known to be
    inefficient; a future optimization could recompute only the
    branches affected by a change.

    Returns:
        str: Merkle root in hexadecimal form.
    """
    # TODO Once ready, use the already pre-computed utxo_hash field.
    # See common/transactions.py for details.
    leaves = []
    for utxo in backend.query.get_unspent_outputs(self.connection):
        identifier = f'''{utxo['transaction_id']}{utxo['output_index']}'''
        leaves.append(sha3_256(identifier.encode()).digest())
    # TODO Notice the sort! The leaf order must be deterministic.
    leaves.sort()
    return merkleroot(leaves)
def get_unspent_outputs(self):
"""Get the utxoset.

View File

@@ -1,6 +1,11 @@
import base64
import json
import sha3
from binascii import hexlify
try:
from hashlib import sha3_256
except ImportError:
from sha3 import sha3_256
def encode_transaction(value):
@@ -25,8 +30,38 @@ def calculate_hash(key_list):
if not key_list:
return ''
full_hash = sha3.sha3_256()
full_hash = sha3_256()
for key in key_list:
full_hash.update(key.encode('utf8'))
return full_hash.hexdigest()
def merkleroot(hashes):
    """Compute the merkle root for a given collection of leaf hashes.

    The tree is built bottom-up: adjacent nodes of a level are
    concatenated pairwise and hashed with SHA3-256, level after level,
    until a single node remains. When a level holds an odd number of
    nodes, its last node is paired with itself.

    The ``hashes`` argument is never modified; all work happens on a
    private copy.

    Args:
        hashes (:obj:`list` of :obj:`bytes`): The leaves of the tree.

    Returns:
        str: Merkle root in hexadecimal form. For an empty input this
        is the SHA3-256 hex digest of the empty byte string; for a
        single leaf it is that leaf, hex-encoded.
    """
    # Copy first so that padding an odd-sized level below never leaks
    # into the caller's list.
    level = list(hashes)
    # XXX TEMPORARY -- MUST REVIEW and possibly CHANGE
    # The idea here is that the UTXO SET would be empty and this function
    # would be invoked to compute the merkle root, and since there is nothing,
    # i.e. an empty list, then the hash of the empty string is returned.
    # This seems too easy but maybe that is good enough? TO REVIEW!
    if not level:
        return sha3_256(b'').hexdigest()
    # XXX END TEMPORARY -- MUST REVIEW ...
    while len(level) > 1:
        if len(level) % 2 == 1:
            # Odd number of nodes: duplicate the last one to complete
            # the final pair.
            level.append(level[-1])
        level = [
            sha3_256(level[i] + level[i + 1]).digest()
            for i in range(0, len(level), 2)
        ]
    return hexlify(level[0]).decode()

View File

@@ -1,6 +1,12 @@
import os
from unittest.mock import patch
try:
from hashlib import sha3_256
except ImportError:
# NOTE: needed for Python < 3.6
from sha3 import sha3_256
import pytest
from pymongo import MongoClient
@@ -311,3 +317,16 @@ def test_store_many_unspent_outputs(b, unspent_outputs, utxo_collection):
assert utxo_collection.find(
{'transaction_id': unspent_outputs[0]['transaction_id']}
).count() == 3
def test_get_utxoset_merkle_root_when_no_utxo(b):
    """The merkle root reported when no utxo is stored is the SHA3-256
    hex digest of the empty byte string."""
    merkle_root = b.get_utxoset_merkle_root()
    empty_digest = sha3_256(b'').hexdigest()
    assert merkle_root == empty_digest
@pytest.mark.bdb
@pytest.mark.usefixtures('utxoset')
def test_get_utxoset_merkle_root(b, utxoset):
    """The merkle root of the ``utxoset`` fixture matches a known value.

    NOTE(review): the expected root is pinned to whatever records the
    ``utxoset`` fixture provides (presumably stored in the database
    before the test runs -- confirm against the fixture definition); it
    has to be updated whenever that fixture changes.
    """
    expected_merkle_root = (
        '86d311c03115bf4d287f8449ca5828505432d69b82762d47077b1c00fe426eac')
    merkle_root = b.get_utxoset_merkle_root()
    assert merkle_root == expected_merkle_root

View File

@@ -1,6 +1,15 @@
import base64
import json
try:
from hashlib import sha3_256
except ImportError:
from sha3 import sha3_256
import pytest
pytestmark = pytest.mark.tendermint
def test_encode_decode_transaction(b):
from bigchaindb.tendermint.utils import (encode_transaction,
@@ -25,3 +34,11 @@ def test_calculate_hash_no_key(b):
# pass an empty list
assert calculate_hash([]) == ''
# TODO test for the case of an empty list of hashes, and possibly other cases.
def test_merkleroot():
    """The merkle root of three leaves -- the SHA3-256 digests of 'a',
    'b' and 'c' -- matches a known value."""
    from bigchaindb.tendermint.utils import merkleroot
    expected_root = (
        '78c7c394d3158c218916b7ae0ebdea502e0f4e85c08e3b371e3dfd824d389fa3')
    leaves = []
    for letter in 'abc':
        leaves.append(sha3_256(letter.encode()).digest())
    assert merkleroot(leaves) == expected_root