Problem: Validation code not optimized (#2490)

Solution: memoize transaction serialization (`to_dict`), deserialization (`from_dict`), and input validation to avoid repeated work during validation
This commit is contained in:
Vanshdeep Singh 2018-09-04 15:30:52 +02:00 committed by vrde
parent c72c7a4626
commit cb22557771
10 changed files with 195 additions and 25 deletions

View File

@ -91,10 +91,8 @@ def get_assets(conn, asset_ids):
@register_query(LocalMongoDBConnection)
def get_spent(conn, transaction_id, output):
    """Return transactions whose inputs spend the given output.

    Args:
        conn: a LocalMongoDBConnection instance.
        transaction_id (str): id of the transaction that created the output.
        output (int): index of the output within that transaction.

    Returns:
        A cursor over matching transaction documents (``_id`` excluded).
    """
    # NOTE: the rendered diff left two identical `query` assignments; only
    # one is needed.
    query = {'inputs.fulfills': {'transaction_id': transaction_id,
                                 'output_index': output}}
    return conn.run(
        conn.collection('transactions')
        .find(query, {'_id': 0}))

View File

@ -0,0 +1,58 @@
import functools
import codecs
from functools import lru_cache
class HDict(dict):
    """A ``dict`` made hashable so it can be used as an ``lru_cache`` key.

    The hash is derived from the raw bytes of the ``'id'`` entry (a hex
    string), so two dicts describing the same transaction id share a
    cache slot.
    """

    def __hash__(self):
        raw_id = codecs.decode(self['id'], 'hex')
        return hash(raw_id)
@lru_cache(maxsize=16384)
def from_dict(func, *args, **kwargs):
    """Memoized trampoline: apply *func* to hashable arguments.

    All arguments (including *func*) form the cache key, so they must be
    hashable — dict payloads are expected to arrive wrapped as ``HDict``.
    """
    result = func(*args, **kwargs)
    return result
def memoize_from_dict(func):
    """Decorator memoizing a ``from_dict``-style classmethod.

    Only payloads carrying a truthy ``'id'`` are cached — the id is what
    makes the dict hashable (via ``HDict``). Unsigned/partial payloads
    fall through to an uncached call.
    """

    @functools.wraps(func)
    def memoized_func(*args, **kwargs):
        # args[0] is presumably the class (the wrapped function is used as
        # a classmethod); args[1] is the transaction payload dict.
        payload = args[1]
        if payload.get('id', None):
            hashable_args = (args[0], HDict(payload)) + tuple(args[2:])
            return from_dict(func, *hashable_args, **kwargs)
        return func(*args, **kwargs)

    return memoized_func
class ToDictWrapper():
    """Hashable wrapper around a transaction, keyed on its ``id``.

    Lets ``lru_cache`` treat two wrappers of equal-id transactions as the
    same cache key.
    """

    def __init__(self, tx):
        self.tx = tx

    def __eq__(self, other):
        # Equality mirrors __hash__: the transaction id is the identity.
        return self.tx.id == other.tx.id

    def __hash__(self):
        return hash(self.tx.id)
@lru_cache(maxsize=16384)
def to_dict(func, tx_wrapped):
    """Memoized serialization: unwrap the transaction and apply *func*.

    The cache key is (func, wrapper); the wrapper hashes on the
    transaction id.
    """
    tx = tx_wrapped.tx
    return func(tx)
def memoize_to_dict(func):
    """Decorator memoizing a ``to_dict``-style method keyed on the tx id.

    Transactions without an id (not yet signed/finalized) are serialized
    without caching, since there is no stable key for them.
    """

    @functools.wraps(func)
    def memoized_func(*args, **kwargs):
        tx = args[0]
        if tx.id:
            return to_dict(func, ToDictWrapper(tx))
        return func(*args, **kwargs)

    return memoized_func

View File

@ -12,7 +12,8 @@ Attributes:
"""
from collections import namedtuple
from copy import deepcopy
from functools import reduce
from functools import reduce, lru_cache
import rapidjson
import base58
from cryptoconditions import Fulfillment, ThresholdSha256, Ed25519Sha256
@ -27,6 +28,7 @@ from bigchaindb.common.exceptions import (KeypairMismatchException,
AmountError, AssetIdMismatch,
ThresholdTooDeep)
from bigchaindb.common.utils import serialize
from .memoize import memoize_from_dict, memoize_to_dict
UnspentOutput = namedtuple(
@ -82,6 +84,11 @@ class Input(object):
# TODO: If `other !== Fulfillment` return `False`
return self.to_dict() == other.to_dict()
# NOTE: This function is used to provide a unique key for a given
# Input to supplement memoization (Inputs appear inside lru_cache keys
# such as Transaction._input_valid's arguments).
def __hash__(self):
    # fulfillment and fulfills together identify this input; both are
    # assumed hashable — TODO(review): confirm for every fulfillment type.
    return hash((self.fulfillment, self.fulfills))
def to_dict(self):
"""Transforms the object to a Python dictionary.
@ -500,7 +507,7 @@ class Transaction(object):
VERSION = '2.0'
def __init__(self, operation, asset, inputs=None, outputs=None,
metadata=None, version=None, hash_id=None):
metadata=None, version=None, hash_id=None, tx_dict=None):
"""The constructor allows to create a customizable Transaction.
Note:
@ -553,6 +560,7 @@ class Transaction(object):
self.outputs = outputs or []
self.metadata = metadata
self._id = hash_id
self.tx_dict = tx_dict
@property
def unspent_outputs(self):
@ -990,7 +998,7 @@ class Transaction(object):
raise ValueError('Inputs and '
'output_condition_uris must have the same count')
tx_dict = self.to_dict()
tx_dict = self.tx_dict if self.tx_dict else self.to_dict()
tx_dict = Transaction._remove_signatures(tx_dict)
tx_dict['id'] = None
tx_serialized = Transaction._to_str(tx_dict)
@ -1003,6 +1011,7 @@ class Transaction(object):
return all(validate(i, cond)
for i, cond in enumerate(output_condition_uris))
@lru_cache(maxsize=16384)
def _input_valid(self, input_, operation, message, output_condition_uri=None):
"""Validates a single Input against a single Output.
@ -1048,6 +1057,11 @@ class Transaction(object):
ffill_valid = parsed_ffill.validate(message=message.digest())
return output_valid and ffill_valid
# This function is required by `lru_cache` to create a key for memoization
# (e.g. `_input_valid` is cached with `self` as part of the key); a signed
# transaction is uniquely identified by its id.
def __hash__(self):
    return hash(self.id)
@memoize_to_dict
def to_dict(self):
"""Transforms the object to a Python dictionary.
@ -1150,7 +1164,9 @@ class Transaction(object):
tx_body (dict): The Transaction to be transformed.
"""
# NOTE: Remove reference to avoid side effects
tx_body = deepcopy(tx_body)
# tx_body = deepcopy(tx_body)
tx_body = rapidjson.loads(rapidjson.dumps(tx_body))
try:
proposed_tx_id = tx_body['id']
except KeyError:
@ -1167,6 +1183,7 @@ class Transaction(object):
raise InvalidHash(err_msg.format(proposed_tx_id))
@classmethod
@memoize_from_dict
def from_dict(cls, tx, skip_schema_validation=True):
"""Transforms a Python dictionary to a Transaction object.
@ -1184,7 +1201,7 @@ class Transaction(object):
inputs = [Input.from_dict(input_) for input_ in tx['inputs']]
outputs = [Output.from_dict(output) for output in tx['outputs']]
return cls(tx['operation'], tx['asset'], inputs, outputs,
tx['metadata'], tx['version'], hash_id=tx['id'])
tx['metadata'], tx['version'], hash_id=tx['id'], tx_dict=tx)
@classmethod
def from_db(cls, bigchain, tx_dict_list):

View File

@ -9,6 +9,7 @@ MongoDB.
import logging
from collections import namedtuple
from uuid import uuid4
import rapidjson
try:
from hashlib import sha3_256
@ -77,10 +78,11 @@ class BigchainDB(object):
raise ValidationError('Mode must be one of the following {}.'
.format(', '.join(self.mode_list)))
tx_dict = transaction.tx_dict if transaction.tx_dict else transaction.to_dict()
payload = {
'method': mode,
'jsonrpc': '2.0',
'params': [encode_transaction(transaction.to_dict())],
'params': [encode_transaction(tx_dict)],
'id': str(uuid4())
}
# TODO: handle connection errors!
@ -122,10 +124,9 @@ class BigchainDB(object):
txns = []
assets = []
txn_metadatas = []
for transaction_obj in transactions:
# self.update_utxoset(transaction)
transaction = transaction_obj.to_dict()
if transaction['operation'] == transaction_obj.CREATE:
for t in transactions:
transaction = t.tx_dict if t.tx_dict else rapidjson.loads(rapidjson.dumps(t.to_dict()))
if transaction['operation'] == t.CREATE:
asset = transaction.pop('asset')
asset['id'] = transaction['id']
assets.append(asset)
@ -224,6 +225,10 @@ class BigchainDB(object):
return backend.query.delete_unspent_outputs(
self.connection, *unspent_outputs)
def is_committed(self, transaction_id):
    """Return ``True`` if a transaction with *transaction_id* exists in
    the backend store (i.e. has been committed), ``False`` otherwise.
    """
    stored_tx = backend.query.get_transaction(self.connection,
                                              transaction_id)
    return bool(stored_tx)
def get_transaction(self, transaction_id):
transaction = backend.query.get_transaction(self.connection, transaction_id)

View File

@ -27,7 +27,7 @@ class Transaction(Transaction):
if self.operation == Transaction.CREATE:
duplicates = any(txn for txn in current_transactions if txn.id == self.id)
if bigchain.get_transaction(self.to_dict()['id']) or duplicates:
if bigchain.is_committed(self.id) or duplicates:
raise DuplicateTransaction('transaction `{}` already exists'
.format(self.id))

View File

@ -35,12 +35,6 @@ class ValidatorElection(Transaction):
INCONCLUSIVE = 'inconclusive'
ELECTION_THRESHOLD = 2 / 3
def __init__(self, operation, asset, inputs, outputs,
metadata=None, version=None, hash_id=None):
# operation `CREATE` is being passed as argument as `VALIDATOR_ELECTION` is an extension
# of `CREATE` and any validation on `CREATE` in the parent class should apply to it
super().__init__(operation, asset, inputs, outputs, metadata, version, hash_id)
@classmethod
def get_validator_change(cls, bigchain, height=None):
"""Return the latest change to the validator set

View File

@ -205,7 +205,7 @@ def test_get_owned_ids(signed_create_tx, user_pk):
conn = connect()
# insert a transaction
conn.db.transactions.insert_one(signed_create_tx.to_dict())
conn.db.transactions.insert_one(deepcopy(signed_create_tx.to_dict()))
txns = list(query.get_owned_ids(conn, user_pk))
@ -224,7 +224,7 @@ def test_get_spending_transactions(user_pk, user_sk):
tx2 = Transaction.transfer([inputs[0]], out, tx1.id).sign([user_sk])
tx3 = Transaction.transfer([inputs[1]], out, tx1.id).sign([user_sk])
tx4 = Transaction.transfer([inputs[2]], out, tx1.id).sign([user_sk])
txns = [tx.to_dict() for tx in [tx1, tx2, tx3, tx4]]
txns = [deepcopy(tx.to_dict()) for tx in [tx1, tx2, tx3, tx4]]
conn.db.transactions.insert_many(txns)
links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()]

View File

@ -0,0 +1,92 @@
# Copyright BigchainDB GmbH and BigchainDB contributors
# SPDX-License-Identifier: (Apache-2.0 AND CC-BY-4.0)
# Code is Apache-2.0 and docs are CC-BY-4.0
import pytest
from copy import deepcopy
from bigchaindb.models import Transaction
from bigchaindb.common.crypto import generate_key_pair
from bigchaindb.common.memoize import to_dict, from_dict
pytestmark = [pytest.mark.tendermint, pytest.mark.bdb]
def test_memoize_to_dict(b):
    """After the first serialization, `to_dict` calls hit the cache."""
    alice = generate_key_pair()
    asset = {'data': {'id': 'test_id'}}

    # Cache starts cold for this test (cleared by the _bdb fixture).
    assert to_dict.cache_info().hits == 0
    assert to_dict.cache_info().misses == 0

    tx = Transaction.create(
        [alice.public_key],
        [([alice.public_key], 1)],
        asset=asset,
    ).sign([alice.private_key])

    tx.to_dict()
    assert to_dict.cache_info().hits == 0
    assert to_dict.cache_info().misses == 1

    tx.to_dict()
    tx.to_dict()
    assert to_dict.cache_info().hits == 2
    assert to_dict.cache_info().misses == 1
def test_memoize_from_dict(b):
    """Repeated `from_dict` on an identical payload is served from cache."""
    alice = generate_key_pair()
    asset = {'data': {'id': 'test_id'}}

    # Cache starts cold for this test (cleared by the _bdb fixture).
    assert from_dict.cache_info().hits == 0
    assert from_dict.cache_info().misses == 0

    tx = Transaction.create(
        [alice.public_key],
        [([alice.public_key], 1)],
        asset=asset,
    ).sign([alice.private_key])
    tx_dict = deepcopy(tx.to_dict())

    Transaction.from_dict(tx_dict)
    assert from_dict.cache_info().hits == 0
    assert from_dict.cache_info().misses == 1

    Transaction.from_dict(tx_dict)
    Transaction.from_dict(tx_dict)
    assert from_dict.cache_info().hits == 2
    assert from_dict.cache_info().misses == 1
def test_memoize_input_valid(b):
    """Re-validating the same inputs reuses `_input_valid`'s cache."""
    alice = generate_key_pair()
    asset = {'data': {'id': 'test_id'}}

    # Cache starts cold for this test (cleared by the _bdb fixture).
    assert Transaction._input_valid.cache_info().hits == 0
    assert Transaction._input_valid.cache_info().misses == 0

    tx = Transaction.create(
        [alice.public_key],
        [([alice.public_key], 1)],
        asset=asset,
    ).sign([alice.private_key])

    tx.inputs_valid()
    assert Transaction._input_valid.cache_info().hits == 0
    assert Transaction._input_valid.cache_info().misses == 1

    tx.inputs_valid()
    tx.inputs_valid()
    assert Transaction._input_valid.cache_info().hits == 2
    assert Transaction._input_valid.cache_info().misses == 1

View File

@ -13,7 +13,7 @@ from cryptoconditions import Ed25519Sha256
from pytest import mark, raises
from sha3 import sha3_256
pytestmark = mark.tendermint
pytestmark = [mark.tendermint, mark.bdb]
def test_input_serialization(ffill_uri, user_pub):
@ -533,7 +533,7 @@ def test_validate_input_with_invalid_parameters(utx):
input_conditions = [out.fulfillment.condition_uri for out in utx.outputs]
tx_dict = utx.to_dict()
tx_serialized = Transaction._to_str(tx_dict)
valid = utx._input_valid(utx.inputs[0], tx_serialized, input_conditions)
valid = utx._input_valid(utx.inputs[0], tx_serialized, input_conditions[0])
assert not valid

View File

@ -144,11 +144,17 @@ def _bdb(_setup_database, _configure_bigchaindb):
from bigchaindb import config
from bigchaindb.backend import connect
from .utils import flush_db
from bigchaindb.common.memoize import to_dict, from_dict
from bigchaindb.models import Transaction
conn = connect()
yield
dbname = config['database']['name']
flush_db(conn, dbname)
to_dict.cache_clear()
from_dict.cache_clear()
Transaction._input_valid.cache_clear()
# We need this function to avoid loading an existing
# conf file located in the home of the user running